soundcloud fix, for good this time
This commit is contained in:
		
							parent
							
								
									77931f3ee9
								
							
						
					
					
						commit
						e252bf4fce
					
				
							
								
								
									
										103
									
								
								scraper/sc.php
									
									
									
									
									
								
							
							
						
						
									
										103
									
								
								scraper/sc.php
									
									
									
									
									
								
							| @ -6,6 +6,9 @@ class sc{ | |||||||
| 		 | 		 | ||||||
| 		include "lib/backend.php"; | 		include "lib/backend.php"; | ||||||
| 		$this->backend = new backend("sc"); | 		$this->backend = new backend("sc"); | ||||||
|  | 		 | ||||||
|  | 		include "lib/fuckhtml.php"; | ||||||
|  | 		$this->fuckhtml = new fuckhtml(); | ||||||
| 	} | 	} | ||||||
| 	 | 	 | ||||||
| 	public function getfilters($page){ | 	public function getfilters($page){ | ||||||
| @ -25,7 +28,7 @@ class sc{ | |||||||
| 		]; | 		]; | ||||||
| 	} | 	} | ||||||
| 	 | 	 | ||||||
| 	private function get($proxy, $url, $get = []){ | 	private function get($proxy, $url, $get = [], $web_req = false){ | ||||||
| 		 | 		 | ||||||
| 		$curlproc = curl_init(); | 		$curlproc = curl_init(); | ||||||
| 		 | 		 | ||||||
| @ -37,19 +40,42 @@ class sc{ | |||||||
| 		curl_setopt($curlproc, CURLOPT_URL, $url); | 		curl_setopt($curlproc, CURLOPT_URL, $url); | ||||||
| 		 | 		 | ||||||
| 		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
 | 		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
 | ||||||
| 		curl_setopt($curlproc, CURLOPT_HTTPHEADER, | 		 | ||||||
| 			["User-Agent: " . config::USER_AGENT, | 		// use http2
 | ||||||
| 			"Accept: application/json, text/javascript, */*; q=0.01", | 		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); | ||||||
| 			"Accept-Language: en-US,en;q=0.5", | 		 | ||||||
| 			"Accept-Encoding: gzip", | 		if($web_req === false){ | ||||||
| 			"Referer: https://soundcloud.com/", | 			 | ||||||
| 			"Origin: https://soundcloud.com", | 			curl_setopt($curlproc, CURLOPT_HTTPHEADER, | ||||||
| 			"DNT: 1", | 				["User-Agent: " . config::USER_AGENT, | ||||||
| 			"Connection: keep-alive", | 				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", | ||||||
| 			"Sec-Fetch-Dest: empty", | 				"Accept-Language: en-US,en;q=0.5", | ||||||
| 			"Sec-Fetch-Mode: cors", | 				"Accept-Encoding: gzip", | ||||||
| 			"Sec-Fetch-Site: same-site"] | 				"Referer: https://soundcloud.com/", | ||||||
| 		); | 				"Origin: https://soundcloud.com", | ||||||
|  | 				"DNT: 1", | ||||||
|  | 				"Connection: keep-alive", | ||||||
|  | 				"Sec-Fetch-Dest: empty", | ||||||
|  | 				"Sec-Fetch-Mode: cors", | ||||||
|  | 				"Sec-Fetch-Site: same-site"] | ||||||
|  | 			); | ||||||
|  | 		}else{ | ||||||
|  | 			 | ||||||
|  | 			curl_setopt($curlproc, CURLOPT_HTTPHEADER, | ||||||
|  | 				["User-Agent: " . config::USER_AGENT, | ||||||
|  | 				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", | ||||||
|  | 				"Accept-Language: en-US,en;q=0.5", | ||||||
|  | 				"Accept-Encoding: gzip", | ||||||
|  | 				"DNT: 1", | ||||||
|  | 				"Connection: keep-alive", | ||||||
|  | 				"Upgrade-Insecure-Requests: 1", | ||||||
|  | 				"Sec-Fetch-Dest: document", | ||||||
|  | 				"Sec-Fetch-Mode: navigate", | ||||||
|  | 				"Sec-Fetch-Site: cross-site", | ||||||
|  | 				"Priority: u=1", | ||||||
|  | 				"TE: trailers"] | ||||||
|  | 			); | ||||||
|  | 		} | ||||||
| 		 | 		 | ||||||
| 		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); | 		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); | ||||||
| 		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); | 		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); | ||||||
| @ -396,13 +422,47 @@ class sc{ | |||||||
| 		 | 		 | ||||||
| 		$token = apcu_fetch("sc_token"); | 		$token = apcu_fetch("sc_token"); | ||||||
| 		 | 		 | ||||||
| 		if($token === false){ | 		if($token !== false){ | ||||||
|  | 			 | ||||||
|  | 			return $token; | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		// search through all javascript components on the main page
 | ||||||
|  | 		try{ | ||||||
|  | 			$html = | ||||||
|  | 				$this->get( | ||||||
|  | 					$proxy, | ||||||
|  | 					"https://soundcloud.com", | ||||||
|  | 					[] | ||||||
|  | 				); | ||||||
|  | 		}catch(Exception $error){ | ||||||
|  | 			 | ||||||
|  | 			throw new Exception("Failed to fetch front page"); | ||||||
|  | 		} | ||||||
|  | 		 | ||||||
|  | 		$this->fuckhtml->load($html); | ||||||
|  | 		 | ||||||
|  | 		$scripts = | ||||||
|  | 			$this->fuckhtml | ||||||
|  | 			->getElementsByTagName( | ||||||
|  | 				"script" | ||||||
|  | 			); | ||||||
|  | 		 | ||||||
|  | 		foreach($scripts as $script){ | ||||||
|  | 			 | ||||||
|  | 			if( | ||||||
|  | 				!isset($script["attributes"]["src"]) || | ||||||
|  | 				strpos($script["attributes"]["src"], "sndcdn.com") === false | ||||||
|  | 			){ | ||||||
|  | 				 | ||||||
|  | 				continue; | ||||||
|  | 			} | ||||||
| 			 | 			 | ||||||
| 			try{ | 			try{ | ||||||
| 				$js = | 				$js = | ||||||
| 					$this->get( | 					$this->get( | ||||||
| 						$proxy, | 						$proxy, | ||||||
| 						"https://a-v2.sndcdn.com/assets/0-a901c1e0.js", | 						$script["attributes"]["src"], | ||||||
| 						[] | 						[] | ||||||
| 					); | 					); | ||||||
| 			}catch(Exception $error){ | 			}catch(Exception $error){ | ||||||
| @ -416,16 +476,15 @@ class sc{ | |||||||
| 				$token | 				$token | ||||||
| 			); | 			); | ||||||
| 			 | 			 | ||||||
| 			if(!isset($token[1])){ | 			if(isset($token[1])){ | ||||||
| 				 | 				 | ||||||
| 				throw new Exception("Failed to get search token"); | 				apcu_store("sc_token", $token[1]); | ||||||
|  | 				return $token[1]; | ||||||
|  | 				break; | ||||||
| 			} | 			} | ||||||
| 			 |  | ||||||
| 			apcu_store("sc_token", $token[1]); |  | ||||||
| 			return $token[1]; |  | ||||||
| 		} | 		} | ||||||
| 		 | 		 | ||||||
| 		return $token; | 		throw new Exception("Did not find a Soundcloud token in the Javascript blobs"); | ||||||
| 	} | 	} | ||||||
| 	 | 	 | ||||||
| 	private function limitstrlen($text){ | 	private function limitstrlen($text){ | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 lolcat
						lolcat