handle mojeek block
This commit is contained in:
		
							parent
							
								
									2c4dc7da84
								
							
						
					
					
						commit
						f30872134f
					
				| @ -501,11 +501,6 @@ class mojeek{ | ||||
| 				 | ||||
| 				throw new Exception("Failed to get HTML"); | ||||
| 			} | ||||
| 			/* | ||||
| 			$handle = fopen("scraper/mojeek.html", "r"); | ||||
| 			$html = fread($handle, filesize("scraper/mojeek.html")); | ||||
| 			fclose($handle);*/ | ||||
| 			 | ||||
| 		} | ||||
| 		 | ||||
| 		$out = [ | ||||
| @ -526,6 +521,8 @@ class mojeek{ | ||||
| 		 | ||||
| 		$this->fuckhtml->load($html); | ||||
| 		 | ||||
| 		$this->detect_block(); | ||||
| 		 | ||||
| 		$results = | ||||
| 			$this->fuckhtml | ||||
| 			->getElementsByClassName("results-standard", "ul"); | ||||
| @ -1034,6 +1031,8 @@ class mojeek{ | ||||
| 		 | ||||
| 		$this->fuckhtml->load($html); | ||||
| 		 | ||||
| 		$this->detect_block(); | ||||
| 		 | ||||
| 		$articles = | ||||
| 			$this->fuckhtml->getElementsByTagName("article"); | ||||
| 		 | ||||
| @ -1166,6 +1165,26 @@ class mojeek{ | ||||
| 		return $out; | ||||
| 	} | ||||
| 	 | ||||
| 	private function detect_block(){ | ||||
| 		 | ||||
| 		$title = | ||||
| 			$this->fuckhtml | ||||
| 			->getElementsByTagName( | ||||
| 				"title" | ||||
| 			); | ||||
| 		 | ||||
| 		if( | ||||
| 			count($title) !== 0 && | ||||
| 			$this->fuckhtml | ||||
| 			->getTextContent( | ||||
| 				$title[0]["innerHTML"] | ||||
| 			) == "403 - Forbidden" | ||||
| 		){ | ||||
| 			 | ||||
| 			throw new Exception("Mojeek blocked this instance or request proxy."); | ||||
| 		} | ||||
| 	} | ||||
| 	 | ||||
| 	private function titledots($title){ | ||||
| 		 | ||||
| 		return trim($title, ". \t\n\r\0\x0B"); | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 lolcat
						lolcat