Compare commits

...

3 Commits

Author SHA1 Message Date
lolcat
2b8d90af12 forgot the config fucking dementiacatwill 2025-07-27 21:53:20 -04:00
lolcat
0f803804a4 forgot the settings damn it 2025-07-27 21:48:10 -04:00
lolcat
f43feff0aa added baidu, the best search engine 2025-07-27 21:46:03 -04:00
5 changed files with 2266 additions and 4 deletions

View File

@ -119,7 +119,7 @@ class config{
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things.
-const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0";
+const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:141.0) Gecko/20100101 Firefox/141.0";
// Proxy pool assignments for each scraper
// false = Use server's raw IP
@ -133,6 +133,7 @@ class config{
const PROXY_GOOGLE_CSE = false;
const PROXY_STARTPAGE = false;
const PROXY_QWANT = false;
const PROXY_BAIDU = false;
const PROXY_GHOSTERY = false;
const PROXY_MARGINALIA = false;
const PROXY_MOJEEK = false;

View File

@ -949,6 +949,7 @@ class frontend{
"crowdview" => "Crowdview",
"mwmbl" => "Mwmbl",
"mojeek" => "Mojeek",
"baidu" => "Baidu",
"solofield" => "Solofield",
"marginalia" => "Marginalia",
"wiby" => "wiby",
@ -969,6 +970,7 @@ class frontend{
"startpage" => "Startpage",
"qwant" => "Qwant",
"yep" => "Yep",
"baidu" => "Baidu",
"solofield" => "Solofield",
"pinterest" => "Pinterest",
"flickr" => "Flickr",
@ -993,6 +995,7 @@ class frontend{
"google" => "Google",
"startpage" => "Startpage",
"qwant" => "Qwant",
"baidu" => "Baidu",
"solofield" => "Solofield"
]
];
@ -1008,7 +1011,8 @@ class frontend{
"startpage" => "Startpage",
"qwant" => "Qwant",
"yep" => "Yep",
-"mojeek" => "Mojeek"
+"mojeek" => "Mojeek",
+"baidu" => "Baidu"
]
];
break;

View File

@ -240,12 +240,13 @@ class fuckhtml{
public function getElementsByFuzzyAttributeValue(string $name, string $value, $collection = null){
$elems = $this->getElementsByAttributeName($name, $collection);
$value =
explode(
" ",
trim(
preg_replace(
-'/ +/',
+'/\s+/',
" ",
$value
)
@ -258,7 +259,18 @@ class fuckhtml{
foreach($elem["attributes"] as $attrib_name => $attrib_value){
-$attrib_value = explode(" ", $attrib_value);
+$attrib_value =
+explode(
+" ",
+trim(
+preg_replace(
+'/\s+/',
+" ",
+$attrib_value
+)
+)
+);
$ac = count($attrib_value);
$nc = count($value);
$cr = 0;

2229
scraper/baidu.php Normal file

File diff suppressed because it is too large Load Diff

View File

@ -169,6 +169,10 @@ $settings = [
"value" => "mojeek",
"text" => "Mojeek"
],
[
"value" => "baidu",
"text" => "Baidu"
],
[
"value" => "solofield",
"text" => "Solofield"
@ -223,6 +227,10 @@ $settings = [
"value" => "yep",
"text" => "Yep"
],
[
"value" => "baidu",
"text" => "Baidu"
],
[
"value" => "solofield",
"text" => "Solofield"
@ -285,6 +293,10 @@ $settings = [
"value" => "qwant",
"text" => "Qwant"
],
[
"value" => "baidu",
"text" => "Baidu"
],
[
"value" => "solofield",
"text" => "Solofield"
@ -322,6 +334,10 @@ $settings = [
[
"value" => "mojeek",
"text" => "Mojeek"
],
[
"value" => "baidu",
"text" => "Baidu"
]
]
],