Compare commits
3 Commits
0bdd5e73df
...
2b8d90af12
Author | SHA1 | Date | |
---|---|---|---|
![]() |
2b8d90af12 | ||
![]() |
0f803804a4 | ||
![]() |
f43feff0aa |
@@ -119,7 +119,7 @@ class config{
|
|||||||
|
|
||||||
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
||||||
// Changing this might break things.
|
// Changing this might break things.
|
||||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0";
|
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:141.0) Gecko/20100101 Firefox/141.0";
|
||||||
|
|
||||||
// Proxy pool assignments for each scraper
|
// Proxy pool assignments for each scraper
|
||||||
// false = Use server's raw IP
|
// false = Use server's raw IP
|
||||||
@@ -133,6 +133,7 @@ class config{
|
|||||||
const PROXY_GOOGLE_CSE = false;
|
const PROXY_GOOGLE_CSE = false;
|
||||||
const PROXY_STARTPAGE = false;
|
const PROXY_STARTPAGE = false;
|
||||||
const PROXY_QWANT = false;
|
const PROXY_QWANT = false;
|
||||||
|
const PROXY_BAIDU = false;
|
||||||
const PROXY_GHOSTERY = false;
|
const PROXY_GHOSTERY = false;
|
||||||
const PROXY_MARGINALIA = false;
|
const PROXY_MARGINALIA = false;
|
||||||
const PROXY_MOJEEK = false;
|
const PROXY_MOJEEK = false;
|
||||||
|
@@ -949,6 +949,7 @@ class frontend{
|
|||||||
"crowdview" => "Crowdview",
|
"crowdview" => "Crowdview",
|
||||||
"mwmbl" => "Mwmbl",
|
"mwmbl" => "Mwmbl",
|
||||||
"mojeek" => "Mojeek",
|
"mojeek" => "Mojeek",
|
||||||
|
"baidu" => "Baidu",
|
||||||
"solofield" => "Solofield",
|
"solofield" => "Solofield",
|
||||||
"marginalia" => "Marginalia",
|
"marginalia" => "Marginalia",
|
||||||
"wiby" => "wiby",
|
"wiby" => "wiby",
|
||||||
@@ -969,6 +970,7 @@ class frontend{
|
|||||||
"startpage" => "Startpage",
|
"startpage" => "Startpage",
|
||||||
"qwant" => "Qwant",
|
"qwant" => "Qwant",
|
||||||
"yep" => "Yep",
|
"yep" => "Yep",
|
||||||
|
"baidu" => "Baidu",
|
||||||
"solofield" => "Solofield",
|
"solofield" => "Solofield",
|
||||||
"pinterest" => "Pinterest",
|
"pinterest" => "Pinterest",
|
||||||
"flickr" => "Flickr",
|
"flickr" => "Flickr",
|
||||||
@@ -993,6 +995,7 @@ class frontend{
|
|||||||
"google" => "Google",
|
"google" => "Google",
|
||||||
"startpage" => "Startpage",
|
"startpage" => "Startpage",
|
||||||
"qwant" => "Qwant",
|
"qwant" => "Qwant",
|
||||||
|
"baidu" => "Baidu",
|
||||||
"solofield" => "Solofield"
|
"solofield" => "Solofield"
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
@@ -1008,7 +1011,8 @@ class frontend{
|
|||||||
"startpage" => "Startpage",
|
"startpage" => "Startpage",
|
||||||
"qwant" => "Qwant",
|
"qwant" => "Qwant",
|
||||||
"yep" => "Yep",
|
"yep" => "Yep",
|
||||||
"mojeek" => "Mojeek"
|
"mojeek" => "Mojeek",
|
||||||
|
"baidu" => "Baidu"
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
break;
|
break;
|
||||||
|
@@ -240,12 +240,13 @@ class fuckhtml{
|
|||||||
public function getElementsByFuzzyAttributeValue(string $name, string $value, $collection = null){
|
public function getElementsByFuzzyAttributeValue(string $name, string $value, $collection = null){
|
||||||
|
|
||||||
$elems = $this->getElementsByAttributeName($name, $collection);
|
$elems = $this->getElementsByAttributeName($name, $collection);
|
||||||
|
|
||||||
$value =
|
$value =
|
||||||
explode(
|
explode(
|
||||||
" ",
|
" ",
|
||||||
trim(
|
trim(
|
||||||
preg_replace(
|
preg_replace(
|
||||||
'/ +/',
|
'/\s+/',
|
||||||
" ",
|
" ",
|
||||||
$value
|
$value
|
||||||
)
|
)
|
||||||
@@ -258,7 +259,18 @@ class fuckhtml{
|
|||||||
|
|
||||||
foreach($elem["attributes"] as $attrib_name => $attrib_value){
|
foreach($elem["attributes"] as $attrib_name => $attrib_value){
|
||||||
|
|
||||||
$attrib_value = explode(" ", $attrib_value);
|
$attrib_value =
|
||||||
|
explode(
|
||||||
|
" ",
|
||||||
|
trim(
|
||||||
|
preg_replace(
|
||||||
|
'/\s+/',
|
||||||
|
" ",
|
||||||
|
$attrib_value
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
$ac = count($attrib_value);
|
$ac = count($attrib_value);
|
||||||
$nc = count($value);
|
$nc = count($value);
|
||||||
$cr = 0;
|
$cr = 0;
|
||||||
|
2229
scraper/baidu.php
Normal file
2229
scraper/baidu.php
Normal file
File diff suppressed because it is too large
Load Diff
16
settings.php
16
settings.php
@@ -169,6 +169,10 @@ $settings = [
|
|||||||
"value" => "mojeek",
|
"value" => "mojeek",
|
||||||
"text" => "Mojeek"
|
"text" => "Mojeek"
|
||||||
],
|
],
|
||||||
|
[
|
||||||
|
"value" => "baidu",
|
||||||
|
"text" => "Baidu"
|
||||||
|
],
|
||||||
[
|
[
|
||||||
"value" => "solofield",
|
"value" => "solofield",
|
||||||
"text" => "Solofield"
|
"text" => "Solofield"
|
||||||
@@ -223,6 +227,10 @@ $settings = [
|
|||||||
"value" => "yep",
|
"value" => "yep",
|
||||||
"text" => "Yep"
|
"text" => "Yep"
|
||||||
],
|
],
|
||||||
|
[
|
||||||
|
"value" => "baidu",
|
||||||
|
"text" => "Baidu"
|
||||||
|
],
|
||||||
[
|
[
|
||||||
"value" => "solofield",
|
"value" => "solofield",
|
||||||
"text" => "Solofield"
|
"text" => "Solofield"
|
||||||
@@ -285,6 +293,10 @@ $settings = [
|
|||||||
"value" => "qwant",
|
"value" => "qwant",
|
||||||
"text" => "Qwant"
|
"text" => "Qwant"
|
||||||
],
|
],
|
||||||
|
[
|
||||||
|
"value" => "baidu",
|
||||||
|
"text" => "Baidu"
|
||||||
|
],
|
||||||
[
|
[
|
||||||
"value" => "solofield",
|
"value" => "solofield",
|
||||||
"text" => "Solofield"
|
"text" => "Solofield"
|
||||||
@@ -322,6 +334,10 @@ $settings = [
|
|||||||
[
|
[
|
||||||
"value" => "mojeek",
|
"value" => "mojeek",
|
||||||
"text" => "Mojeek"
|
"text" => "Mojeek"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"value" => "baidu",
|
||||||
|
"text" => "Baidu"
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
|
Loading…
Reference in New Issue
Block a user