Compare commits

...

4 Commits

Author SHA1 Message Date
lolcat
cdf958d293 fix wikipedia crash 2025-08-10 21:55:15 -04:00
lolcat
2d63475b07 fix MDN answers not rendering properly 2025-08-10 21:49:51 -04:00
lolcat
ae31274db9 these comments were too long 2025-08-10 21:39:19 -04:00
lolcat
20ef5b3e3a re-added stackoverflow instant answers 2025-08-10 21:37:45 -04:00

View File

@ -1046,20 +1046,38 @@ class ddg{
if(isset($json["Abstract"])){
$description[] =
[
"type" => "text",
"value" => $json["Abstract"]
];
$description = $this->parse_rich_text($json["Abstract"]);
}
if(
!isset($json["Image"]) ||
$json["Image"] == "" ||
$json["Image"] === null ||
$json["Image"] == "https://duckduckgo.com/i/"
){
$image = null;
}else{
if(
preg_match(
'/^https?:\/\//',
$json["Image"]
)
){
$image = $json["Image"];
}else{
$image = "https://duckduckgo.com" . $json["Image"];
}
}
$out["answer"][] = [
"title" => $json["Heading"],
"description" => $description,
"url" => $json["AbstractURL"],
"thumb" =>
(!isset($json["Image"]) || $json["Image"] == "" || $json["Image"] === null) ?
null : "https://duckduckgo.com" . $json["Image"],
"thumb" => $image,
"table" => $table,
"sublink" => $sublinks
];
@ -1072,11 +1090,11 @@ class ddg{
}
//
// Get wordnik definition
// Parse additional data endpoints
//
//nrj('/js/spice/dictionary/definition/create', null, null, null, null, 'dictionary_definition');
preg_match(
preg_match_all(
'/nrj\(\s*\'([^\']+)\'/',
$js,
$nrj
@ -1084,234 +1102,318 @@ class ddg{
if(isset($nrj[1])){
$nrj = $nrj[1];
preg_match(
'/\/js\/spice\/dictionary\/definition\/([^\/]+)/',
$nrj,
$word
);
if(isset($word[1])){
foreach($nrj[1] as $potential_endpoint){
$word = $word[1];
//
// Probe for wordnik definition
//
preg_match(
'/\/js\/spice\/dictionary\/definition\/([^\/]+)/',
$potential_endpoint,
$word
);
// found wordnik definition & word
try{
$nik =
$this->get(
$proxy,
"https://duckduckgo.com/js/spice/dictionary/definition/" . $word,
[],
ddg::req_xhr
if(isset($word[1])){
$word = $word[1];
// found wordnik definition & word
try{
$nik =
$this->get(
$proxy,
"https://duckduckgo.com/js/spice/dictionary/definition/" . $word,
[],
ddg::req_xhr
);
}catch(Exception $e){
// fail gracefully
return $out;
}
// remove javascript
$js_tmp =
preg_split(
'/ddg_spice_dictionary_definition\(\s*/',
$nik,
2
);
}catch(Exception $e){
// fail gracefully
return $out;
}
// remove javascript
$js_tmp =
preg_split(
'/ddg_spice_dictionary_definition\(\s*/',
$nik,
2
);
if(count($js_tmp) > 1){
$nik =
json_decode(
$this->fuckhtml
->extract_json(
$js_tmp[1]
),
true
);
}
if($nik === null){
return $out;
}
$answer_cat = [];
$answer = [];
foreach($nik as $snippet){
if(!isset($snippet["partOfSpeech"])){ continue; }
$push = [];
// add text snippet
if(isset($snippet["text"])){
if(count($js_tmp) > 1){
$push[] = [
"type" => "text",
"value" =>
$nik =
json_decode(
$this->fuckhtml
->getTextContent(
$snippet["text"]
)
];
}
// add example uses
if(isset($snippet["exampleUses"])){
foreach($snippet["exampleUses"] as $example){
$push[] = [
"type" => "quote",
"value" => "\"" .
$this->fuckhtml
->getTextContent(
$example["text"]
) . "\""
];
}
}
// add citations
if(isset($snippet["citations"])){
foreach($snippet["citations"] as $citation){
if(!isset($citation["cite"])){ continue; }
$text =
$this->fuckhtml
->getTextContent(
$citation["cite"]
);
if(isset($citation["source"])){
$text .=
" - " .
$this->fuckhtml
->getTextContent(
$citation["source"]
);
}
$push[] = [
"type" => "quote",
"value" => $text
];
}
}
// add related words
if(isset($snippet["relatedWords"])){
$relations = [];
foreach($snippet["relatedWords"] as $related){
$words = [];
foreach($related["words"] as $wrd){
$words[] =
$this->fuckhtml
->getTextContent(
$wrd
);
}
if(
count($words) !== 0 &&
isset($related["relationshipType"])
){
$relations[ucfirst($related["relationshipType"]) . "s"] =
implode(", ", $words);
}
}
foreach($relations as $relation_title => $relation_content){
$push[] = [
"type" => "quote",
"value" => $relation_title . ": " . $relation_content
];
}
}
if(count($push) !== 0){
// push data to answer_cat
if(!isset($answer_cat[$snippet["partOfSpeech"]])){
$answer_cat[$snippet["partOfSpeech"]] = [];
}
$answer_cat[$snippet["partOfSpeech"]] =
array_merge(
$answer_cat[$snippet["partOfSpeech"]],
$push
->extract_json(
$js_tmp[1]
),
true
);
}
}
foreach($answer_cat as $answer_title => $answer_content){
$i = 0;
$answer[] = [
"type" => "title",
"value" => $answer_title
];
$old_type = $answer[count($answer) - 1]["type"];
foreach($answer_content as $ans){
if($nik === null){
if(
$ans["type"] == "text" &&
$old_type == "text"
){
return $out;
}
$answer_cat = [];
$answer = [];
foreach($nik as $snippet){
if(!isset($snippet["partOfSpeech"])){ continue; }
$push = [];
// add text snippet
if(isset($snippet["text"])){
$i++;
$c = count($answer) - 1;
// append text to existing textfield
$answer[$c] = [
$push[] = [
"type" => "text",
"value" => $answer[$c]["value"] . "\n" . $i . ". " . $ans["value"]
"value" =>
$this->fuckhtml
->getTextContent(
$snippet["text"]
)
];
}elseif($ans["type"] == "text"){
$i++;
$answer[] = [
"type" => "text",
"value" => $i . ". " . $ans["value"]
];
}else{
// append normally
$answer[] = $ans;
}
$old_type = $ans["type"];
// add example uses
if(isset($snippet["exampleUses"])){
foreach($snippet["exampleUses"] as $example){
$push[] = [
"type" => "quote",
"value" => "\"" .
$this->fuckhtml
->getTextContent(
$example["text"]
) . "\""
];
}
}
// add citations
if(isset($snippet["citations"])){
foreach($snippet["citations"] as $citation){
if(!isset($citation["cite"])){ continue; }
$text =
$this->fuckhtml
->getTextContent(
$citation["cite"]
);
if(isset($citation["source"])){
$text .=
" - " .
$this->fuckhtml
->getTextContent(
$citation["source"]
);
}
$push[] = [
"type" => "quote",
"value" => $text
];
}
}
// add related words
if(isset($snippet["relatedWords"])){
$relations = [];
foreach($snippet["relatedWords"] as $related){
$words = [];
foreach($related["words"] as $wrd){
$words[] =
$this->fuckhtml
->getTextContent(
$wrd
);
}
if(
count($words) !== 0 &&
isset($related["relationshipType"])
){
$relations[ucfirst($related["relationshipType"]) . "s"] =
implode(", ", $words);
}
}
foreach($relations as $relation_title => $relation_content){
$push[] = [
"type" => "quote",
"value" => $relation_title . ": " . $relation_content
];
}
}
if(count($push) !== 0){
// push data to answer_cat
if(!isset($answer_cat[$snippet["partOfSpeech"]])){
$answer_cat[$snippet["partOfSpeech"]] = [];
}
$answer_cat[$snippet["partOfSpeech"]] =
array_merge(
$answer_cat[$snippet["partOfSpeech"]],
$push
);
}
}
foreach($answer_cat as $answer_title => $answer_content){
$i = 0;
$answer[] = [
"type" => "title",
"value" => $answer_title
];
$old_type = $answer[count($answer) - 1]["type"];
foreach($answer_content as $ans){
if(
$ans["type"] == "text" &&
$old_type == "text"
){
$i++;
$c = count($answer) - 1;
// append text to existing textfield
$answer[$c] = [
"type" => "text",
"value" => $answer[$c]["value"] . "\n" . $i . ". " . $ans["value"]
];
}elseif($ans["type"] == "text"){
$i++;
$answer[] = [
"type" => "text",
"value" => $i . ". " . $ans["value"]
];
}else{
// append normally
$answer[] = $ans;
}
$old_type = $ans["type"];
}
}
// yeah.. sometimes duckduckgo doesnt give us a definition back
if(count($answer) !== 0){
$out["answer"][] = [
"title" => ucfirst($word),
"description" => $answer,
"url" => "https://www.wordnik.com/words/" . $word,
"thumb" => null,
"table" => [],
"sublink" => []
];
}
}
// yeah.. sometimes duckduckgo doesnt give us a definition back
if(count($answer) !== 0){
//
// Parse stackoverflow answer
//
if(
preg_match(
'/^\/a\.js.*src_id=stack_overflow/',
$potential_endpoint
)
){
$out["answer"][] = [
"title" => ucfirst($word),
"description" => $answer,
"url" => "https://www.wordnik.com/words/" . $word,
"thumb" => null,
"table" => [],
"sublink" => []
];
// found stackoverflow answer
try{
$json =
$this->get(
$proxy,
"https://duckduckgo.com" . $potential_endpoint,
[],
ddg::req_xhr
);
}catch(Exception $e){
// fail gracefully
return $out;
}
$json = explode("DDG.duckbar.add_array(", $json, 2);
if(count($json) === 2){
$json =
json_decode(
$this->fuckhtml
->extract_json(
$json[1]
),
true
);
if(
$json !== null &&
isset($json[0]["data"])
){
$json = $json[0]["data"];
foreach($json as $answer){
if(isset($answer["Heading"])){
$title = $answer["Heading"];
}elseif(isset($answer["title"])){
$title = $answer["title"];
}else{
$title = null;
}
if(
$title !== null &&
isset($answer["Abstract"])
){
$description = $this->parse_rich_text($answer["Abstract"]);
$out["answer"][] = [
"title" => $title,
"description" => $description,
"url" => $answer["AbstractURL"],
"thumb" => null,
"table" => [],
"sublink" => []
];
}
}
}
}
}
}
}
@ -1841,6 +1943,146 @@ class ddg{
return $out;
}
/**
 * Convert an HTML snippet into a flat list of description nodes.
 *
 * Every tag except headings (h1-h7), <pre> and <code> is stripped. The
 * remaining markup is then walked in order and turned into nodes of the
 * form ["type" => ..., "value" => ...], where type is one of:
 *   - "text"        plain text found between the kept tags
 *   - "title"       content of a heading tag
 *   - "code"        content of a <pre> block
 *   - "inline_code" content of a standalone <code> tag
 *
 * @param string $html Raw HTML snippet to convert
 * @return array List of ["type" => string, "value" => string] nodes
 */
private function parse_rich_text($html){
	
	$description = [];
	
	// pre-process the html, remove useless elements
	$html =
		strip_tags(
			$html,
			[
				"h1", "h2", "h3", "h4", "h5", "h6", "h7",
				"pre", "code"
			]
		);
	
	// a <pre> (or </pre>) tag directly followed by a <code> (or </code>)
	// tag is rewritten to a bare <pre> / </pre>, dropping attributes
	$html =
		preg_replace(
			'/<(\/?)pre *[^>]*>\s*<\/?code *[^>]*>/i',
			'<$1pre>',
			$html
		);
	
	$this->fuckhtml->load($html);
	
	$tags =
		$this->fuckhtml
		->getElementsByTagName(
			"*"
		);
	
	if(count($tags) === 0){
		
		// no markup left: the whole snippet is a single text node
		$description[] = [
			"type" => "text",
			"value" =>
				trim(
					$this->fuckhtml
					->getTextContent(
						$html,
						true,
						false
					)
				)
		];
	}else{
		
		// offset in $html right after the last tag that was consumed
		$start = 0;
		
		// starts as true so the very first text node gets left-trimmed
		$was_code_block = true;
		
		foreach($tags as $tag){
			
			// A tag starting before $start sits inside a tag that was
			// already emitted (eg. <code> inside <h2>). Its text is part
			// of the parent's node, and the negative length handed to
			// substr() below would return unrelated text, so skip it.
			// NOTE(review): assumes tags come back in document order
			if($tag["startPos"] < $start){
				
				continue;
			}
			
			// text between the previous tag and this one
			$text =
				$this->fuckhtml
				->getTextContent(
					substr(
						$html,
						$start,
						$tag["startPos"] - $start
					),
					true,
					false
				);
			
			if($was_code_block){
				
				$text = ltrim($text);
				$was_code_block = false;
			}
			
			$description[] = [
				"type" => "text",
				"value" => $text
			];
			
			// index of the text node that was just pushed
			$c = count($description) - 1;
			
			// the earlier stages match tags case-insensitively, so
			// normalise the name before comparing it
			switch(strtolower($tag["tagName"])){
				
				case "pre":
					$append = "code";
					$was_code_block = true;
					$description[$c]["value"] =
						rtrim($description[$c]["value"]);
					break;
				
				case "code":
					$append = "inline_code";
					// keep exactly one space between text and inline code
					$description[$c]["value"] =
						rtrim($description[$c]["value"]) . " ";
					break;
				
				case "h1":
				case "h2":
				case "h3":
				case "h4":
				case "h5":
				case "h6":
				case "h7":
					$append = "title";
					$description[$c]["value"] =
						rtrim($description[$c]["value"]);
					break;
				
				default:
					// unexpected tag: emit it as plain text instead of
					// reusing the previous iteration's type (or reading
					// an undefined variable on the first iteration)
					$append = "text";
					break;
			}
			
			$description[] = [
				"type" => $append,
				"value" =>
					trim(
						$this->fuckhtml
						->getTextContent(
							$tag,
							true,
							false
						)
					)
			];
			
			$start = $tag["endPos"];
		}
		
		// emit whatever text is left after the last tag
		$description[] = [
			"type" => "text",
			"value" =>
				trim(
					$this->fuckhtml
					->getTextContent(
						substr(
							$html,
							$start
						),
						true,
						false
					)
				)
		];
	}
	
	return $description;
}
private function titledots($title){
$substr = substr($title, -3);