mirror of https://github.com/FreshRSS/FreshRSS.git
Replace lib_phpQuery by PhpGt/CssXPath (#4261)
https://github.com/PhpGt/CssXPath
This commit is contained in:
parent
be5848fd4f
commit
ae54a590b9
|
@ -1,3 +1,4 @@
|
|||
lib/CssXPath/
|
||||
node_modules/
|
||||
p/scripts/vendor/
|
||||
vendor/
|
||||
|
|
|
@ -29,7 +29,7 @@ extend-exclude = [
|
|||
"bin/",
|
||||
"data/",
|
||||
"docs/fr/",
|
||||
"lib/lib_phpQuery.php",
|
||||
"lib/CssXPath/",
|
||||
"lib/PHPMailer",
|
||||
"lib/SimplePie/",
|
||||
"node_modules/",
|
||||
|
|
|
@ -249,6 +249,7 @@ et [l’API Fever](https://freshrss.github.io/FreshRSS/fr/users/06_Fever_API.htm
|
|||
* [MINZ](https://github.com/marienfressinaud/MINZ)
|
||||
* [php-http-304](https://alexandre.alapetite.fr/doc-alex/php-http-304/)
|
||||
* [lib_opml](https://github.com/marienfressinaud/lib_opml)
|
||||
* [PhpGt/CssXPath](https://github.com/PhpGt/CssXPath)
|
||||
* [PHPMailer](https://github.com/PHPMailer/PHPMailer)
|
||||
* [Chart.js](https://www.chartjs.org)
|
||||
|
||||
|
|
|
@ -138,6 +138,7 @@ and [Fever API](https://freshrss.github.io/FreshRSS/en/users/06_Fever_API.html)
|
|||
* [MINZ](https://github.com/marienfressinaud/MINZ)
|
||||
* [php-http-304](https://alexandre.alapetite.fr/doc-alex/php-http-304/)
|
||||
* [lib_opml](https://github.com/marienfressinaud/lib_opml)
|
||||
* [PhpGt/CssXPath](https://github.com/PhpGt/CssXPath)
|
||||
* [PHPMailer](https://github.com/PHPMailer/PHPMailer)
|
||||
* [Chart.js](https://www.chartjs.org)
|
||||
|
||||
|
|
|
@ -480,40 +480,37 @@ class FreshRSS_Entry extends Minz_Model {
|
|||
public static function getContentByParsing(string $url, string $path, array $attributes = [], int $maxRedirs = 3): string {
|
||||
$html = getHtml($url, $attributes);
|
||||
if (strlen($html) > 0) {
|
||||
require_once(LIB_PATH . '/lib_phpQuery.php');
|
||||
/**
|
||||
* @var phpQueryObject @doc
|
||||
*/
|
||||
$doc = phpQuery::newDocumentHTML($html);
|
||||
$doc = new DOMDocument();
|
||||
$doc->loadHTML($html, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
|
||||
$xpath = new DOMXPath($doc);
|
||||
|
||||
if ($maxRedirs > 0) {
|
||||
//Follow any HTML redirection
|
||||
/**
|
||||
* @var phpQueryObject @metas
|
||||
*/
|
||||
$metas = $doc->find('meta[http-equiv][content]');
|
||||
$metas = $xpath->query('//meta[@content]');
|
||||
/** @var array<DOMElement> $metas */
|
||||
foreach ($metas as $meta) {
|
||||
if (strtolower(trim($meta->getAttribute('http-equiv'))) === 'refresh') {
|
||||
$refresh = preg_replace('/^[0-9.; ]*\s*(url\s*=)?\s*/i', '', trim($meta->getAttribute('content')));
|
||||
$refresh = SimplePie_Misc::absolutize_url($refresh, $url);
|
||||
if ($refresh != false && $refresh !== $url) {
|
||||
phpQuery::unloadDocuments();
|
||||
return self::getContentByParsing($refresh, $path, $attributes, $maxRedirs - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @var phpQueryObject @content
|
||||
*/
|
||||
$content = $doc->find($path);
|
||||
$bases = $doc->document->getElementsByTagName('base');
|
||||
if (!empty($bases[0]) && $bases[0]->getAttribute('href') != '') {
|
||||
$url = $bases[0]->getAttribute('href');
|
||||
$base = $xpath->evaluate('normalize-space(//base/@href)');
|
||||
if ($base != false && is_string($base)) {
|
||||
$url = $base;
|
||||
}
|
||||
$html = trim(sanitizeHTML($content->__toString(), $url));
|
||||
phpQuery::unloadDocuments();
|
||||
$content = '';
|
||||
$nodes = $xpath->query(new Gt\CssXPath\Translator($path));
|
||||
if ($nodes != false) {
|
||||
foreach ($nodes as $node) {
|
||||
$content .= $doc->saveHtml($node) . "\n";
|
||||
}
|
||||
}
|
||||
$html = trim(sanitizeHTML($content, $url));
|
||||
return $html;
|
||||
} else {
|
||||
throw new Exception();
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
<?php
|
||||
namespace Gt\CssXPath;
|
||||
|
||||
use RuntimeException;
|
||||
|
||||
/**
 * Base exception type of the Gt\CssXPath namespace.
 *
 * It adds nothing to RuntimeException; it exists so that callers can catch
 * this library's failures with a single type.
 */
class CssXPathException extends RuntimeException
{
}
|
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright © PHP.Gt contributors.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,4 @@
|
|||
<?php
|
||||
namespace Gt\CssXPath;
|
||||
|
||||
/**
 * Exception type for CSS selector features that have no XPath translation
 * implemented. It carries no state beyond what CssXPathException provides.
 */
class NotYetImplementedException extends CssXPathException
{
}
|
|
@ -0,0 +1,53 @@
|
|||
Translate CSS selectors to XPath queries.
|
||||
=========================================
|
||||
|
||||
A lightweight, dependency-free CSS-to-XPath translator. This repository is used to bring modern DOM functionality like [`querySelectorAll()`][qsa] to PHP in the [PHP.Gt/Dom][gt-dom] project.
|
||||
|
||||
***
|
||||
|
||||
<a href="https://github.com/PhpGt/CssXPath/actions" target="_blank">
|
||||
<img src="https://badge.status.php.gt/cssxpath-build.svg" alt="Build status" />
|
||||
</a>
|
||||
<a href="https://scrutinizer-ci.com/g/PhpGt/CssXPath" target="_blank">
|
||||
<img src="https://badge.status.php.gt/cssxpath-quality.svg" alt="Code quality" />
|
||||
</a>
|
||||
<a href="https://scrutinizer-ci.com/g/PhpGt/CssXPath" target="_blank">
|
||||
<img src="https://badge.status.php.gt/cssxpath-coverage.svg" alt="Code coverage" />
|
||||
</a>
|
||||
<a href="https://packagist.org/packages/PhpGt/CssXPath" target="_blank">
|
||||
<img src="https://badge.status.php.gt/cssxpath-version.svg" alt="Current version" />
|
||||
</a>
|
||||
<a href="http://www.php.gt/cssxpath" target="_blank">
|
||||
<img src="https://badge.status.php.gt/cssxpath-docs.svg" alt="PHP.Gt/CssXPath documentation" />
|
||||
</a>
|
||||
|
||||
Example usage
|
||||
-------------
|
||||
|
||||
|
||||
```php
|
||||
use Gt\CssXPath\Translator;
|
||||
|
||||
$html = <<<HTML
|
||||
<form>
|
||||
<label>
|
||||
Name
|
||||
<input name="name" />
|
||||
</label>
|
||||
<label>
|
||||
Code:
|
||||
<input name="code" />
|
||||
</label>
|
||||
<button name="do" value="submit">Submit code</button>
|
||||
</form>
|
||||
HTML;
|
||||
|
||||
$document = new DOMDocument();
|
||||
$document->loadHTML($html);
|
||||
|
||||
$xpath = new DOMXPath($document);
|
||||
$inputElementList = $xpath->query(new Translator("form>label>input"));
|
||||
```
|
||||
|
||||
[qsa]: https://developer.mozilla.org/en-US/docs/Web/API/Document/querySelectorAll
|
||||
[gt-dom]: https://www.php.gt/dom
|
|
@ -0,0 +1,323 @@
|
|||
<?php /** @noinspection HtmlDeprecatedTag */
|
||||
namespace Gt\CssXPath;
|
||||
|
||||
/**
 * Translates a CSS selector into an XPath expression.
 *
 * The object is stringable: casting it to string (for instance by handing it
 * to an API that expects a string XPath query) performs the translation.
 */
class Translator {
	/**
	 * Tokeniser for one comma-free selector. Every named group is a token
	 * type; convertSingleSelector() dispatches on the group name.
	 *
	 * The attribute_equals character class accepts the characters of all six
	 * attribute operators (=, ~=, |=, ^=, $=, *=). "$" is kept last inside
	 * the class so the double-quoted PHP string never sees "$" followed by
	 * an identifier character.
	 */
	const cssRegex =
		'/'
		. '(?P<star>\*)'
		. '|(:(?P<pseudo>[\w-]*))'
		. '|\(*(?P<pseudospecifier>["\']*[\w\h-]*["\']*)\)'
		. '|(?P<element>[\w-]*)'
		. '|(?P<child>\s*>\s*)'
		. '|(#(?P<id>[\w-]*))'
		. '|(\.(?P<class>[\w-]*))'
		. '|(?P<sibling>\s*\+\s*)'
		. "|(\[(?P<attribute>[\w-]*)((?P<attribute_equals>[=~|^*$]+)(?P<attribute_value>(.+\[\]'?)|[^\]]+))*\])+"
		. '|(?P<descendant>\s+)'
		. '/';

	const EQUALS_EXACT = "=";
	const EQUALS_CONTAINS_WORD = "~=";
	const EQUALS_ENDS_WITH = "$=";
	const EQUALS_CONTAINS = "*=";
	const EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED = "|=";
	const EQUALS_STARTS_WITH = "^=";

	/** @var string CSS selector (possibly a comma-separated list) to translate */
	protected $cssSelector;
	/** @var string XPath fragment placed in front of every translated selector */
	protected $prefix;

	/**
	 * @param string $cssSelector CSS selector, or comma-separated selector list
	 * @param string $prefix XPath axis prefix emitted before each selector
	 */
	public function __construct(string $cssSelector, string $prefix = ".//") {
		$this->cssSelector = $cssSelector;
		$this->prefix = $prefix;
	}

	/** @return string the XPath translation, same as asXPath() */
	public function __toString():string {
		return $this->asXPath();
	}

	/** @return string the XPath translation of the selector given to the constructor */
	public function asXPath():string {
		return $this->convert($this->cssSelector);
	}

	/**
	 * Splits a selector list on the commas that are not inside a quoted
	 * string, translates each part, and joins the results with the XPath
	 * union operator.
	 */
	protected function convert(string $css):string {
		$cssArray = preg_split(
			'/(["\']).*?\1(*SKIP)(*F)|,/',
			$css
		);
		$xPathArray = [];

		foreach($cssArray as $input) {
			$output = $this->convertSingleSelector(trim($input));
			$xPathArray []= $output;
		}

		return implode(" | ", $xPathArray);
	}

	/**
	 * Translates one comma-free selector by tokenising it with cssRegex and
	 * appending an XPath fragment per token.
	 */
	protected function convertSingleSelector(string $css):string {
		$thread = $this->preg_match_collated(self::cssRegex, $css);
		$thread = array_values($thread);

		$xpath = [$this->prefix];
		$prevType = "";
		foreach($thread as $threadKey => $currentThreadItem) {
			// A null entry (token whose named groups were all empty) is not
			// "set", so $next is false for it, exactly as with a missing index.
			$next = isset($thread[$threadKey + 1])
				? $thread[$threadKey + 1]
				: false;

			// Tokens such as a lone ":" or "#" match the pattern but leave
			// every named group empty, so their entry is null; treat them as
			// an untyped token instead of reading an offset on null.
			$currentType = $currentThreadItem["type"] ?? "";

			switch ($currentType) {
			case "star":
			case "element":
				$xpath []= $currentThreadItem['content'];
				break;

			case "pseudo":
				$specifier = "";
				if ($next && $next["type"] == "pseudospecifier") {
					$specifier = "{$next['content']}";
				}

				switch ($currentThreadItem["content"]) {
				case "disabled":
				case "checked":
				case "selected":
					array_push(
						$xpath,
						"[@{$currentThreadItem['content']}]"
					);
					break;

				case "text":
					array_push(
						$xpath,
						'[@type="text"]'
					);
					break;

				case "contains":
					if(empty($specifier)) {
						// Nothing to search for: move on to the next token
						// (both switches are left, $prevType is not updated).
						continue 3;
					}

					array_push(
						$xpath,
						"[contains(text(),$specifier)]"
					);
					break;

				case "first-child":
					$prev = count($xpath) - 1;
					$xpath[$prev] = '*[1]/self::' . $xpath[$prev];
					break;

				case "nth-child":
					if (empty($specifier)) {
						continue 3;
					}

					$prev = count($xpath) - 1;
					$previous = $xpath[$prev];

					if (substr($previous, -1, 1) === "]") {
						// Extend only the closing bracket of the previous
						// predicate. Replacing every "]" would corrupt
						// predicates whose value itself contains "]",
						// e.g. [@name="a[]"].
						$xpath[$prev] = substr($previous, 0, -1)
							. " and position() = $specifier]";
					}
					else {
						array_push(
							$xpath,
							"[$specifier]"
						);
					}
					break;

				case "nth-of-type":
					if (empty($specifier)) {
						continue 3;
					}

					// The specifier is emitted verbatim as a positional predicate.
					array_push(
						$xpath,
						"[$specifier]"
					);
					break;
				}
				break;

			case "child":
				array_push($xpath, "/");
				break;

			case "id":
				array_push(
					$xpath,
					($prevType != "element" ? '*' : '')
					. "[@id='{$currentThreadItem['content']}']"
				);
				break;

			case "class":
				// https://devhints.io/xpath#class-check
				array_push(
					$xpath,
					(($prevType != "element" && $prevType != "class") ? '*' : '')
					. "[contains(concat(' ',normalize-space(@class),' '),' {$currentThreadItem['content']} ')]"
				);
				break;

			case "sibling":
				array_push(
					$xpath,
					"/following-sibling::*[1]/self::"
				);
				break;

			case "attribute":
				if(!$prevType) {
					array_push($xpath, "*");
				}

				/** @var null|array<int, array<string, string>> $detail */
				$detail = $currentThreadItem["detail"] ?? null;
				$detailType = $detail[0] ?? null;
				$detailValue = $detail[1] ?? null;

				if(!$detailType
				|| $detailType["type"] !== "attribute_equals") {
					// Bare [attr]: presence test only. Continues with the
					// next token without updating $prevType.
					array_push(
						$xpath,
						"[@{$currentThreadItem['content']}]"
					);
					continue 2;
				}

				$attributeName = $currentThreadItem['content'];
				$valueString = trim(
					$detailValue["content"],
					" '\""
				);

				$equalsType = $detailType["content"];
				switch ($equalsType) {
				case self::EQUALS_EXACT:
					array_push(
						$xpath,
						"[@{$attributeName}=\"{$valueString}\"]"
					);
					break;

				case self::EQUALS_CONTAINS:
					array_push(
						$xpath,
						"[contains(@{$attributeName},\"{$valueString}\")]"
					);
					break;

				case self::EQUALS_CONTAINS_WORD:
					array_push(
						$xpath,
						"["
						. "contains("
						. "concat(\" \",@{$attributeName},\" \"),"
						. "concat(\" \",\"{$valueString}\",\" \")"
						. ")"
						. "]"
					);
					break;

				case self::EQUALS_STARTS_WITH_OR_STARTS_WITH_HYPHENATED:
					// [attr|=v]: value is exactly "v" or begins with "v-".
					array_push(
						$xpath,
						"["
						. "@{$attributeName}=\"{$valueString}\""
						. " or starts-with(@{$attributeName},\"{$valueString}-\")"
						. "]"
					);
					break;

				case self::EQUALS_STARTS_WITH:
					array_push(
						$xpath,
						"[starts-with(@{$attributeName},\"{$valueString}\")]"
					);
					break;

				case self::EQUALS_ENDS_WITH:
					// XPath 1.0 has no ends-with(); compare the tail substring.
					array_push(
						$xpath,
						"["
						. "substring("
						. "@{$attributeName},"
						. "string-length(@{$attributeName}) - "
						. "string-length(\"{$valueString}\") + 1)"
						. "=\"{$valueString}\""
						. "]"
					);
					break;
				}
				break;

			case "descendant":
				array_push($xpath, "//");
				break;
			}

			$prevType = $currentType;
		}

		return implode("", $xpath);
	}

	/**
	 * Runs preg_match_all() and regroups the result per match instead of per
	 * capture group.
	 *
	 * The returned array is keyed by match index (keys are not re-indexed).
	 * For each non-empty match, the first non-empty named group becomes the
	 * entry itself (["type" => group name, "content" => text] unless
	 * $transform is given); further non-empty named groups of the same match
	 * are appended to the entry's "detail" list. A non-empty match whose
	 * named groups are all empty keeps a null entry.
	 *
	 * @param callable|null $transform optional fn(string $group, string $text)
	 *        whose return value replaces the default type/content pair
	 * @return array<int, array<string, string>>
	 */
	protected function preg_match_collated(
		string $regex,
		string $string,
		callable $transform = null
	):array {
		preg_match_all(
			$regex,
			$string,
			$matches,
			PREG_PATTERN_ORDER
		);

		// Reserve a slot per non-empty match first so that the entries keep
		// the order of the matches, not the order of the named groups.
		// Compared against "" rather than empty(): the match "0" is not empty.
		$set = [];
		foreach($matches[0] as $k => $v) {
			if($v !== "") {
				$set[$k] = null;
			}
		}

		foreach($matches as $k => $m) {
			// preg_match_all() reports every named group under its numeric
			// index as well; only the named copies are wanted.
			if(is_numeric($k)) {
				continue;
			}

			foreach($m as $i => $match) {
				if($match === "") {
					continue;
				}

				$toSet = null;

				if($transform) {
					$toSet = $transform($k, $match);
				}
				else {
					$toSet = ["type" => $k, "content" => $match];
				}

				if(!isset($set[$i])) {
					$set[$i] = $toSet;
				}
				else {
					if(!isset($set[$i]["detail"])) {
						$set[$i]["detail"] = [];
					}

					array_push($set[$i]["detail"], $toSet);
				}
			}
		}

		return $set;
	}
}
|
5710
lib/lib_phpQuery.php
5710
lib/lib_phpQuery.php
File diff suppressed because it is too large
Load Diff
|
@ -45,6 +45,8 @@ function classAutoloader($class) {
|
|||
include(LIB_PATH . '/' . str_replace('_', '/', $class) . '.php');
|
||||
} elseif (strpos($class, 'SimplePie') === 0) {
|
||||
include(LIB_PATH . '/SimplePie/' . str_replace('_', '/', $class) . '.php');
|
||||
} elseif (strpos($class, 'CssXPath') !== false) {
|
||||
include(LIB_PATH . '/CssXPath/' . basename(str_replace('\\', '/', $class)) . '.php');
|
||||
} elseif (strpos($class, 'PHPMailer') === 0) {
|
||||
include(LIB_PATH . '/' . str_replace('\\', '/', $class) . '.php');
|
||||
}
|
||||
|
|
|
@ -3,10 +3,10 @@
|
|||
<description>Created with the PHP Coding Standard Generator. https://edorian.github.com/php-coding-standard-generator/</description>
|
||||
<arg name="extensions" value="php,phtml,css,js"/>
|
||||
<arg name="tab-width" value="4"/>
|
||||
<exclude-pattern>./lib/CssXPath/</exclude-pattern>
|
||||
<exclude-pattern>./lib/SimplePie/</exclude-pattern>
|
||||
<exclude-pattern>./lib/PHPMailer/</exclude-pattern>
|
||||
<exclude-pattern>./lib/http-conditional.php</exclude-pattern>
|
||||
<exclude-pattern>./lib/lib_phpQuery.php</exclude-pattern>
|
||||
<exclude-pattern>./node_modules/</exclude-pattern>
|
||||
<exclude-pattern>./data/config.php</exclude-pattern>
|
||||
<exclude-pattern>./data/users/*/config.php</exclude-pattern>
|
||||
|
|
|
@ -7,7 +7,6 @@ parameters:
|
|||
paths:
|
||||
- .
|
||||
excludePaths:
|
||||
- lib/lib_phpQuery.php
|
||||
- lib/PHPMailer/*
|
||||
- lib/SimplePie/*
|
||||
- node_modules/*
|
||||
|
@ -20,6 +19,5 @@ parameters:
|
|||
bootstrapFiles:
|
||||
- cli/_cli.php
|
||||
- lib/favicons.php
|
||||
- lib/lib_phpQuery.php
|
||||
- lib/SimplePie/SimplePie.php
|
||||
- app/SQL/install.sql.sqlite.php
|
||||
|
|
Loading…
Reference in New Issue