Merge pull request #7837 from wallabag/use-domcrawler-component

Use DomCrawler component
This commit is contained in:
Yassine Guedidi 2024-11-20 09:26:02 +01:00 committed by GitHub
commit 1321b2ce66
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 36 additions and 53 deletions

View File

@ -3,6 +3,7 @@
namespace Wallabag\ExpressionLanguage; namespace Wallabag\ExpressionLanguage;
use GuzzleHttp\ClientInterface; use GuzzleHttp\ClientInterface;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\ExpressionLanguage\ExpressionFunction; use Symfony\Component\ExpressionLanguage\ExpressionFunction;
use Symfony\Component\ExpressionLanguage\ExpressionFunctionProviderInterface; use Symfony\Component\ExpressionLanguage\ExpressionFunctionProviderInterface;
@ -69,27 +70,19 @@ class AuthenticatorProvider implements ExpressionFunctionProviderInterface
throw new \Exception('Not supported'); throw new \Exception('Not supported');
}, },
function (array $arguments, $xpathQuery, $html) { function (array $arguments, $xpathQuery, $html) {
$useInternalErrors = libxml_use_internal_errors(true); try {
$crawler = new Crawler((string) $html);
$doc = new \DOMDocument(); $crawler = $crawler->filterXPath($xpathQuery);
$doc->loadHTML((string) $html, \LIBXML_NOCDATA | \LIBXML_NOWARNING | \LIBXML_NOERROR); } catch (\Throwable $e) {
$xpath = new \DOMXPath($doc);
$domNodeList = $xpath->query($xpathQuery);
if (0 === $domNodeList->length) {
return ''; return '';
} }
$domNode = $domNodeList->item(0); if (0 === $crawler->count()) {
libxml_use_internal_errors($useInternalErrors);
if (null === $domNode || null === $domNode->attributes) {
return ''; return '';
} }
return $domNode->attributes->getNamedItem('value')->nodeValue; return (string) $crawler->first()->attr('value');
} }
); );
} }

View File

@ -2,6 +2,7 @@
namespace Wallabag\Import; namespace Wallabag\Import;
use Symfony\Component\DomCrawler\Crawler;
use Wallabag\Entity\Entry; use Wallabag\Entity\Entry;
use Wallabag\Event\EntrySavedEvent; use Wallabag\Event\EntrySavedEvent;
@ -29,27 +30,23 @@ abstract class HtmlImport extends AbstractImport
return false; return false;
} }
$html = new \DOMDocument(); $crawler = new Crawler(file_get_contents($this->filepath));
libxml_use_internal_errors(true); $hrefs = $crawler->filterXPath('//a');
$html->loadHTMLFile($this->filepath);
$hrefs = $html->getElementsByTagName('a');
libxml_use_internal_errors(false);
if (0 === $hrefs->length) { if (0 === $hrefs->count()) {
$this->logger->error('Wallabag HTML: no entries in imported file'); $this->logger->error('Wallabag HTML: no entries in imported file');
return false; return false;
} }
$entries = []; $entries = $hrefs->each(function (Crawler $node) {
foreach ($hrefs as $href) { return [
$entry = []; 'url' => $node->attr('href'),
$entry['url'] = $href->getAttribute('href'); 'tags' => $node->attr('tags'),
$entry['tags'] = $href->getAttribute('tags'); 'created_at' => $node->attr('add_date'),
$entry['created_at'] = $href->getAttribute('add_date'); ];
$entries[] = $entry; });
}
if ($this->producer) { if ($this->producer) {
$this->parseEntriesForProducer($entries); $this->parseEntriesForProducer($entries);

View File

@ -2,6 +2,8 @@
namespace Wallabag\Import; namespace Wallabag\Import;
use Symfony\Component\DomCrawler\Crawler;
class PocketHtmlImport extends HtmlImport class PocketHtmlImport extends HtmlImport
{ {
protected $filepath; protected $filepath;
@ -44,27 +46,23 @@ class PocketHtmlImport extends HtmlImport
return false; return false;
} }
$html = new \DOMDocument(); $crawler = new Crawler(file_get_contents($this->filepath));
libxml_use_internal_errors(true); $hrefs = $crawler->filterXPath('//a');
$html->loadHTMLFile($this->filepath);
$hrefs = $html->getElementsByTagName('a');
libxml_use_internal_errors(false);
if (0 === $hrefs->length) { if (0 === $hrefs->count()) {
$this->logger->error('Pocket HTML: no entries in imported file'); $this->logger->error('Pocket HTML: no entries in imported file');
return false; return false;
} }
$entries = []; $entries = $hrefs->each(function (Crawler $node) {
foreach ($hrefs as $href) { return [
$entry = []; 'url' => $node->attr('href'),
$entry['url'] = $href->getAttribute('href'); 'tags' => $node->attr('tags'),
$entry['tags'] = $href->getAttribute('tags'); 'created_at' => $node->attr('time_added'),
$entry['created_at'] = $href->getAttribute('time_added'); ];
$entries[] = $entry; });
}
if ($this->producer) { if ($this->producer) {
$this->parseEntriesForProducer($entries); $this->parseEntriesForProducer($entries);

View File

@ -4,6 +4,7 @@ namespace Wallabag\SiteConfig\Authenticator;
use GuzzleHttp\ClientInterface; use GuzzleHttp\ClientInterface;
use GuzzleHttp\Cookie\CookieJar; use GuzzleHttp\Cookie\CookieJar;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\ExpressionLanguage\ExpressionLanguage; use Symfony\Component\ExpressionLanguage\ExpressionLanguage;
use Wallabag\ExpressionLanguage\AuthenticatorProvider; use Wallabag\ExpressionLanguage\AuthenticatorProvider;
use Wallabag\SiteConfig\SiteConfig; use Wallabag\SiteConfig\SiteConfig;
@ -54,22 +55,16 @@ class LoginFormAuthenticator implements Authenticator
public function isLoginRequired($html) public function isLoginRequired($html)
{ {
$useInternalErrors = libxml_use_internal_errors(true);
// need to check for the login dom element ($options['not_logged_in_xpath']) in the HTML // need to check for the login dom element ($options['not_logged_in_xpath']) in the HTML
$doc = new \DOMDocument(); try {
$doc->loadHTML($html); $crawler = new Crawler((string) $html);
$xpath = new \DOMXPath($doc); $loggedIn = $crawler->evaluate((string) $this->siteConfig->getNotLoggedInXpath());
$loggedIn = $xpath->evaluate((string) $this->siteConfig->getNotLoggedInXpath()); } catch (\Throwable $e) {
if (false === $loggedIn) {
return false; return false;
} }
libxml_use_internal_errors($useInternalErrors); return \count($loggedIn) > 0;
return $loggedIn->length > 0;
} }
/** /**