mirror of
https://github.com/wallabag/wallabag.git
synced 2024-12-13 17:09:07 +01:00
More robust srcset image attribute handling
Linked to HTMLawed PR https://github.com/kesar/HTMLawed/pull/17
This commit is contained in:
parent
0f36a88e16
commit
e6f12c0734
@ -185,7 +185,7 @@ class DownloadImages
|
|||||||
*
|
*
|
||||||
* @return array An array of urls
|
* @return array An array of urls
|
||||||
*/
|
*/
|
||||||
protected function getSrcsetUrls(Crawler $imagesCrawler)
|
private function getSrcsetUrls(Crawler $imagesCrawler)
|
||||||
{
|
{
|
||||||
$urls = [];
|
$urls = [];
|
||||||
$iterator = $imagesCrawler
|
$iterator = $imagesCrawler
|
||||||
@ -193,9 +193,14 @@ class DownloadImages
|
|||||||
while ($iterator->valid()) {
|
while ($iterator->valid()) {
|
||||||
$srcsetAttribute = $iterator->current()->getAttribute('srcset');
|
$srcsetAttribute = $iterator->current()->getAttribute('srcset');
|
||||||
if ('' !== $srcsetAttribute) {
|
if ('' !== $srcsetAttribute) {
|
||||||
$srcset = array_map('trim', explode(',', $srcsetAttribute));
|
// The URL part is one or more characters other than ", ' or whitespace
|
||||||
|
// Followed by zero or more whitespace characters
|
||||||
|
// Must be one or more digits followed by w OR x
|
||||||
|
$pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
|
||||||
|
preg_match_all($pattern, $srcsetAttribute, $matches);
|
||||||
|
$srcset = call_user_func_array('array_merge', $matches);
|
||||||
$srcsetUrls = array_map(function ($src) {
|
$srcsetUrls = array_map(function ($src) {
|
||||||
return explode(' ', $src)[0];
|
return trim(explode(' ', $src, 2)[0]);
|
||||||
}, $srcset);
|
}, $srcset);
|
||||||
$urls = array_merge($srcsetUrls, $urls);
|
$urls = array_merge($srcsetUrls, $urls);
|
||||||
}
|
}
|
||||||
|
@ -205,6 +205,31 @@ class DownloadImagesTest extends TestCase
|
|||||||
$this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced');
|
$this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testProcessImageWithTrickySrcset()
|
||||||
|
{
|
||||||
|
$client = new Client();
|
||||||
|
|
||||||
|
$mock = new Mock([
|
||||||
|
new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
|
||||||
|
new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
|
||||||
|
new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
|
||||||
|
]);
|
||||||
|
|
||||||
|
$client->getEmitter()->attach($mock);
|
||||||
|
|
||||||
|
$logHandler = new TestHandler();
|
||||||
|
$logger = new Logger('test', [$logHandler]);
|
||||||
|
|
||||||
|
$download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
|
||||||
|
$res = $download->processHtml(123, '<figure id="post-257260" class="align-none media-257260"><img src="https://cdn.css-tricks.com/wp-content/uploads/2017/08/the-critical-request.png" srcset="https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_1000,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 1000w, https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_200,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 200w" sizes="(min-width: 1850px) calc( (100vw - 555px) / 3 )
|
||||||
|
(min-width: 1251px) calc( (100vw - 530px) / 2 )
|
||||||
|
(min-width: 1086px) calc(100vw - 480px)
|
||||||
|
(min-width: 626px) calc(100vw - 335px)
|
||||||
|
calc(100vw - 30px)" alt="" /></figure>', 'https://css-tricks.com/the-critical-request/');
|
||||||
|
|
||||||
|
$this->assertNotContains('f_auto,q_auto', $res, 'Image srcset attribute were not replaced');
|
||||||
|
}
|
||||||
|
|
||||||
public function testProcessImageWithNullPath()
|
public function testProcessImageWithNullPath()
|
||||||
{
|
{
|
||||||
$client = new Client();
|
$client = new Client();
|
||||||
|
Loading…
Reference in New Issue
Block a user