2015-09-10 21:57:25 +02:00
< ? php
2024-02-19 01:30:12 +01:00
namespace Tests\Wallabag\Helper ;
2015-09-10 21:57:25 +02:00
2017-07-01 09:52:38 +02:00
use Graby\Graby ;
2017-05-30 17:48:24 +02:00
use Monolog\Handler\TestHandler ;
2017-07-01 09:52:38 +02:00
use Monolog\Logger ;
2017-12-16 22:17:42 +01:00
use PHPUnit\Framework\TestCase ;
2017-07-01 09:52:38 +02:00
use Psr\Log\NullLogger ;
use Symfony\Component\Validator\ConstraintViolation ;
use Symfony\Component\Validator\ConstraintViolationList ;
use Symfony\Component\Validator\Validator\RecursiveValidator ;
2024-02-19 01:30:12 +01:00
use Wallabag\Entity\Entry ;
use Wallabag\Entity\User ;
use Wallabag\Helper\ContentProxy ;
use Wallabag\Helper\RuleBasedIgnoreOriginProcessor ;
use Wallabag\Helper\RuleBasedTagger ;
2015-09-10 21:57:25 +02:00
2017-12-16 22:17:42 +01:00
class ContentProxyTest extends TestCase
2015-09-10 21:57:25 +02:00
{
2017-05-24 12:57:46 +02:00
/**
 * Error message handed to every ContentProxy built in these tests; the tests
 * expect to find it as the entry content whenever no HTML could be fetched.
 *
 * @var string
 */
private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.' ;
2016-12-03 09:44:34 -05:00
2016-03-27 20:35:56 +02:00
/**
 * A URL without a host combined with an empty Graby result leaves the entry
 * unparsed: the given URL is kept, the fetching error message becomes the
 * content and no domain name is set.
 */
public function testWithBadUrl()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // createPartialMock() skips the constructor and doubles only fetchContent(),
    // without relying on MockBuilder::setMethods() (deprecated in PHPUnit 8.3, removed in 10).
    $graby = $this->createPartialMock(Graby::class, ['fetchContent']);
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => false,
            'title' => '',
            'url' => '',
            'headers' => [
                'content-type' => '',
            ],
            'language' => '',
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://user@:80');

    $this->assertSame('http://user@:80', $entry->getUrl());
    $this->assertEmpty($entry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getMimetype());
    $this->assertEmpty($entry->getLanguage());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertNull($entry->getDomainName());
    $this->assertTrue($entry->isNotParsed());
}
2015-09-10 21:57:25 +02:00
/**
 * When Graby returns no HTML, the entry keeps the requested URL and its
 * domain, receives the fetching error message as content and is flagged as
 * not parsed.
 */
public function testWithEmptyContent()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // createPartialMock() skips the constructor and doubles only fetchContent(),
    // without relying on MockBuilder::setMethods() (deprecated in PHPUnit 8.3, removed in 10).
    $graby = $this->createPartialMock(Graby::class, ['fetchContent']);
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => false,
            'title' => '',
            'url' => '',
            'headers' => [
                'content-type' => '',
            ],
            'language' => '',
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://0.0.0.0', $entry->getUrl());
    $this->assertEmpty($entry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getMimetype());
    $this->assertEmpty($entry->getLanguage());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('0.0.0.0', $entry->getDomainName());
    $this->assertTrue($entry->isNotParsed());
}
/**
 * When Graby returns no HTML but does return a title and a description, the
 * title is kept and the description is appended to the fetching error
 * message; the entry is still flagged as not parsed.
 */
public function testWithEmptyContentButOG()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // createPartialMock() skips the constructor and doubles only fetchContent(),
    // without relying on MockBuilder::setMethods() (deprecated in PHPUnit 8.3, removed in 10).
    $graby = $this->createPartialMock(Graby::class, ['fetchContent']);
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => false,
            'title' => 'my title',
            'url' => '',
            'headers' => [
                'content-type' => '',
            ],
            'language' => '',
            'status' => '',
            'description' => 'desc',
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://domain.io');

    $this->assertSame('http://domain.io', $entry->getUrl());
    $this->assertSame('my title', $entry->getTitle());
    $this->assertSame($this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getLanguage());
    $this->assertEmpty($entry->getHttpStatus());
    $this->assertEmpty($entry->getMimetype());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('domain.io', $entry->getDomainName());
    $this->assertTrue($entry->isNotParsed());
}
/**
 * A complete Graby result (HTML, title, URL, language, status, image and
 * content type) is copied onto the entry, which ends up flagged as parsed.
 */
public function testWithContent()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ruleBasedIgnoreOriginProcessor->expects($this->once())
        ->method('process');

    // createPartialMock() skips the constructor and doubles only fetchContent(),
    // without relying on MockBuilder::setMethods() (deprecated in PHPUnit 8.3, removed in 10).
    $graby = $this->createPartialMock(Graby::class, ['fetchContent']);
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'language' => 'fr',
            'status' => '200',
            'description' => 'OG desc',
            'image' => 'http://3.3.3.3/cover.jpg',
            'headers' => [
                'content-type' => 'text/html',
            ],
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    // The URL returned by Graby replaces the one that was requested.
    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertStringContainsString('content', $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertFalse($entry->isNotParsed());
}
2015-10-11 22:27:47 +02:00
2017-01-10 17:42:34 +01:00
/**
 * Same as testWithContent() but Graby returns a null image: the entry is
 * parsed and its preview picture stays null.
 */
public function testWithContentAndNoOgImage()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ruleBasedIgnoreOriginProcessor->expects($this->once())
        ->method('process');

    // createPartialMock() skips the constructor and doubles only fetchContent(),
    // without relying on MockBuilder::setMethods() (deprecated in PHPUnit 8.3, removed in 10).
    $graby = $this->createPartialMock(Graby::class, ['fetchContent']);
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'language' => 'fr',
            'status' => '200',
            'description' => 'OG desc',
            'image' => null,
            'headers' => [
                'content-type' => 'text/html',
            ],
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertStringContainsString('content', $entry->getContent());
    $this->assertNull($entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertFalse($entry->isNotParsed());
}
2019-05-13 21:56:52 +02:00
/**
 * When Graby returns a null image but the HTML contains an <img>, that image
 * is expected as the preview picture.
 */
public function testWithContentAndContentImage()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ruleBasedIgnoreOriginProcessor->expects($this->once())
        ->method('process');

    // Single definition of the markup that is both returned by Graby and expected on the entry.
    $html = " <h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p> ";

    // createPartialMock() skips the constructor and doubles only fetchContent(),
    // without relying on MockBuilder::setMethods() (deprecated in PHPUnit 8.3, removed in 10).
    $graby = $this->createPartialMock(Graby::class, ['fetchContent']);
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => $html,
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'headers' => [
                'content-type' => 'text/html',
            ],
            'language' => 'fr',
            'status' => '200',
            'image' => null,
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame($html, $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertFalse($entry->isNotParsed());
}
/**
 * When Graby returns both an image and HTML containing a different <img>,
 * the image returned by Graby is expected as the preview picture.
 */
public function testWithContentImageAndOgImage()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ruleBasedIgnoreOriginProcessor->expects($this->once())
        ->method('process');

    // Single definition of the markup that is both returned by Graby and expected on the entry.
    $html = " <h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p> ";

    // createPartialMock() skips the constructor and doubles only fetchContent(),
    // without relying on MockBuilder::setMethods() (deprecated in PHPUnit 8.3, removed in 10).
    $graby = $this->createPartialMock(Graby::class, ['fetchContent']);
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => $html,
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'headers' => [
                'content-type' => 'text/html',
            ],
            'language' => 'fr',
            'status' => '200',
            'image' => 'http://3.3.3.3/cover.jpg',
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame($html, $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertFalse($entry->isNotParsed());
}
2017-06-08 21:51:46 +02:00
/**
 * When the validator reports a violation for the language returned by Graby,
 * the entry language stays null while the rest of the content is stored.
 */
public function testWithContentAndBadLanguage()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ruleBasedIgnoreOriginProcessor->expects($this->once())
        ->method('process');

    // The validator is asked exactly once and answers with a single violation.
    $validator = $this->getValidator(false);
    $validator->expects($this->once())
        ->method('validate')
        ->willReturn(new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]));

    // createPartialMock() skips the constructor and doubles only fetchContent(),
    // without relying on MockBuilder::setMethods() (deprecated in PHPUnit 8.3, removed in 10).
    $graby = $this->createPartialMock(Graby::class, ['fetchContent']);
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'language' => 'dontexist',
            'status' => '200',
            'headers' => [
                'content-type' => 'text/html',
            ],
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $validator, $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertStringContainsString('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertNull($entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertFalse($entry->isNotParsed());
}
/**
 * When the second validation call reports a violation (here for the value
 * 'https://' returned as image), the preview picture stays null while the
 * language and the rest of the content are stored.
 */
public function testWithContentAndBadOgImage()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ruleBasedIgnoreOriginProcessor->expects($this->once())
        ->method('process');

    // First validate() call passes (no violation), the second one fails.
    $validator = $this->getValidator(false);
    $validator->expects($this->exactly(2))
        ->method('validate')
        ->will($this->onConsecutiveCalls(
            new ConstraintViolationList(),
            new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
        ));

    // createPartialMock() skips the constructor and doubles only fetchContent(),
    // without relying on MockBuilder::setMethods() (deprecated in PHPUnit 8.3, removed in 10).
    $graby = $this->createPartialMock(Graby::class, ['fetchContent']);
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'headers' => [
                'content-type' => 'text/html',
            ],
            'language' => 'fr',
            'status' => '200',
            'description' => 'OG desc',
            'image' => 'https://',
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $validator, $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertStringContainsString('content', $entry->getContent());
    $this->assertNull($entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertFalse($entry->isNotParsed());
}
2016-03-27 20:35:56 +02:00
/**
 * Content handed directly to updateEntry() (third argument) is stored on the
 * entry, including the publication date given as a Unix timestamp, the
 * authors and the headers.
 */
public function testWithForcedContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ignoreOriginProcessor->expects($this->once())->method('process');

    // NOTE(review): the trailing `true` presumably enables header storage — confirm against ContentProxy.
    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage,
        true
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '1395635872',
        'authors' => ['Jeremy', 'Nico', 'Thomas'],
        'headers' => [
            'cache-control' => 'no-cache',
            'content-type' => 'text/html',
        ],
    ];

    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0', $forcedContent);

    $this->assertSame('http://1.1.1.1', $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertStringContainsString('content', $savedEntry->getContent());
    $this->assertSame('text/html', $savedEntry->getMimetype());
    $this->assertSame('fr', $savedEntry->getLanguage());
    $this->assertSame(4.0, $savedEntry->getReadingTime());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
    $this->assertSame('24/03/2014', $savedEntry->getPublishedAt()->format('d/m/Y'));

    // Every forced author must show up in the stored list.
    $this->assertContains('Jeremy', $savedEntry->getPublishedBy());
    $this->assertContains('Nico', $savedEntry->getPublishedBy());
    $this->assertContains('Thomas', $savedEntry->getPublishedBy());

    $this->assertNotNull($savedEntry->getHeaders(), 'Headers are stored, so value is not null');
    $this->assertContains('no-cache', $savedEntry->getHeaders());
    $this->assertFalse($savedEntry->isNotParsed());
}
2022-05-08 14:12:35 +02:00
/**
 * Forced content whose date is an ISO-8601 style string is stored with the
 * matching publication date.
 */
public function testWithForcedContentAndDateTime()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // A real Monolog logger writing into an in-memory handler.
    $testLogger = new Logger('test', [new TestHandler()]);

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $testLogger,
        $this->fetchingErrorMessage
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '2016-09-08T11:55:58+0200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertStringContainsString('content', $savedEntry->getContent());
    $this->assertSame('text/html', $savedEntry->getMimetype());
    $this->assertSame('fr', $savedEntry->getLanguage());
    $this->assertSame(4.0, $savedEntry->getReadingTime());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
    $this->assertSame('08/09/2016', $savedEntry->getPublishedAt()->format('d/m/Y'));
    $this->assertFalse($savedEntry->isNotParsed());
}
/**
 * Forced content with an unparsable date leaves the publication date null
 * and logs an "Error while defining date" message as the first record.
 */
public function testWithForcedContentAndBadDate()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // Capture log records in memory so they can be inspected below.
    $fooLogger = new Logger('foo');
    $logRecorder = new TestHandler();
    $fooLogger->pushHandler($logRecorder);

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $fooLogger,
        $this->fetchingErrorMessage
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '01 02 2012',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertStringContainsString('content', $savedEntry->getContent());
    $this->assertSame('text/html', $savedEntry->getMimetype());
    $this->assertSame('fr', $savedEntry->getLanguage());
    $this->assertSame(4.0, $savedEntry->getReadingTime());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
    $this->assertNull($savedEntry->getPublishedAt());
    $this->assertFalse($savedEntry->isNotParsed());

    $logged = $logRecorder->getRecords();
    $this->assertCount(3, $logged);

    $firstMessage = $logged[0]['message'];
    $this->assertStringContainsString('Error while defining date', $firstMessage);
}
/**
 * An exception thrown by the tagger does not escape updateEntry(), and the
 * entry ends up without any tag.
 */
public function testTaggerThrowException()
{
    $failingTagger = $this->getTaggerMock();
    $failingTagger
        ->expects($this->once())
        ->method('tag')
        ->will($this->throwException(new \Exception()));

    $ignoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    $contentProxy = new ContentProxy(
        new Graby(),
        $failingTagger,
        $ignoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://1.1.1.1', $forcedContent);

    $this->assertCount(0, $savedEntry->getTags());
}
2017-05-12 07:53:21 +02:00
/**
 * Datasets for testWithCrazyHtmlContent().
 *
 * Each dataset is a pair: the HTML handed over as content, then a string
 * that must no longer appear in the stored entry content.
 *
 * @return array
 */
public function dataForCrazyHtml()
{
    $datasets = [];

    // Script hidden inside an IE conditional comment.
    $datasets['script and comment'] = [
        '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
        'lol',
    ];

    // Plain inline script tag.
    $datasets['script'] = [
        '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
        'script',
    ];

    return $datasets;
}
/**
 * Forced HTML containing script tags is stored without the given string in
 * the entry content, while the remaining fields are copied as-is.
 *
 * @dataProvider dataForCrazyHtml
 *
 * @param string $html          HTML handed to updateEntry() as forced content
 * @param string $escapedString String that must not appear in the stored content
 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    $proxy = new ContentProxy(new Graby(), $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry(
        $entry,
        'http://1.1.1.1',
        [
            'html' => $html,
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'language' => 'fr',
            'status' => '200',
            'description' => 'OG desc',
            'image' => 'http://3.3.3.3/cover.jpg',
            'headers' => [
                'content-type' => 'text/html',
            ],
        ]
    );

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertStringNotContainsString($escapedString, $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertFalse($entry->isNotParsed());
}
2017-06-30 17:04:40 +02:00
/**
 * When the fetched resource is an image (content type image/jpeg), the entry
 * content references the image URL and the same URL is expected as preview
 * picture.
 */
public function testWithImageAsContent()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // createPartialMock() skips the constructor and doubles only fetchContent(),
    // without relying on MockBuilder::setMethods() (deprecated in PHPUnit 8.3, removed in 10).
    $graby = $this->createPartialMock(Graby::class, ['fetchContent']);
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1/image.jpg',
            'status' => '200',
            'headers' => [
                'content-type' => 'image/jpeg',
            ],
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1/image.jpg', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertStringContainsString('http://1.1.1.1/image.jpg', $entry->getContent());
    $this->assertSame('http://1.1.1.1/image.jpg', $entry->getPreviewPicture());
    $this->assertSame('image/jpeg', $entry->getMimetype());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertFalse($entry->isNotParsed());
}
2020-12-08 09:17:10 +01:00
/**
 * A title made only of valid UTF-8 byte sequences is stored unchanged: the
 * hexadecimal dump of the entry title equals the bytes returned by Graby.
 */
public function testWebsiteWithValidUTF8TitleDoNothing()
{
    // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
    // See http://graphemica.com for more info about the characters
    // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
    $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');

    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    // createPartialMock() skips the constructor and doubles only fetchContent(),
    // without relying on MockBuilder::setMethods() (deprecated in PHPUnit 8.3, removed in 10).
    $graby = $this->createPartialMock(Graby::class, ['fetchContent']);
    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => false,
            'title' => $actualTitle,
            'url' => '',
            'headers' => [
                'content-type' => 'text/html',
            ],
            'language' => '',
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
    $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
}
2020-12-08 09:17:10 +01:00
/**
 * An HTML page title containing a byte that is not valid UTF-8 is stored without that byte.
 */
public function testWebsiteWithInvalidUTF8TitleRemoveInvalidCharacter()
{
    // See http://graphemica.com for more info about the characters.
    // 'a€b' encoded as WINDOWS-1252 is 61;80;62, but 80 on its own is not valid UTF-8
    // (the UTF-8 encoding of the euro sign U+20AC is E282AC).
    $fetchedTitle = $this->hexToStr('61' . '80' . '62');

    $fetchedContent = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // Only 'ab' (61;62) is expected to remain once the invalid byte 80 has been removed.
    $remainingHex = '61' . '62';
    $this->assertSame($remainingHex, $this->strToHex($savedEntry->getTitle()));
}
2020-12-08 09:17:10 +01:00
/**
 * A PDF title delivered as UTF-16BE is stored as its UTF-8 equivalent.
 */
public function testPdfWithUTF16BETitleConvertToUTF8()
{
    // See http://graphemica.com for more info about the characters.
    // U+1F63B encoded as UTF-16BE is the surrogate pair D83D DE3B.
    $fetchedTitle = $this->hexToStr('D83DDE3B');

    $fetchedContent = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // U+1F63B encoded as UTF-8 is F09F98BB.
    $utf8Hex = 'F09F98BB';
    $this->assertSame($utf8Hex, $this->strToHex($savedEntry->getTitle()));
}
2020-12-08 09:17:10 +01:00
/**
 * A PDF title that is already UTF-8 keeps the same bytes after the update.
 */
public function testPdfWithUTF8TitleDoNothing()
{
    // See http://graphemica.com for more info about the characters.
    // U+1F63B encoded as UTF-8 is F09F98BB.
    $titleHex = 'F09F98BB';

    $fetchedContent = [
        'html' => false,
        'title' => $this->hexToStr($titleHex),
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // The stored title is expected to be the very same UTF-8 bytes.
    $this->assertSame($titleHex, $this->strToHex($savedEntry->getTitle()));
}
2020-12-08 09:17:10 +01:00
/**
 * A PDF title delivered as WINDOWS-1252 is stored as its UTF-8 equivalent.
 */
public function testPdfWithWINDOWS1252TitleConvertToUTF8()
{
    // See http://graphemica.com for more info about the characters.
    // The euro sign is the single byte 80 in WINDOWS-1252.
    $fetchedTitle = $this->hexToStr('80');

    $fetchedContent = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $ignoreOriginProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $ignoreOriginProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // The euro sign (U+20AC) encoded as UTF-8 is E282AC.
    $utf8Hex = 'E282AC';
    $this->assertSame($utf8Hex, $this->strToHex($savedEntry->getTitle()));
}
2020-12-08 09:17:10 +01:00
/**
 * A PDF title holding a byte that is invalid in UTF-16, UTF-8 and WINDOWS-1252 is stored without that byte.
 *
 * Skipped on PHP >= 8.1 only, see the comment at the top of the body.
 */
public function testPdfWithInvalidCharacterInTitleRemoveInvalidCharacter()
{
    /*
     * Too much time was already spent trying to solve the problem of this test.
     * Starting with PHP 8.1 it fails because the string with the invalid character is detected as
     * WINDOWS-1252 and then converted.
     * In PHP < 8.1 the string encoding can't be detected, so nothing is converted and the removal of
     * the invalid character happens in `sanitizeUTF8Text`.
     *
     * Why the string is detected as WINDOWS-1252 in PHP 8.1 and not before is still not understood,
     * so the test is skipped only on the versions where it is known to fail.
     */
    if (\PHP_VERSION_ID >= 80100) {
        $this->markTestSkipped('Encoding issue in PHP >= 8.1');
    }

    // See http://graphemica.com for more info about the characters.
    // Title bytes: U+1F63B (F09F98BB), U+2124 (E284A4), the invalid byte 81, then U+007A (7A).
    // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252.
    $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');

    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();

    $graby = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();

    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => false,
            'title' => $actualTitle,
            'url' => '',
            'headers' => [
                'content-type' => 'application/pdf',
            ],
            'language' => '',
        ]);

    $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);

    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    // Expected bytes: U+1F63B (F09F98BB), U+2124 (E284A4) and U+007A (7A); the invalid 0x81 has been removed.
    $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
}
2018-09-06 22:26:20 +02:00
/**
 * Data provider for testWithChangedUrl.
 *
 * Declared static because it never touches instance state; static providers are accepted by every
 * PHPUnit version, while non-static ones are deprecated from PHPUnit 10 on.
 *
 * Each named dataset contains, in order:
 *   $entry_url
 *   $origin_url
 *   $content_url
 *   $expected_entry_url
 *   $expected_origin_url
 *   $expected_domain
 *   $processor_result
 *
 * @return array
 */
public static function dataForChangedUrl()
{
    return [
        'normal' => [
            'http://0.0.0.0',
            null,
            'http://1.1.1.1',
            'http://1.1.1.1',
            'http://0.0.0.0',
            '1.1.1.1',
            false,
        ],
        'origin already set' => [
            'http://0.0.0.0',
            'http://hello',
            'http://1.1.1.1',
            'http://1.1.1.1',
            'http://hello',
            '1.1.1.1',
            false,
        ],
        'trailing slash' => [
            'https://example.com/hello-world',
            null,
            'https://example.com/hello-world/',
            'https://example.com/hello-world/',
            null,
            'example.com',
            false,
        ],
        'query string in fetched content' => [
            'https://example.org/hello',
            null,
            'https://example.org/hello?world=1',
            'https://example.org/hello?world=1',
            'https://example.org/hello',
            'example.org',
            false,
        ],
        'fragment in fetched content' => [
            'https://example.org/hello',
            null,
            'https://example.org/hello#world',
            'https://example.org/hello',
            null,
            'example.org',
            false,
        ],
        'fragment and query string in fetched content' => [
            'https://example.org/hello',
            null,
            'https://example.org/hello?foo#world',
            'https://example.org/hello?foo#world',
            'https://example.org/hello',
            'example.org',
            false,
        ],
        'different path and query string in fetch content' => [
            'https://example.org/hello',
            null,
            'https://example.org/world?foo',
            'https://example.org/world?foo',
            'https://example.org/hello',
            'example.org',
            false,
        ],
        'feedproxy ignore list test' => [
            'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
            null,
            'https://example.org/hello-wallabag',
            'https://example.org/hello-wallabag',
            null,
            'example.org',
            true,
        ],
        'feedproxy ignore list test with origin url already set' => [
            'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
            'https://example.org/this-is-source',
            'https://example.org/hello-wallabag',
            'https://example.org/hello-wallabag',
            'https://example.org/this-is-source',
            'example.org',
            true,
        ],
        'lemonde ignore pattern test' => [
            'http://www.lemonde.fr/tiny/url',
            null,
            'http://example.com/hello-world',
            'http://example.com/hello-world',
            null,
            'example.com',
            true,
        ],
    ];
}
/**
 * Feeds pre-fetched content whose URL differs from the entry URL and checks the resulting
 * entry URL, domain name and origin URL.
 *
 * @dataProvider dataForChangedUrl
 */
public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain, $processor_result)
{
    // Content handed straight to updateEntry.
    $prefetchedContent = [
        'html' => false,
        'title' => '',
        'url' => $content_url,
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // The ignore-origin processor is stubbed to answer with the value from the dataset.
    $ignoreOriginProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ignoreOriginProcessorMock->expects($this->once())
        ->method('process')
        ->willReturn($processor_result);

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $ignoreOriginProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage,
        true
    );

    $savedEntry = new Entry(new User());
    $savedEntry->setOriginUrl($origin_url);

    $contentProxy->updateEntry($savedEntry, $entry_url, $prefetchedContent, true);

    $this->assertSame($expected_entry_url, $savedEntry->getUrl());
    $this->assertSame($expected_domain, $savedEntry->getDomainName());
    $this->assertSame($expected_origin_url, $savedEntry->getOriginUrl());
}
2018-09-19 13:59:07 +02:00
/**
 * Render the raw bytes of a string as uppercase hexadecimal, two digits per byte.
 *
 * Works on bytes, not characters: a multi-byte UTF-8 character yields one hex pair per byte.
 *
 * @param string $string Bytes to encode
 *
 * @return string Uppercase hexadecimal representation ('' for an empty input)
 */
private function strToHex($string)
{
    // bin2hex() already emits two zero-padded lowercase digits per byte.
    return strtoupper(bin2hex($string));
}
/**
 * Convert a hexadecimal string into the raw bytes it describes.
 *
 * Malformed input (odd length or a non-hexadecimal character) is rejected, so that a broken
 * fixture fails loudly instead of being silently truncated or decoded to unexpected bytes.
 *
 * @param string $hex Even-length sequence of hexadecimal digits, upper or lower case
 *
 * @throws \InvalidArgumentException When $hex is not a sequence of complete hexadecimal byte pairs
 *
 * @return string Decoded bytes ('' for an empty input)
 */
private function hexToStr($hex)
{
    // Validate first: hex2bin() would only emit a warning and return false on bad input.
    if (1 !== preg_match('/\A(?:[0-9A-Fa-f]{2})*\z/', $hex)) {
        throw new \InvalidArgumentException(sprintf('"%s" is not a valid hexadecimal byte sequence.', $hex));
    }

    return hex2bin($hex);
}
2015-10-11 22:27:47 +02:00
/**
 * Build a RuleBasedTagger mock with the constructor disabled and only `tag` configured as a mocked method.
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
2015-10-17 17:45:51 +02:00
2019-08-11 23:55:52 +02:00
/**
 * Build a RuleBasedIgnoreOriginProcessor mock with the constructor disabled and only `process`
 * configured as a mocked method.
 */
private function getRuleBasedIgnoreOriginProcessorMock()
{
    $builder = $this->getMockBuilder(RuleBasedIgnoreOriginProcessor::class);
    $builder->setMethods(['process']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
2015-10-31 16:38:49 +01:00
/**
 * Provide a fresh NullLogger instance for the ContentProxy built in a test.
 */
private function getLogger()
{
    $logger = new NullLogger();

    return $logger;
}
2017-06-08 21:51:46 +02:00
2017-12-18 10:14:00 +01:00
/**
 * Build a RecursiveValidator mock with the constructor disabled and only `validate` mocked.
 *
 * @param bool $withDefaultMock When true, `validate` is stubbed to return an empty
 *                              ConstraintViolationList; otherwise the mock is returned unconfigured
 */
private function getValidator($withDefaultMock = true)
{
    $validator = $this->getMockBuilder(RecursiveValidator::class)
        ->setMethods(['validate'])
        ->disableOriginalConstructor()
        ->getMock();

    if (!$withDefaultMock) {
        return $validator;
    }

    $validator->expects($this->any())
        ->method('validate')
        ->willReturn(new ConstraintViolationList());

    return $validator;
}
2015-09-10 21:57:25 +02:00
}