Allow other fields to be send using API

Entry API can now have these new fields:
- content
- language
- preview_picture
- published_at

Re-use the ContentProxy to be able to do the same using the web UI (in the future).
htmLawed is used to clean stuff from content, I hope it’ll be enough to avoid security breach.

Lower content validation when we want to update an entry with content already defined. Before, language & content_type were required. If there weren’t provided, we re-fetched the content using graby. I think these fields aren’t required for an entry to be created. So I removed them.
Which means some import from the v1 export won’t be re-fetched since they provide content, url & title.

Also, remove liberation link from Readability import to avoid overlaping import (from wallabag v1, which had the same link)
This commit is contained in:
Jeremy Benoist 2017-05-11 08:14:29 +02:00
parent 4423b88c5b
commit e668a8124c
No known key found for this signature in database
GPG Key ID: BCA73962457ACC3C
7 changed files with 74 additions and 43 deletions

View File

@ -280,6 +280,10 @@ class EntryRestController extends WallabagRestController
* {"name"="tags", "dataType"="string", "required"=false, "format"="tag1,tag2,tag3", "description"="a comma-separated list of tags."}, * {"name"="tags", "dataType"="string", "required"=false, "format"="tag1,tag2,tag3", "description"="a comma-separated list of tags."},
* {"name"="starred", "dataType"="integer", "required"=false, "format"="1 or 0", "description"="entry already starred"}, * {"name"="starred", "dataType"="integer", "required"=false, "format"="1 or 0", "description"="entry already starred"},
* {"name"="archive", "dataType"="integer", "required"=false, "format"="1 or 0", "description"="entry already archived"}, * {"name"="archive", "dataType"="integer", "required"=false, "format"="1 or 0", "description"="entry already archived"},
* {"name"="content", "dataType"="string", "required"=false, "description"="Content of the entry"},
* {"name"="language", "dataType"="string", "required"=false, "description"="Language of the entry"},
* {"name"="preview_picture", "dataType"="string", "required"=false, "description"="Preview picture of the entry"},
* {"name"="published_at", "dataType"="datetime", "format"="YYYY-MM-DDTHH:II:SS+TZ", "required"=false, "description"="Published date of the entry"},
* } * }
* ) * )
* *
@ -293,30 +297,42 @@ class EntryRestController extends WallabagRestController
$title = $request->request->get('title'); $title = $request->request->get('title');
$isArchived = $request->request->get('archive'); $isArchived = $request->request->get('archive');
$isStarred = $request->request->get('starred'); $isStarred = $request->request->get('starred');
$content = $request->request->get('content');
$language = $request->request->get('language');
$picture = $request->request->get('preview_picture');
$publishedAt = $request->request->get('published_at');
$entry = $this->get('wallabag_core.entry_repository')->findByUrlAndUserId($url, $this->getUser()->getId()); $entry = $this->get('wallabag_core.entry_repository')->findByUrlAndUserId($url, $this->getUser()->getId());
if (false === $entry) { if (false === $entry) {
$entry = new Entry($this->getUser()); $entry = new Entry($this->getUser());
try {
$entry = $this->get('wallabag_core.content_proxy')->updateEntry(
$entry,
$url
);
} catch (\Exception $e) {
$this->get('logger')->error('Error while saving an entry', [
'exception' => $e,
'entry' => $entry,
]);
$entry->setUrl($url);
}
} }
if (!is_null($title)) { try {
$entry->setTitle($title); $entry = $this->get('wallabag_core.content_proxy')->updateEntry(
$entry,
$url,
[
'title' => $title,
'html' => $content,
'url' => $url,
'language' => $language,
'date' => $publishedAt,
// faking the preview picture
'open_graph' => [
'og_image' => $picture,
],
]
);
} catch (\Exception $e) {
$this->get('logger')->error('Error while saving an entry', [
'exception' => $e,
'entry' => $entry,
]);
$entry->setUrl($url);
} }
$tags = $request->request->get('tags', ''); $tags = $request->request->get('tags', []);
if (!empty($tags)) { if (!empty($tags)) {
$this->get('wallabag_core.tags_assigner')->assignTagsToEntry($entry, $tags); $this->get('wallabag_core.tags_assigner')->assignTagsToEntry($entry, $tags);
} }

View File

@ -45,6 +45,18 @@ class ContentProxy
*/ */
public function updateEntry(Entry $entry, $url, array $content = []) public function updateEntry(Entry $entry, $url, array $content = [])
{ {
// ensure content is a bit cleaned up
if (!empty($content['html'])) {
$content['html'] = htmLawed($content['html'], [
'safe' => 1,
// which means: do not remove iframe elements
'elements' => '*+iframe',
'deny_attribute' => 'style',
'comment' => 1,
'cdata' => 1,
]);
}
// do we have to fetch the content or the provided one is ok? // do we have to fetch the content or the provided one is ok?
if (empty($content) || false === $this->validateContent($content)) { if (empty($content) || false === $this->validateContent($content)) {
$fetchedContent = $this->graby->fetchContent($url); $fetchedContent = $this->graby->fetchContent($url);
@ -57,7 +69,7 @@ class ContentProxy
} }
$title = $content['title']; $title = $content['title'];
if (!$title && isset($content['open_graph']['og_title'])) { if (!$title && !empty($content['open_graph']['og_title'])) {
$title = $content['open_graph']['og_title']; $title = $content['open_graph']['og_title'];
} }
@ -65,7 +77,7 @@ class ContentProxy
if (false === $html) { if (false === $html) {
$html = $this->fetchingErrorMessage; $html = $this->fetchingErrorMessage;
if (isset($content['open_graph']['og_description'])) { if (!empty($content['open_graph']['og_description'])) {
$html .= '<p><i>But we found a short description: </i></p>'; $html .= '<p><i>But we found a short description: </i></p>';
$html .= $content['open_graph']['og_description']; $html .= $content['open_graph']['og_description'];
} }
@ -76,8 +88,12 @@ class ContentProxy
$entry->setContent($html); $entry->setContent($html);
$entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); $entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
if (isset($content['date']) && null !== $content['date'] && '' !== $content['date']) { if (!empty($content['date'])) {
$entry->setPublishedAt(new \DateTime($content['date'])); try {
$entry->setPublishedAt(new \DateTime($content['date']));
} catch (\Exception $e) {
$this->logger->warn('Error while defining date', ['e' => $e, 'url' => $url, 'date' => $content['date']]);
}
} }
if (!empty($content['authors'])) { if (!empty($content['authors'])) {
@ -97,12 +113,12 @@ class ContentProxy
$entry->setDomainName($domainName); $entry->setDomainName($domainName);
} }
if (isset($content['open_graph']['og_image']) && $content['open_graph']['og_image']) { if (!empty($content['open_graph']['og_image'])) {
$entry->setPreviewPicture($content['open_graph']['og_image']); $entry->setPreviewPicture($content['open_graph']['og_image']);
} }
// if content is an image define as a preview too // if content is an image define as a preview too
if (isset($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
$entry->setPreviewPicture($content['url']); $entry->setPreviewPicture($content['url']);
} }
@ -128,6 +144,6 @@ class ContentProxy
*/ */
private function validateContent(array $content) private function validateContent(array $content)
{ {
return isset($content['title']) && isset($content['html']) && isset($content['url']) && isset($content['language']) && isset($content['content_type']); return !empty($content['title']) && !empty($content['html']) && !empty($content['url']);
} }
} }

View File

@ -342,6 +342,9 @@ class EntryRestControllerTest extends WallabagApiTestCase
'url' => 'http://www.lemonde.fr/pixels/article/2015/03/28/plongee-dans-l-univers-d-ingress-le-jeu-de-google-aux-frontieres-du-reel_4601155_4408996.html', 'url' => 'http://www.lemonde.fr/pixels/article/2015/03/28/plongee-dans-l-univers-d-ingress-le-jeu-de-google-aux-frontieres-du-reel_4601155_4408996.html',
'tags' => 'google', 'tags' => 'google',
'title' => 'New title for my article', 'title' => 'New title for my article',
'content' => 'my content',
'language' => 'de_DE',
'published_at' => '2016-09-08T11:55:58+0200',
]); ]);
$this->assertEquals(200, $this->client->getResponse()->getStatusCode()); $this->assertEquals(200, $this->client->getResponse()->getStatusCode());
@ -355,6 +358,9 @@ class EntryRestControllerTest extends WallabagApiTestCase
$this->assertEquals('New title for my article', $content['title']); $this->assertEquals('New title for my article', $content['title']);
$this->assertEquals(1, $content['user_id']); $this->assertEquals(1, $content['user_id']);
$this->assertCount(2, $content['tags']); $this->assertCount(2, $content['tags']);
$this->assertSame('my content', $content['content']);
$this->assertSame('de_DE', $content['language']);
$this->assertSame('2016-09-08T11:55:58+0200', $content['published_at']);
} }
public function testPostSameEntry() public function testPostSameEntry()

View File

@ -112,16 +112,16 @@ class WallabagV1ControllerTest extends WallabagCoreTestCase
->get('doctrine.orm.entity_manager') ->get('doctrine.orm.entity_manager')
->getRepository('WallabagCoreBundle:Entry') ->getRepository('WallabagCoreBundle:Entry')
->findByUrlAndUserId( ->findByUrlAndUserId(
'https://framablog.org/2014/02/05/framabag-service-libre-gratuit-interview-developpeur/', 'http://www.framablog.org/index.php/post/2014/02/05/Framabag-service-libre-gratuit-interview-developpeur',
$this->getLoggedInUserId() $this->getLoggedInUserId()
); );
$this->assertGreaterThan(1, $body = $crawler->filter('body')->extract(['_text'])); $this->assertGreaterThan(1, $body = $crawler->filter('body')->extract(['_text']));
$this->assertContains('flashes.import.notice.summary', $body[0]); $this->assertContains('flashes.import.notice.summary', $body[0]);
$this->assertNotEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is ok'); $this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.framablog.org is empty');
$this->assertNotEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is ok'); $this->assertEmpty($content->getPreviewPicture(), 'Preview picture for http://www.framablog.org is empty');
$this->assertNotEmpty($content->getLanguage(), 'Language for http://www.framablog.org is ok'); $this->assertEmpty($content->getLanguage(), 'Language for http://www.framablog.org is empty');
$tags = $content->getTags(); $tags = $content->getTags();
$this->assertContains('foot', $tags, 'It includes the "foot" tag'); $this->assertContains('foot', $tags, 'It includes the "foot" tag');

View File

@ -119,9 +119,9 @@ class WallabagV2ControllerTest extends WallabagCoreTestCase
$this->getLoggedInUserId() $this->getLoggedInUserId()
); );
$this->assertNotEmpty($content->getMimetype(), 'Mimetype for http://www.liberation.fr is ok'); $this->assertEmpty($content->getMimetype(), 'Mimetype for http://www.liberation.fr is empty');
$this->assertNotEmpty($content->getPreviewPicture(), 'Preview picture for http://www.liberation.fr is ok'); $this->assertEmpty($content->getPreviewPicture(), 'Preview picture for http://www.liberation.fr is empty');
$this->assertNotEmpty($content->getLanguage(), 'Language for http://www.liberation.fr is ok'); $this->assertEmpty($content->getLanguage(), 'Language for http://www.liberation.fr is empty');
$tags = $content->getTags(); $tags = $content->getTags();
$this->assertContains('foot', $tags, 'It includes the "foot" tag'); $this->assertContains('foot', $tags, 'It includes the "foot" tag');

View File

@ -67,14 +67,14 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
public function testImport() public function testImport()
{ {
$readabilityImport = $this->getReadabilityImport(false, 24); $readabilityImport = $this->getReadabilityImport(false, 23);
$readabilityImport->setFilepath(__DIR__.'/../fixtures/readability.json'); $readabilityImport->setFilepath(__DIR__.'/../fixtures/readability.json');
$entryRepo = $this->getMockBuilder('Wallabag\CoreBundle\Repository\EntryRepository') $entryRepo = $this->getMockBuilder('Wallabag\CoreBundle\Repository\EntryRepository')
->disableOriginalConstructor() ->disableOriginalConstructor()
->getMock(); ->getMock();
$entryRepo->expects($this->exactly(24)) $entryRepo->expects($this->exactly(23))
->method('findByUrlAndUserId') ->method('findByUrlAndUserId')
->willReturn(false); ->willReturn(false);
@ -88,14 +88,14 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
->getMock(); ->getMock();
$this->contentProxy $this->contentProxy
->expects($this->exactly(24)) ->expects($this->exactly(23))
->method('updateEntry') ->method('updateEntry')
->willReturn($entry); ->willReturn($entry);
$res = $readabilityImport->import(); $res = $readabilityImport->import();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertEquals(['skipped' => 0, 'imported' => 24, 'queued' => 0], $readabilityImport->getSummary()); $this->assertEquals(['skipped' => 0, 'imported' => 23, 'queued' => 0], $readabilityImport->getSummary());
} }
public function testImportAndMarkAllAsRead() public function testImportAndMarkAllAsRead()
@ -165,7 +165,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
->getMock(); ->getMock();
$producer $producer
->expects($this->exactly(24)) ->expects($this->exactly(23))
->method('publish'); ->method('publish');
$readabilityImport->setProducer($producer); $readabilityImport->setProducer($producer);
@ -173,7 +173,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
$res = $readabilityImport->setMarkAsRead(true)->import(); $res = $readabilityImport->setMarkAsRead(true)->import();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertEquals(['skipped' => 0, 'imported' => 0, 'queued' => 24], $readabilityImport->getSummary()); $this->assertEquals(['skipped' => 0, 'imported' => 0, 'queued' => 23], $readabilityImport->getSummary());
} }
public function testImportWithRedis() public function testImportWithRedis()
@ -211,7 +211,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
$res = $readabilityImport->setMarkAsRead(true)->import(); $res = $readabilityImport->setMarkAsRead(true)->import();
$this->assertTrue($res); $this->assertTrue($res);
$this->assertEquals(['skipped' => 0, 'imported' => 0, 'queued' => 24], $readabilityImport->getSummary()); $this->assertEquals(['skipped' => 0, 'imported' => 0, 'queued' => 23], $readabilityImport->getSummary());
$this->assertNotEmpty($redisMock->lpop('readability')); $this->assertNotEmpty($redisMock->lpop('readability'));
} }

View File

@ -10,13 +10,6 @@
"article__title": "We Looked At 167,943 Tweets & Found Out Hashtags Are Worthless", "article__title": "We Looked At 167,943 Tweets & Found Out Hashtags Are Worthless",
"archive": false "archive": false
}, },
{
"article__title": "Réfugiés: l'UE va créer 100 000 places d'accueil dans les Balkans",
"article__url": "http://www.liberation.fr/planete/2015/10/26/refugies-l-ue-va-creer-100-000-places-d-accueil-dans-les-balkans_1408867",
"archive": false,
"date_added": "2016-09-08T11:55:58+0200",
"favorite": false
},
{ {
"article__title": "No title found", "article__title": "No title found",
"article__url": "http://news.nationalgeographic.com/2016/02/160211-albatrosses-mothers-babies-animals-science/&sf20739758=1", "article__url": "http://news.nationalgeographic.com/2016/02/160211-albatrosses-mothers-babies-animals-science/&sf20739758=1",