diff --git a/app/DoctrineMigrations/Version20190401105353.php b/app/DoctrineMigrations/Version20190401105353.php new file mode 100644 index 000000000..d27962dbe --- /dev/null +++ b/app/DoctrineMigrations/Version20190401105353.php @@ -0,0 +1,42 @@ +getTable($this->getTable('entry')); + + $this->skipIf($entryTable->hasColumn('hashed_url'), 'It seems that you already played this migration.'); + + $entryTable->addColumn('hashed_url', 'text', [ + 'length' => 40, + 'notnull' => false, + ]); + + $entryTable->addIndex(['user_id', 'hashed_url'], 'hashed_url_user_id', [], ['lengths' => [null, 40]]); + } + + /** + * @param Schema $schema + */ + public function down(Schema $schema) + { + $entryTable = $schema->getTable($this->getTable('entry')); + + $this->skipIf(!$entryTable->hasColumn('hashed_url'), 'It seems that this migration has not been played or was already rolled back.'); + + $entryTable->dropIndex('hashed_url_user_id'); + $entryTable->dropColumn('hashed_url'); + } +} diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index 5c8500917..06520af91 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -27,8 +27,10 @@ class EntryRestController extends WallabagRestController * @ApiDoc( * parameters={ * {"name"="return_id", "dataType"="string", "required"=false, "format"="1 or 0", "description"="Set 1 if you want to retrieve ID in case entry(ies) exists, 0 by default"}, - * {"name"="url", "dataType"="string", "required"=true, "format"="An url", "description"="Url to check if it exists"}, - * {"name"="urls", "dataType"="string", "required"=false, "format"="An array of urls (?urls[]=http...&urls[]=http...)", "description"="Urls (as an array) to check if it exists"} + * {"name"="url", "dataType"="string", "required"=false, "format"="An url", "description"="DEPRECATED, use hashed_url instead"}, + * {"name"="urls", "dataType"="string",
"required"=false, "format"="An array of urls (?urls[]=http...&urls[]=http...)", "description"="DEPRECATED, use hashed_urls instead"}, + * {"name"="hashed_url", "dataType"="string", "required"=false, "format"="A hashed url", "description"="Hashed url using SHA1 to check if it exists"}, + * {"name"="hashed_urls", "dataType"="string", "required"=false, "format"="An array of hashed urls (?hashed_urls[]=xxx...&hashed_urls[]=xxx...)", "description"="An array of hashed urls using SHA1 to check if they exist"} * } * ) * @@ -37,17 +39,30 @@ class EntryRestController extends WallabagRestController public function getEntriesExistsAction(Request $request) { $this->validateAuthentication(); + $repo = $this->getDoctrine()->getRepository('WallabagCoreBundle:Entry'); $returnId = (null === $request->query->get('return_id')) ? false : (bool) $request->query->get('return_id'); + $urls = $request->query->get('urls', []); + $hashedUrls = $request->query->get('hashed_urls', []); // handle multiple urls first + if (!empty($hashedUrls)) { + $results = []; + foreach ($hashedUrls as $hashedUrl) { + $res = $repo->findByHashedUrlAndUserId($hashedUrl, $this->getUser()->getId()); + + $results[$hashedUrl] = $this->returnExistInformation($res, $returnId); + } + + return $this->sendResponse($results); + } + + // @deprecated, to be removed in 3.0 if (!empty($urls)) { $results = []; foreach ($urls as $url) { - $res = $this->getDoctrine() - ->getRepository('WallabagCoreBundle:Entry') - ->findByUrlAndUserId($url, $this->getUser()->getId()); + $res = $repo->findByUrlAndUserId($url, $this->getUser()->getId()); $results[$url] = $this->returnExistInformation($res, $returnId); } @@ -57,18 +72,21 @@ class EntryRestController extends WallabagRestController // let's see if it is a simple url?
$url = $request->query->get('url', ''); + $hashedUrl = $request->query->get('hashed_url', ''); - if (empty($url)) { + if (empty($url) && empty($hashedUrl)) { throw $this->createAccessDeniedException('URL is empty?, logged user id: ' . $this->getUser()->getId()); } - $res = $this->getDoctrine() - ->getRepository('WallabagCoreBundle:Entry') - ->findByUrlAndUserId($url, $this->getUser()->getId()); + $method = 'findByUrlAndUserId'; + if (!empty($hashedUrl)) { + $method = 'findByHashedUrlAndUserId'; + $url = $hashedUrl; + } - $exists = $this->returnExistInformation($res, $returnId); + $res = $repo->$method($url, $this->getUser()->getId()); - return $this->sendResponse(['exists' => $exists]); + return $this->sendResponse(['exists' => $this->returnExistInformation($res, $returnId)]); } /** diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php new file mode 100644 index 000000000..45bd8c5ff --- /dev/null +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -0,0 +1,98 @@ +setName('wallabag:generate-hashed-urls') + ->setDescription('Generates hashed urls for each entry') + ->setHelp('This command helps you to generates hashes of the url of each entry, to check through API if an URL is already saved') + ->addArgument('username', InputArgument::OPTIONAL, 'User to process entries'); + } + + protected function execute(InputInterface $input, OutputInterface $output) + { + $this->output = $output; + + $username = (string) $input->getArgument('username'); + + if ($username) { + try { + $user = $this->getUser($username); + $this->generateHashedUrls($user); + } catch (NoResultException $e) { + $output->writeln(sprintf('User "%s" not found.', $username)); + + return 1; + } + } else { + $users = $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findAll(); + + $output->writeln(sprintf('Generating hashed urls for "%d" users', \count($users))); + + foreach ($users as $user) { 
+ $output->writeln(sprintf('Processing user: %s', $user->getUsername())); + $this->generateHashedUrls($user); + } + $output->writeln('Finished generated hashed urls'); + } + + return 0; + } + + /** + * @param User $user + */ + private function generateHashedUrls(User $user) + { + $em = $this->getContainer()->get('doctrine.orm.entity_manager'); + $repo = $this->getDoctrine()->getRepository('WallabagCoreBundle:Entry'); + + $entries = $repo->findByUser($user->getId()); + + $i = 1; + foreach ($entries as $entry) { + $entry->setHashedUrl(hash('sha1', $entry->getUrl())); + $em->persist($entry); + + if (0 === ($i % 20)) { + $em->flush(); + } + ++$i; + } + + $em->flush(); + + $this->output->writeln(sprintf('Generated hashed urls for user: %s', $user->getUserName())); + } + + /** + * Fetches a user from its username. + * + * @param string $username + * + * @return \Wallabag\UserBundle\Entity\User + */ + private function getUser($username) + { + return $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findOneByUserName($username); + } + + private function getDoctrine() + { + return $this->getContainer()->get('doctrine'); + } +} diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index b3cfdc4a4..c3fb87d21 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -25,7 +25,8 @@ use Wallabag\UserBundle\Entity\User; * options={"collate"="utf8mb4_unicode_ci", "charset"="utf8mb4"}, * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), - * @ORM\Index(name="uid", columns={"uid"}) + * @ORM\Index(name="uid", columns={"uid"}), + * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}) * } * ) * @ORM\HasLifecycleCallbacks() @@ -75,6 +76,13 @@ class Entry */ private $url; + /** + * @var string + * + * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) + */ + private $hashedUrl; + /** * @var bool * 
@@ -316,6 +324,7 @@ class Entry public function setUrl($url) { $this->url = $url; + $this->hashedUrl = hash('sha1', $url); return $this; } @@ -911,4 +920,24 @@ class Entry { return $this->originUrl; } + + /** + * @return string|null SHA1 hash of the url, or null when it has not been set yet + */ + public function getHashedUrl() + { + return $this->hashedUrl; + } + + /** + * @param string $hashedUrl SHA1 hash of the entry url + * + * @return Entry + */ + public function setHashedUrl($hashedUrl) + { + $this->hashedUrl = $hashedUrl; + + return $this; + } } diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 45366623d..f50897296 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -346,6 +346,30 @@ class EntryRepository extends EntityRepository return false; } + /** + * Find an entry by its hashed url and its owner. + * If it exists, return the first matching entry, otherwise return false. + * + * @param string $hashedUrl Url hashed using sha1 + * @param int $userId Id of the user owning the entry + * + * @return Entry|false + */ + public function findByHashedUrlAndUserId($hashedUrl, $userId) + { + $res = $this->createQueryBuilder('e') + ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) + ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) + ->getQuery() + ->getResult(); + + if (\count($res)) { + return current($res); + } + + return false; + } + /** * Count all entries for a user.
* diff --git a/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php b/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php index 2151f587e..8cc12ed37 100644 --- a/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php +++ b/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php @@ -971,33 +971,49 @@ class EntryRestControllerTest extends WallabagApiTestCase $this->assertGreaterThanOrEqual($now->getTimestamp(), (new \DateTime($content['starred_at']))->getTimestamp()); } - public function testGetEntriesExistsWithReturnId() + public function dataForEntriesExistWithUrl() { - $this->client->request('GET', '/api/entries/exists?url=http://0.0.0.0/entry2&return_id=1'); + $url = hash('sha1', 'http://0.0.0.0/entry2'); - $this->assertSame(200, $this->client->getResponse()->getStatusCode()); - - $content = json_decode($this->client->getResponse()->getContent(), true); - - // it returns a database id, we don't know it, so we only check it's greater than the lowest possible value - $this->assertGreaterThan(1, $content['exists']); + return [ + 'with_id' => [ + 'url' => '/api/entries/exists?url=http://0.0.0.0/entry2&return_id=1', + 'expectedValue' => 2, + ], + 'without_id' => [ + 'url' => '/api/entries/exists?url=http://0.0.0.0/entry2', + 'expectedValue' => true, + ], + 'hashed_url_with_id' => [ + 'url' => '/api/entries/exists?hashed_url=' . $url . '&return_id=1', + 'expectedValue' => 2, + ], + 'hashed_url_without_id' => [ + 'url' => '/api/entries/exists?hashed_url=' . $url . 
'', + 'expectedValue' => true, + ], + ]; } - public function testGetEntriesExistsWithoutReturnId() + /** + * @dataProvider dataForEntriesExistWithUrl + */ + public function testGetEntriesExists($url, $expectedValue) { - $this->client->request('GET', '/api/entries/exists?url=http://0.0.0.0/entry2'); + $this->client->request('GET', $url); $this->assertSame(200, $this->client->getResponse()->getStatusCode()); $content = json_decode($this->client->getResponse()->getContent(), true); - $this->assertTrue($content['exists']); + $this->assertSame($expectedValue, $content['exists']); } public function testGetEntriesExistsWithManyUrls() { $url1 = 'http://0.0.0.0/entry2'; $url2 = 'http://0.0.0.0/entry10'; + $this->client->request('GET', '/api/entries/exists?urls[]=' . $url1 . '&urls[]=' . $url2 . '&return_id=1'); $this->assertSame(200, $this->client->getResponse()->getStatusCode()); @@ -1027,6 +1043,38 @@ class EntryRestControllerTest extends WallabagApiTestCase $this->assertFalse($content[$url2]); } + public function testGetEntriesExistsWithManyUrlsHashed() + { + $url1 = 'http://0.0.0.0/entry2'; + $url2 = 'http://0.0.0.0/entry10'; + $this->client->request('GET', '/api/entries/exists?hashed_urls[]=' . hash('sha1', $url1) . '&hashed_urls[]=' . hash('sha1', $url2) . '&return_id=1'); + + $this->assertSame(200, $this->client->getResponse()->getStatusCode()); + + $content = json_decode($this->client->getResponse()->getContent(), true); + + $this->assertArrayHasKey(hash('sha1', $url1), $content); + $this->assertArrayHasKey(hash('sha1', $url2), $content); + $this->assertSame(2, $content[hash('sha1', $url1)]); + $this->assertNull($content[hash('sha1', $url2)]); + } + + public function testGetEntriesExistsWithManyUrlsHashedReturnBool() + { + $url1 = 'http://0.0.0.0/entry2'; + $url2 = 'http://0.0.0.0/entry10'; + $this->client->request('GET', '/api/entries/exists?hashed_urls[]=' . hash('sha1', $url1) . '&hashed_urls[]=' . 
hash('sha1', $url2)); + + $this->assertSame(200, $this->client->getResponse()->getStatusCode()); + + $content = json_decode($this->client->getResponse()->getContent(), true); + + $this->assertArrayHasKey(hash('sha1', $url1), $content); + $this->assertArrayHasKey(hash('sha1', $url2), $content); + $this->assertTrue($content[hash('sha1', $url1)]); + $this->assertFalse($content[hash('sha1', $url2)]); + } + public function testGetEntriesExistsWhichDoesNotExists() { $this->client->request('GET', '/api/entries/exists?url=http://google.com/entry2'); @@ -1038,6 +1086,17 @@ class EntryRestControllerTest extends WallabagApiTestCase $this->assertFalse($content['exists']); } + public function testGetEntriesExistsWhichDoesNotExistsWithHashedUrl() + { + $this->client->request('GET', '/api/entries/exists?hashed_url=' . hash('sha1', 'http://google.com/entry2')); + + $this->assertSame(200, $this->client->getResponse()->getStatusCode()); + + $content = json_decode($this->client->getResponse()->getContent(), true); + + $this->assertFalse($content['exists']); + } + public function testGetEntriesExistsWithNoUrl() { $this->client->request('GET', '/api/entries/exists?url='); @@ -1045,6 +1104,13 @@ class EntryRestControllerTest extends WallabagApiTestCase $this->assertSame(403, $this->client->getResponse()->getStatusCode()); } + public function testGetEntriesExistsWithNoHashedUrl() + { + $this->client->request('GET', '/api/entries/exists?hashed_url='); + + $this->assertSame(403, $this->client->getResponse()->getStatusCode()); + } + public function testReloadEntryErrorWhileFetching() { $entry = $this->client->getContainer()->get('doctrine.orm.entity_manager') diff --git a/tests/Wallabag/CoreBundle/Command/GenerateUrlHashesCommandTest.php b/tests/Wallabag/CoreBundle/Command/GenerateUrlHashesCommandTest.php new file mode 100644 index 000000000..17eed210b --- /dev/null +++ b/tests/Wallabag/CoreBundle/Command/GenerateUrlHashesCommandTest.php @@ -0,0 +1,98 @@ +getClient()->getKernel()); + 
$application->add(new GenerateUrlHashesCommand()); + + $command = $application->find('wallabag:generate-hashed-urls'); + + $tester = new CommandTester($command); + $tester->execute([ + 'command' => $command->getName(), + ]); + + $this->assertContains('Generating hashed urls for "3" users', $tester->getDisplay()); + $this->assertContains('Finished generated hashed urls', $tester->getDisplay()); + } + + public function testRunGenerateUrlHashesCommandWithBadUsername() + { + $application = new Application($this->getClient()->getKernel()); + $application->add(new GenerateUrlHashesCommand()); + + $command = $application->find('wallabag:generate-hashed-urls'); + + $tester = new CommandTester($command); + $tester->execute([ + 'command' => $command->getName(), + 'username' => 'unknown', + ]); + + $this->assertContains('User "unknown" not found', $tester->getDisplay()); + } + + public function testRunGenerateUrlHashesCommandForUser() + { + $application = new Application($this->getClient()->getKernel()); + $application->add(new GenerateUrlHashesCommand()); + + $command = $application->find('wallabag:generate-hashed-urls'); + + $tester = new CommandTester($command); + $tester->execute([ + 'command' => $command->getName(), + 'username' => 'admin', + ]); + + $this->assertContains('Generated hashed urls for user: admin', $tester->getDisplay()); + } + + public function testGenerateUrls() + { + $url = 'http://www.lemonde.fr/sport/visuel/2017/05/05/rondelle-prison-blanchissage-comprendre-le-hockey-sur-glace_5122587_3242.html'; + $client = $this->getClient(); + $em = $client->getContainer()->get('doctrine.orm.entity_manager'); + + $this->logInAs('admin'); + + $user = $em->getRepository('WallabagUserBundle:User')->findOneById($this->getLoggedInUserId()); + + $entry1 = new Entry($user); + $entry1->setUrl($url); + + $em->persist($entry1); + $em->flush(); + + $application = new Application($this->getClient()->getKernel()); + $application->add(new GenerateUrlHashesCommand()); + + $command 
= $application->find('wallabag:generate-hashed-urls'); + + $tester = new CommandTester($command); + $tester->execute([ + 'command' => $command->getName(), + 'username' => 'admin', + ]); + + $this->assertContains('Generated hashed urls for user: admin', $tester->getDisplay()); + + $entry = $em->getRepository('WallabagCoreBundle:Entry')->findOneByUrl($url); + + $this->assertSame(hash('sha1', $url), $entry->getHashedUrl()); + + $query = $em->createQuery('DELETE FROM Wallabag\CoreBundle\Entity\Entry e WHERE e.url = :url'); + $query->setParameter('url', $url); + $query->execute(); + } +}