Merge pull request #2920 from wallabag/cleanduplicatescommand

Clean Duplicates Command
This commit is contained in:
Jérémy Benoist 2017-05-05 17:42:18 +02:00 committed by GitHub
commit ab742ee9c6
7 changed files with 319 additions and 2 deletions

View File

@ -0,0 +1,30 @@
Console Commands
================
wallabag provides a number of CLI commands to manage various tasks. You can list all the commands by executing `bin/console` in the wallabag folder.
Each command has help text accessible through `bin/console help %command%`.
.. note::
If you're in a production environment, remember to add `-e prod` to each command.
Notable commands
----------------
* `assets:install`: May be helpful if assets are missing.
* `cache:clear`: Should be run after each update (included in `make update`).
* `doctrine:migrations:status`: Outputs the status of your database migrations.
* `fos:user:activate`: Manually activates a user.
* `fos:user:change-password`: Changes a user's password.
* `fos:user:create`: Creates a user.
* `fos:user:deactivate`: Deactivates a user (the account is not deleted).
* `fos:user:demote`: Removes a role from a user, typically admin rights.
* `fos:user:promote`: Adds a role to a user, typically admin rights.
* `rabbitmq:*`: May be useful if you're using RabbitMQ.
* `wallabag:clean-duplicates`: Removes all duplicate entries for one user or for all users.
* `wallabag:export`: Exports all entries for a user. You can choose the output path of the file.
* `wallabag:import`: Imports entries in different formats into a user account.
* `wallabag:import:redis-worker`: Useful if you use Redis.
* `wallabag:install`: (Re)installs wallabag.
* `wallabag:tag:all`: Tags all entries for a user using his/her tagging rules.

View File

@ -0,0 +1,30 @@
Actions en ligne de commande
============================
wallabag a un certain nombre de commandes CLI pour effectuer des tâches. Vous pouvez lister toutes les commandes en exécutant `bin/console` dans le dossier d'installation de wallabag.
Chaque commande a une aide correspondante accessible via `bin/console help %command%`.
.. note::
Si vous êtes dans un environnement de production, souvenez-vous d'ajouter `-e prod` à chaque commande.
Commandes notables
------------------
* `assets:install`: Peut être utile si les *assets* sont manquants.
* `cache:clear`: doit être exécuté après chaque mise à jour (appelé dans `make update`).
* `doctrine:migrations:status`: Montre le statut des migrations de votre base de données.
* `fos:user:activate`: Activer manuellement un utilisateur.
* `fos:user:change-password`: Changer le mot de passe pour un utilisateur.
* `fos:user:create`: Créer un utilisateur.
* `fos:user:deactivate`: Désactiver un utilisateur (non supprimé).
* `fos:user:demote`: Supprimer un rôle d'un utilisateur, typiquement les droits d'administration.
* `fos:user:promote`: Ajoute un rôle à un utilisateur, typiquement les droits d'administration.
* `rabbitmq:*`: Peut être utile si vous utilisez RabbitMQ.
* `wallabag:clean-duplicates`: Supprime tous les articles dupliqués pour un utilisateur ou pour tous les utilisateurs.
* `wallabag:export`: Exporte tous les articles pour un utilisateur. Vous pouvez choisir le chemin du fichier exporté.
* `wallabag:import`: Importe les articles en différents formats dans un compte utilisateur.
* `wallabag:import:redis-worker`: Utile si vous utilisez Redis.
* `wallabag:install`: (ré)Installer wallabag
* `wallabag:tag:all`: Tagger tous les articles pour un utilisateur ou une utilisatrice en utilisant ses règles de tags automatiques.

View File

@ -0,0 +1,119 @@
<?php
namespace Wallabag\CoreBundle\Command;
use Doctrine\ORM\NoResultException;
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\UserBundle\Entity\User;
/**
 * Console command `wallabag:clean-duplicates`.
 *
 * Removes duplicated entries (same URL, ignoring one trailing "/" or "#")
 * for a single user when a username is given, or for every user otherwise.
 */
class CleanDuplicatesCommand extends ContainerAwareCommand
{
    /** @var OutputInterface Console output captured in execute() so helpers can report progress */
    protected $output;

    /** @var int Running total of duplicates removed across all processed users */
    protected $duplicates = 0;

    /**
     * Declares the command name, description, help text and the optional
     * "username" argument.
     */
    protected function configure()
    {
        $this
            ->setName('wallabag:clean-duplicates')
            ->setDescription('Cleans the database for duplicates')
            ->setHelp('This command helps you to clean your articles list in case of duplicates')
            ->addArgument(
                'username',
                InputArgument::OPTIONAL,
                'User to clean'
            );
    }

    /**
     * Runs the clean-up for one user (when "username" is given) or for all users.
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     *
     * @return int 0 on success, 1 when the requested user could not be found
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $this->output = $output;

        $username = $input->getArgument('username');

        if ($username) {
            try {
                $user = $this->getUser($username);
                $this->cleanDuplicates($user);
            } catch (NoResultException $e) {
                $output->writeln(sprintf('<error>User "%s" not found.</error>', $username));

                return 1;
            }
        } else {
            $users = $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findAll();

            $output->writeln(sprintf('Cleaning through %d user accounts', count($users)));

            foreach ($users as $user) {
                $output->writeln(sprintf('Processing user %s', $user->getUsername()));
                $this->cleanDuplicates($user);
            }
            $output->writeln(sprintf('Finished cleaning. %d duplicates found in total', $this->duplicates));
        }

        return 0;
    }

    /**
     * Deletes every entry of the given user whose normalised URL was already
     * seen on an earlier entry, then reports how many were removed.
     *
     * @param User $user
     */
    private function cleanDuplicates(User $user)
    {
        $em = $this->getContainer()->get('doctrine.orm.entity_manager');
        $repo = $this->getDoctrine()->getRepository('WallabagCoreBundle:Entry');

        $entries = $repo->getAllEntriesIdAndUrl($user->getId());

        $duplicatesCount = 0;
        // Normalised URLs already encountered, stored as keys for constant-time lookup.
        $seenUrls = [];

        foreach ($entries as $entry) {
            $url = $this->similarUrl($entry['url']);

            if (isset($seenUrls[$url])) {
                ++$duplicatesCount;

                $em->remove($repo->find($entry['id']));
                $em->flush(); // Flushing at the end of the loop would require the instance not being online
            } else {
                // Remember the *normalised* URL: the lookup above is done on the
                // normalised form, so storing the raw one would never match
                // entries that differ only by a trailing "/" or "#".
                $seenUrls[$url] = true;
            }
        }

        $this->duplicates += $duplicatesCount;

        $this->output->writeln(sprintf('Cleaned %d duplicates for user %s', $duplicatesCount, $user->getUsername()));
    }

    /**
     * Normalises a URL for duplicate detection by dropping one trailing "/"
     * or "#" character, if present.
     *
     * @param string $url
     *
     * @return string
     */
    private function similarUrl($url)
    {
        if (in_array(substr($url, -1), ['/', '#'], true)) { // get rid of "/" and "#" at the end of urls
            return substr($url, 0, -1);
        }

        return $url;
    }

    /**
     * Fetches a user from its username.
     *
     * NOTE(review): execute() relies on a NoResultException being thrown when
     * no user matches — presumably by the repository's findOneByUserName();
     * confirm against the user repository.
     *
     * @param string $username
     *
     * @return \Wallabag\UserBundle\Entity\User
     */
    private function getUser($username)
    {
        return $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findOneByUserName($username);
    }

    /**
     * Shortcut to the "doctrine" service from the container.
     */
    private function getDoctrine()
    {
        return $this->getContainer()->get('doctrine');
    }
}

View File

@ -379,4 +379,34 @@ class EntryRepository extends EntityRepository
->setParameter('userId', $userId)
->execute();
}
/**
 * Lists the id and url of every entry owned by the given user.
 * Used for the clean-duplicates command.
 *
 * @param mixed $userId Identifier of the user whose entries are listed
 *
 * @return array Rows holding only the "id" and "url" of each entry
 */
public function getAllEntriesIdAndUrl($userId)
{
    return $this->createQueryBuilder('e')
        ->select('e.id, e.url')
        ->where('e.user = :userid')
        ->setParameter(':userid', $userId)
        ->getQuery()
        ->getArrayResult();
}
/**
 * Returns every entry that has the given url and belongs to the given user.
 *
 * The url is passed through urldecode() before being compared.
 *
 * @param $url
 * @param $userId
 *
 * @return array
 */
public function findAllByUrlAndUserId($url, $userId)
{
    $builder = $this->createQueryBuilder('e');

    $builder->where('e.url = :url')->setParameter('url', urldecode($url));
    $builder->andWhere('e.user = :user_id')->setParameter('user_id', $userId);

    return $builder->getQuery()->getResult();
}
}

View File

@ -0,0 +1,108 @@
<?php
namespace Tests\Wallabag\CoreBundle\Command;
use Symfony\Bundle\FrameworkBundle\Console\Application;
use Symfony\Component\Console\Tester\CommandTester;
use Wallabag\CoreBundle\Command\CleanDuplicatesCommand;
use Tests\Wallabag\CoreBundle\WallabagCoreTestCase;
use Wallabag\CoreBundle\Entity\Entry;
/**
 * Functional tests for the `wallabag:clean-duplicates` console command.
 */
class CleanDuplicatesCommandTest extends WallabagCoreTestCase
{
    /**
     * Without a username, every account is processed and a total is printed.
     */
    public function testRunCleanDuplicates()
    {
        $display = $this->runCleanDuplicates()->getDisplay();

        $this->assertContains('Cleaning through 3 user accounts', $display);
        $this->assertContains('Finished cleaning. 0 duplicates found in total', $display);
    }

    /**
     * An unknown username is reported in the command output.
     */
    public function testRunCleanDuplicatesCommandWithBadUsername()
    {
        $tester = $this->runCleanDuplicates(['username' => 'unknown']);

        $this->assertContains('User "unknown" not found', $tester->getDisplay());
    }

    /**
     * A known username with no duplicated entry reports zero removals.
     */
    public function testRunCleanDuplicatesCommandForUser()
    {
        $tester = $this->runCleanDuplicates(['username' => 'admin']);

        $this->assertContains('Cleaned 0 duplicates for user admin', $tester->getDisplay());
    }

    /**
     * Two entries sharing the same url for one user are reduced to a single one.
     */
    public function testDuplicate()
    {
        $url = 'http://www.lemonde.fr/sport/visuel/2017/05/05/rondelle-prison-blanchissage-comprendre-le-hockey-sur-glace_5122587_3242.html';
        $client = $this->getClient();
        $em = $client->getContainer()->get('doctrine.orm.entity_manager');

        $this->logInAs('admin');

        // Nothing stored for this url before the test starts.
        $found = $em->getRepository('WallabagCoreBundle:Entry')->findAllByUrlAndUserId($url, $this->getLoggedInUserId());
        $this->assertCount(0, $found);

        $user = $em->getRepository('WallabagUserBundle:User')->findOneById($this->getLoggedInUserId());

        // Store the same url twice for the logged-in user.
        $first = new Entry($user);
        $first->setUrl($url);

        $second = new Entry($user);
        $second->setUrl($url);

        $em->persist($first);
        $em->persist($second);

        $em->flush();

        $found = $em->getRepository('WallabagCoreBundle:Entry')->findAllByUrlAndUserId($url, $this->getLoggedInUserId());
        $this->assertCount(2, $found);

        $tester = $this->runCleanDuplicates(['username' => 'admin']);

        $this->assertContains('Cleaned 1 duplicates for user admin', $tester->getDisplay());

        $found = $em->getRepository('WallabagCoreBundle:Entry')->findAllByUrlAndUserId($url, $this->getLoggedInUserId());
        $this->assertCount(1, $found);

        // Remove the remaining test entry.
        $query = $em->createQuery('DELETE FROM Wallabag\CoreBundle\Entity\Entry e WHERE e.url = :url');
        $query->setParameter('url', $url);
        $query->execute();
    }

    /**
     * Registers the command on a fresh application, executes it with the
     * given extra arguments and returns the tester holding its output.
     *
     * @param array $arguments Extra input passed after the "command" entry
     *
     * @return CommandTester
     */
    private function runCleanDuplicates(array $arguments = [])
    {
        $application = new Application($this->getClient()->getKernel());
        $application->add(new CleanDuplicatesCommand());

        $command = $application->find('wallabag:clean-duplicates');

        $tester = new CommandTester($command);
        $tester->execute(array_merge(['command' => $command->getName()], $arguments));

        return $tester;
    }
}

View File

@ -70,7 +70,7 @@ class ExportCommandTest extends WallabagCoreTestCase
$tester->execute([
'command' => $command->getName(),
'username' => 'admin',
'filepath' => 'specialexport.json'
'filepath' => 'specialexport.json',
]);
$this->assertFileExists('specialexport.json');

View File

@ -111,7 +111,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$this->assertEquals('http://domain.io', $entry->getUrl());
$this->assertEquals('my title', $entry->getTitle());
$this->assertEquals($this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
$this->assertEquals($this->fetchingErrorMessage.'<p><i>But we found a short description: </i></p>desc', $entry->getContent());
$this->assertEmpty($entry->getPreviewPicture());
$this->assertEmpty($entry->getLanguage());
$this->assertEmpty($entry->getHttpStatus());