feedrss-toot/feed2toot/removeduplicates.py

#!/usr/bin/env python3
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
# Copyright © 2015-2019 Carl Chenet <carl.chenet@ohmytux.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>

# Remove duplicate links from the final string before sending the tweet
'''Remove duplicates from the final string before sending the tweet'''

class RemoveDuplicates:
    '''Remove duplicate links from the final string before sending the tweet.

    A link is any whitespace-delimited word starting with ``http://`` or
    ``https://``. The first occurrence of each link stays where it is and
    every later occurrence is dropped. All other text is left untouched,
    except that runs of consecutive spaces are collapsed to a single space.

    The cleaned text is available through the ``finaltweet`` property
    (and the ``tweet`` attribute) as soon as the object is constructed.
    '''

    def __init__(self, tweet):
        '''Constructor of RemoveDuplicates class.

        tweet -- the text to clean (str); it is processed immediately
        '''
        self.tweet = tweet
        self.main()

    def main(self):
        '''Drop repeated links from self.tweet and collapse runs of spaces.

        Links are compared as whole words, so a duplicated link that happens
        to be a prefix of a longer link (``http://a.com`` versus
        ``http://a.com/page``) never damages the longer one.
        '''
        text = self.tweet
        seen_links = set()
        pieces = []
        cursor = 0  # index in text just past the last word already handled
        for word in text.split():
            # Only whitespace lies between cursor and the next word, so the
            # first match at or after cursor is exactly that word.
            start = text.index(word, cursor)
            end = start + len(word)
            if word.startswith(('http://', 'https://')):
                if word in seen_links:
                    # Duplicate link: keep the whitespace that preceded it
                    # but skip the link itself.
                    pieces.append(text[cursor:start])
                    cursor = end
                    continue
                seen_links.add(word)
            pieces.append(text[cursor:end])
            cursor = end
        # Preserve whatever trailing whitespace followed the last word.
        pieces.append(text[cursor:])
        text = ''.join(pieces)
        # Removing adjacent duplicates can leave runs of three or more
        # spaces; repeat until no double space remains.
        while '  ' in text:
            text = text.replace('  ', ' ')
        self.tweet = text

    @property
    def finaltweet(self):
        '''Return the final tweet after duplicate links were removed.'''
        return self.tweet
first commit 2017-04-09 10:30:48 +02:00			`#!/usr/bin/env python3`
			`# vim:ts=4:sw=4:ft=python:fileencoding=utf-8`
update copyright and supported python versions 2019-08-18 12:08:27 +02:00			`# Copyright © 2015-2019 Carl Chenet <carl.chenet@ohmytux.com>`
first commit 2017-04-09 10:30:48 +02:00			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with this program. If not, see <http://www.gnu.org/licenses/>`

			`# Remove duplicates from the final string before sending the tweet`
			`'''Remove duplicates from the final string before sending the tweet'''`

cleaning some code (from MR #9) 2017-07-31 15:58:30 +02:00			`class RemoveDuplicates:`
first commit 2017-04-09 10:30:48 +02:00			`'''Remove duplicates from the final string before sending the tweet'''`
			`def __init__(self, tweet):`
			`'''Constructor of RemoveDuplicates class'''`
			`self.tweet = tweet`
			`self.main()`

			`def main(self):`
			`'''Main of the RemoveDuplicates class'''`
			`# identify duplicate links`
			`links = []`
			`for element in self.tweet.split():`
			`if element != ' ' and (element.startswith('http://') or element.startswith('https://')):`
			`newlink = True`
			`# if we already found this link, increment the counter`
cleaning some code (from MR #9) 2017-07-31 15:58:30 +02:00			`for i, _ in enumerate(links):`
first commit 2017-04-09 10:30:48 +02:00			`if links[i]['link'] == element:`
			`newlink = False`
			`links[i]['count'] += 1`
			`if newlink:`
cleaning some code (from MR #9) 2017-07-31 15:58:30 +02:00			`links.append({'link': element, 'count': 1})`
first commit 2017-04-09 10:30:48 +02:00			`# remove duplicates`
			`validatedlinks = []`
			`for i in range(len(links)):`
			`if links[i]['count'] >= 2:`
			`validatedlinks.append(links[i])`
			`wildcard = 'FEED2TOOTWILDCARD'`
			`for element in validatedlinks:`
cleaning some code (from MR #9) 2017-07-31 15:58:30 +02:00			`for i in range(element['count']):`
first commit 2017-04-09 10:30:48 +02:00			`# needed for not inversing the order of links if it is a duplicate`
			`# and the second link is not one`
			`if i == 0:`
cleaning some code (from MR #9) 2017-07-31 15:58:30 +02:00			`self.tweet = self.tweet.replace(element['link'], wildcard, 1)`
first commit 2017-04-09 10:30:48 +02:00			`else:`
			`self.tweet = self.tweet.replace(element['link'], '', 1)`
cleaning some code (from MR #9) 2017-07-31 15:58:30 +02:00			`# finally`
first commit 2017-04-09 10:30:48 +02:00			`self.tweet = self.tweet.replace(wildcard, element['link'], 1)`
			`# remove all 2xspaces`
			`self.tweet = self.tweet.replace(' ', ' ')`

			`@property`
			`def finaltweet(self):`
			`'''return the final tweet after duplicates were removed'''`
			`return self.tweet`