enhanced script
This commit is contained in:
parent
530b46a882
commit
6f9fbc8d81
@ -2,29 +2,47 @@
|
||||
#
|
||||
# Make sure to have all dependencies installed:
|
||||
# pip3 install googletrans==4.0.0-rc1
|
||||
# (asyncio is part of the Python 3 standard library; the parallel version needs no extra install)
|
||||
#
|
||||
# You must provide raw RSS 2.0 UTF-8 feed XML data as input, for example with curl:
|
||||
# curl 'https://phys.org/rss-feed/' | python ./translate-rss2.py "en" "pt_BR"
|
||||
# curl 'https://phys.org/rss-feed/' | python ./translate-rss2.py "en" "pt_BR" "true"
|
||||
#
|
||||
# You must provide two additional command line arguments:
|
||||
# translate-rss2.py [FROM-LANGUAGE] [TO-LANGUAGE]
|
||||
# You must provide three command line arguments:
|
||||
# translate-rss2.py [FROM-LANGUAGE] [TO-LANGUAGE] [RUN-PARALLEL]
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import html
|
||||
import requests
|
||||
import distutils.util
|
||||
import xml.etree.ElementTree as ET
|
||||
from googletrans import Translator
|
||||
|
||||
# Command-line arguments: source language, target language and an optional
# "run in parallel" flag.
lang_from = sys.argv[1]
lang_to = sys.argv[2]

# The third argument is optional so that the two-argument invocation
# (translate-rss2.py FROM TO) keeps working; it then runs sequentially.
# strtobool raises ValueError for values it does not recognise.
# NOTE(review): distutils was removed from the standard library in
# Python 3.12 (PEP 632) - plan a replacement for strtobool.
parallel = len(sys.argv) > 3 and bool(distutils.util.strtobool(sys.argv[3]))
|
||||
|
||||
if parallel:
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
sys.stdin.reconfigure(encoding='utf-8')
|
||||
rss_data = sys.stdin.read()
|
||||
rss_document = ET.fromstring(rss_data)
|
||||
translator = Translator()
|
||||
|
||||
def translate_string(to_translate):
    """Translate text from ``lang_from`` to ``lang_to``, best-effort.

    If the translation call fails, the original text is returned unchanged
    so that the rest of the feed can still be produced.

    Args:
        to_translate: Text to translate.

    Returns:
        The translated text, or ``to_translate`` itself when it is empty
        or the translation fails.
    """
    # Nothing to translate: skip the translator call entirely.
    if not to_translate:
        return to_translate

    try:
        translated_text = translator.translate(to_translate, src=lang_from, dest=lang_to)

        # Pause after each request in sequential mode only
        # (presumably to throttle calls to the translation service).
        if not parallel:
            time.sleep(0.2)

        return translated_text.text
    except Exception:
        # Deliberately broad: any translation failure falls back to the
        # untranslated text. Not a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still propagate and the script can be interrupted.
        return to_translate
|
||||
|
||||
def process_article(article):
|
||||
title = article.find("title")
|
||||
@ -33,7 +51,15 @@ def process_article(article):
|
||||
contents = article.find("description")
|
||||
contents.text = translate_string(" ".join(contents.itertext()))
|
||||
|
||||
# Translate every <item> of the feed in place, exactly once.
articles = rss_document.findall(".//item")

if parallel:
    # Two worker threads translate items concurrently.
    with ThreadPoolExecutor(max_workers=2) as executor:
        # Draining the iterator waits for every task and re-raises the
        # first exception raised by a worker, in submission order.
        for _ in executor.map(process_article, articles):
            pass
else:
    for article in articles:
        process_article(article)
|
||||
|
||||
print(ET.tostring(rss_document, encoding = "unicode"))
|
Loading…
x
Reference in New Issue
Block a user