sitoctt/Scripts/Translate.py

84 lines
2.7 KiB
Python
Raw Normal View History

2022-11-27 23:50:22 +01:00
#!/usr/bin/env python3
import hashlib, os, shutil, sys, time
2022-11-27 23:50:22 +01:00
from pathlib import Path
# Translation backend selector, compared case-insensitively by the code below:
# 'Google' sends requests to a Lingva instance's HTTP API, 'DeepL' uses deepl.DeepLCLI.
Engine = 'Google' # 'Google' or 'DeepL'
2022-11-27 23:50:22 +01:00
# Language the source files are written in; passed to the engine as the source language.
SourceLang = 'it'
# Languages to translate into; SourceLang is removed from this list before the translation loop runs.
# NOTE(review): 'jp' is not the ISO 639-1 code for Japanese (the commented-out DeepL list on this line uses 'ja') - confirm the engine accepts it.
DestLangs = ['de', 'en', 'es', 'fr', 'it', 'jp', 'ko', 'pt', 'ru', 'zh'] #['et', 'ja', 'lt', 'lv', 'de', 'hu', 'ru', 'zh', 'ro', 'da', 'it', 'es', 'nl', 'fr', 'sk', 'sl', 'pt', 'en', 'sv', 'fi', 'pl', 'el', 'bg', 'cs'] # All from output of `deepl --help`
2022-11-27 23:50:22 +01:00
# With shutil.copytree copy only folder struct, no files; https://stackoverflow.com/a/15664273
def IgnoreFiles(Dir, Files):
    """Return the entries of Files that are regular files inside Dir.

    Intended as the `ignore` callback of shutil.copytree: by telling it to
    ignore every file, only the directory structure gets copied.
    """
    Ignored = []
    for Name in Files:
        Candidate = os.path.join(Dir, Name)
        if os.path.isfile(Candidate):
            Ignored.append(Name)
    return Ignored
def FindTextFiles():
    """Collect every text file found recursively under the module-level `Dir`.

    `Dir` is not a parameter: it is read from module scope at call time.
    Results are grouped by extension, in the order the extensions are listed.

    Returns:
        A list of pathlib.Path objects.
    """
    Extensions = ('htm', 'html', 'markdown', 'md', 'pug', 'txt')
    Found = []
    for Ext in Extensions:
        Found.extend(Path(Dir).rglob(f'*.{Ext}'))
    return Found
2022-12-03 15:12:36 +01:00
def GetMetaComment(Paragraph, Num, Count, Translation=None):
    """Build the HTML comment line that tags one translated paragraph.

    The comment carries the paragraph number, computed as abs(Count - Num),
    the MD5 hex digest of the source paragraph and, when Translation is a
    non-empty string, the MD5 hex digest of the translation (otherwise that
    field is left empty).
    """
    def Digest(Text):
        return hashlib.md5(Text.encode()).hexdigest()

    Index = abs(Count - Num)
    SourceHash = Digest(Paragraph)
    if Translation:
        TransHash = Digest(Translation)
    else:
        TransHash = ""
    return f'<!-- Paragraph {Index} {SourceHash} {TransHash} --->'
2022-11-27 23:50:22 +01:00
def StrReverse(Str):
    """Return Str with its characters in reverse order."""
    Chars = list(Str)
    return ''.join(Chars[::-1])
2022-12-03 15:12:36 +01:00
def TryTranslate(Text):
    """Translate Text with the configured engine, reporting failure instead of raising.

    Reads the module-level names Engine, SourceLang and Lang, plus
    LingvaInstance (Google engine, via a Lingva HTTP API) or Translate
    (DeepL engine object), depending on the engine.

    Returns:
        The translated string on success; False if any exception was raised
        (the exception is printed). Falls through, returning None, if Engine
        matches neither backend.
    """
    try:
        EngineName = Engine.lower()
        if EngineName == 'google':
            # The whole text travels as one fully percent-encoded URL path segment.
            Url = f'{LingvaInstance}/api/v1/{SourceLang}/{Lang}/{URLParse.quote(Text, safe="")}'
            # Close the HTTP response deterministically instead of leaking the
            # connection until garbage collection.
            with urlopen(Request(Url)) as Response:
                return json.loads(Response.read())["translation"]
        elif EngineName == 'deepl':
            return Translate.translate(Text)
    except Exception as e:
        # Best-effort: the caller retries on any falsy result.
        print(e)
        return False
# Engine-specific setup: bring only the selected backend's dependencies into scope.
if Engine.lower() == 'google':
    # The base URL of the Lingva instance comes from the first CLI argument;
    # fail with a usage message rather than a bare IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f'Usage: {sys.argv[0]} <LingvaInstanceURL>')
    LingvaInstance = sys.argv[1]
    import json
    from urllib import parse as URLParse
    from urllib.request import urlopen, Request
elif Engine.lower() == 'deepl':
    from deepl import deepl

# Never translate the source language into itself.
if SourceLang in DestLangs:
    DestLangs.remove(SourceLang)
2022-11-27 23:50:22 +01:00
# Main loop: for each destination language, mirror the folder structure under
# i18n/<Lang>/ and write a paragraph-by-paragraph translation of every text file.
for Lang in DestLangs:
    if Engine.lower() == 'deepl':
        Translate = deepl.DeepLCLI(SourceLang, Lang)

    # Copy only the directory tree (IgnoreFiles filters out every file).
    for Dir in ('Pages', 'Posts/'):
        shutil.copytree(Dir, f'i18n/{Lang}/{Dir}', ignore=IgnoreFiles, dirs_exist_ok=True)

    # NOTE(review): FindTextFiles() reads the module-level Dir, which at this
    # point holds the last value of the loop above ('Posts/'); confirm that
    # only Posts are meant to be translated.
    for File in FindTextFiles():
        OutPath = f'i18n/{Lang}/{File}'
        if os.path.isfile(OutPath): # Skip already translated pages - This will evolve into a skip only if not updated
            continue

        # Explicit UTF-8 so non-ASCII text does not depend on the platform's default encoding.
        with open(File, 'r', encoding='utf-8') as f:
            FullSource, Target, Trans = f.read(), '', ''
        print(f'\n{Lang}/{File}:', end='')

        # The text is split on blank lines in reversed form, so paragraphs are
        # visited last-to-first and the output is rebuilt by prepending.
        Paragraphs = StrReverse(FullSource).split('\n\n')
        for i, Paragraph in enumerate(Paragraphs): # Assuming no single paragraph is > 5000 chars
            print(f' {i}', end='')
            Backoff = 0
            Target = StrReverse(Paragraph)
            if not Target: # There were more than 2 line breaks
                # Prepend like every other paragraph, so the extra gap stays
                # where it was instead of drifting to the end of the file.
                Trans = '\n\n' + Trans
                continue

            # Retry until the engine returns a non-empty result, waiting a bit
            # longer before each attempt. NOTE(review): there is no upper bound
            # on retries, so a paragraph that always fails blocks the run.
            Done = ''
            while not Done:
                time.sleep(0.5 + (Backoff*1.025))
                Backoff += 1.025
                Done = TryTranslate(Target)
            Trans = GetMetaComment(Target, i, len(Paragraphs), Done) + '\n' + Done + '\n\n' + Trans

        with open(OutPath, 'w', encoding='utf-8') as f:
            f.write(Trans)