mirror of
https://gitlab.com/octtspacc/sitoctt
synced 2025-06-05 22:09:20 +02:00
Init con Hugo, conversione ListedDownsync.js, fix metadati post
This commit is contained in:
12
Scripts/.Globals.sh
Normal file
12
Scripts/.Globals.sh
Normal file
@ -0,0 +1,12 @@
|
||||
#!/bin/sh
# Shared configuration sourced by every script in Scripts/.
# NOTE: everything above the "#/bin/sh!" sentinel below must stay plain
# NAME="value" assignments — Translate.Main.py reads this file and
# exec()s the text before the sentinel as Python to reuse these values.
RepoUrl="https://gitlab.com/octtspacc/sitoctt.git"
BranchThis="sitoctt-next"
BranchTranslate="translate-cache"

#/bin/sh!
# ^ Sentinel ending the Python-compatible section (see Translate.Main.py).

set -e
# owd: the caller's working directory; swd: the Scripts/ directory.
owd="$(pwd)"
swd="$(dirname "$(realpath "$0")")"
|
||||
|
18
Scripts/Build.sh
Executable file
18
Scripts/Build.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/bin/sh
# Build the site: stage sources into ./build, run the translation
# pipeline, invoke Hugo, then push the translation cache.
# Restores the caller's working directory at the end.
. "$(dirname "$(realpath "$0")")/.Globals.sh"
cd "${swd}/.."

mkdir -p ./build
cd ./build
cp -r \
	../archetypes ../content \
	../layouts ../themes \
	../config.toml ../hugo.toml \
	./
sh "${swd}/Translate.Get.sh"
# Prefer python3 when available, otherwise fall back to python.
# (command -v is POSIX; which(1) is not guaranteed to exist.)
if command -v python3 >/dev/null 2>&1
then python3 "${swd}/Translate.Main.py"
else python "${swd}/Translate.Main.py"
fi
sh "${swd}/Translate.Apply.sh"
hugo
sh "${swd}/Translate.Push.sh"

cd "${owd}"
|
7
Scripts/Clean.sh
Executable file
7
Scripts/Clean.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/sh
# Remove every generated artifact: build staging area, Hugo output,
# and Hugo's resource cache. Restores the caller's working directory.
. "$(dirname "$(realpath "$0")")/.Globals.sh"
cd "${swd}/.."

for artifact in ./build ./public ./resources
do rm -rf "${artifact}"
done

cd "${owd}"
|
@ -6,24 +6,33 @@ Exp.ConfigParser = require('./config-ini-parser').ConfigIniParser;
|
||||
Exp.importAll = function importAll(){ delete this.importAll; Object.assign(global, this); };
|
||||
|
||||
Exp.ParseMeta = (Raw) => {
|
||||
let Mid = { Meta: "", Macros: "", };
|
||||
let Data = { Meta: {}, Macros: {}, };
|
||||
let Mid = { Meta: "", Macros: "" };
|
||||
let Data = { Meta: {}, Macros: {}, IsToml: false };
|
||||
const Lines = Raw.trim().split('\n');
|
||||
if (Lines[0].trim() === '+++' && Lines.slice(-1)[0].trim() === '+++') {
|
||||
Data.IsToml = true;
|
||||
Lines = Lines.slice(1, -1);
|
||||
}
|
||||
for (let i=0; i<Lines.length; i++) {
|
||||
let Type;
|
||||
let Line = Lines[i].trim();
|
||||
if (Line.startsWith('//')) {
|
||||
Line = Line.slice('2').trim();
|
||||
};
|
||||
if (Line.startsWith('%')) {
|
||||
Type = 'Meta';
|
||||
} else if (Line.startsWith('$')) {
|
||||
Type = 'Macros';
|
||||
}
|
||||
if (['%', '$'].includes(Line[0])) {
|
||||
if (Line.startsWith('%')) {
|
||||
Type = 'Meta';
|
||||
} else if (Line.startsWith('$')) {
|
||||
Type = 'Macros';
|
||||
}
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
if (!Data.IsToml) {
|
||||
continue;
|
||||
}
|
||||
Mid.Meta += Line.trim() + '\n';
|
||||
}
|
||||
Mid[Type] += Line.substring(1).trim() + '\n';
|
||||
};
|
||||
}
|
||||
Object.keys(Mid).forEach((Type) => {
|
||||
const Items = new ConfigParser().parse(Mid[Type]).items();
|
||||
Items.forEach((Item) => {
|
||||
|
@ -87,12 +87,14 @@ const GetLinkElem = Dom => {
|
||||
const MakeMetaStr = Post => {
|
||||
let Str = '';
|
||||
['Meta', 'Macros'].forEach((Type) => {
|
||||
const Marks = { Meta: "%", Macros: "$", };
|
||||
const Marks = { Meta: "%", Macros: "$" };
|
||||
Object.keys(Post[Type]).forEach((Key) => {
|
||||
Str += `// ${Marks[Type]} ${Key} = ${Post[Type][Key]}\n`;
|
||||
//Str += `// ${Marks[Type]} ${Key} = ${Post[Type][Key]}\n`;
|
||||
// TODO: should this properly handle non-strings?
|
||||
Str += `${Key} = ${JSON.stringify(Post[Type][Key])}\n`;
|
||||
});
|
||||
});
|
||||
return Str;
|
||||
return `+++\n${Str}+++\n`; //Str;
|
||||
};
|
||||
|
||||
const HandlePost = (PostSrc, Output) => {
|
||||
@ -109,9 +111,10 @@ const HandlePost = (PostSrc, Output) => {
|
||||
// Handle .MetadataBlock elements
|
||||
Array.from(ContentDom.querySelectorAll(MetadataBlockSelect)).forEach((Elem) => {
|
||||
if (Elem.textContent) {
|
||||
const Meta = ParseMeta(Elem.textContent);
|
||||
const Meta = ParseMeta(Post.RawMeta = Elem.textContent);
|
||||
Post.Meta = Object.assign(Post.Meta, Meta.Meta);
|
||||
Post.Macros = Object.assign(Post.Macros, Meta.Macros);
|
||||
Post.IsToml = Meta.IsToml;
|
||||
};
|
||||
Elem.outerHTML = '';
|
||||
});
|
||||
@ -150,7 +153,7 @@ const HandlePost = (PostSrc, Output) => {
|
||||
Post.Content = GetFragHTML(ContentDom);
|
||||
|
||||
const PathFile = LinkPath.split('/').slice(-1)[0];
|
||||
const PathDir = LinkPath.split('/').slice(0, (LinkPath.split('/').length - 1)).join('/');
|
||||
const PathDir = ('./content/' + LinkPath.split('/').slice(0, (LinkPath.split('/').length - 1)).join('/'));
|
||||
const DatePrefix = PostsFileDate ? Post.Meta.CreatedOn + '-' : '';
|
||||
const FinalFilePath = `${PathDir}/${DatePrefix}${PathFile.substring(0, (PathFile.length - 4))}md`;
|
||||
if (!CheckDownsync(TryReadFileSync(FinalFilePath))) {
|
||||
@ -197,8 +200,8 @@ const HandlePost = (PostSrc, Output) => {
|
||||
if (Output == 'file') {
|
||||
TryMkdirSync(PathDir);
|
||||
Fs.writeFileSync(FinalFilePath, `\
|
||||
<!-- < Autogenerated by ListedDownsync.js. Do not edit (unless also set "% Downsync = False") - it would be overwritten. > -->
|
||||
${MakeMetaStr(Post)}
|
||||
${/* Post.IsToml ? Post.RawMeta : */ MakeMetaStr(Post)}
|
||||
<!-- Autogenerated by ListedDownsync.js. Do not edit (unless also set "% Downsync = False") - it would be overwritten. -->
|
||||
<h1>${Post.Meta.HTMLTitle ? Post.Meta.HTMLTitle : Post.Meta.Title}</h1>
|
||||
|
||||
${Post.Content}
|
||||
|
6
Scripts/Translate.Apply.sh
Executable file
6
Scripts/Translate.Apply.sh
Executable file
@ -0,0 +1,6 @@
|
||||
#!/bin/sh
# Overlay cached translations from ./translate onto ./content.
. "$(dirname "$(realpath "$0")")/.Globals.sh"

# `ls` without -A lists no dotfiles, so a fresh ./translate clone that
# only contains .git counts as "no translations yet" and nothing is
# copied; likewise the `*` glob below never matches dotfiles (.git is
# intentionally left behind). NOTE(review): assumes the caller already
# cd'd into the build root (Build.sh does) — confirm for other callers.
if [ -n "$(ls ./translate)" ]
then cp -r ./translate/* ./content/
fi
6
Scripts/Translate.Get.sh
Executable file
6
Scripts/Translate.Get.sh
Executable file
@ -0,0 +1,6 @@
|
||||
#!/bin/sh
# Fetch the translation-cache branch into ./translate (only once;
# subsequent runs reuse the existing checkout).
. "$(dirname "$(realpath "$0")")/.Globals.sh"

if ! [ -d ./translate ]; then
	git clone --depth 1 --branch "${BranchTranslate}" "${RepoUrl}" ./translate
fi
|
98
Scripts/Translate.Main.py
Executable file
98
Scripts/Translate.Main.py
Executable file
@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env python3
|
||||
# Languages to translate into; a document's own source language is skipped.
DestinationLanguages = ["it", "en"] # "de", "eo", "es", "fr"
# Content subtrees to include/exclude; paths are relative to a language
# root under ../content/ (e.g. "/Categories" matches it/Categories/...).
IncludePaths = ["/"]
ExcludePaths = ["/Categories"]
|
||||
|
||||
from os import getcwd, listdir
|
||||
from os.path import dirname, realpath, isfile, isdir
|
||||
from pathlib import Path
|
||||
from translate_shell.translate import translate
|
||||
|
||||
# TODO handle frontmatter properly, some data must be translated (title, ...) but other must not (date, ...)
|
||||
# TODO also somehow handle overriding data for some translation (title, slug, ...)
|
||||
# TODO add warning about automatic translation at the beginning
|
||||
# TODO handle deleted files? (it should probably be done in another sh script, not here)
|
||||
|
||||
#
|
||||
|
||||
def printf(*objects):
    """Print the given objects space-separated, without a trailing newline
    (mirrors the call shape of C's printf for progress output)."""
    print(*objects, sep=' ', end='')
|
||||
|
||||
def get_source_language(document_path):
    """Return the document's language code — its first path component
    (documents live under ../content/<lang>/...)."""
    language, _, _ = document_path.partition('/')
    return language
|
||||
|
||||
def make_destination_path(document_path, destination_language):
    """Map a source document path (language prefix included) onto its
    translated location under ./translate/<destination_language>/."""
    tail = '/'.join(document_path.split('/')[1:])
    return f'./translate/{destination_language}/{tail}'
|
||||
|
||||
# TODO check for edit date in already translated documents and update them if needed
def is_translation_uptodate(source_path, destination_path):
    """Stub: treat every existing translation as current.

    Always returns True; see the TODO above for the intended
    modification-time comparison between source and translation.
    """
    return True
|
||||
|
||||
# TODO handle when the same document is available in multiple source languages?
def needs_translation(source_path, destination_language=None):
    """Decide whether source_path (relative to ../content/, language
    prefix included) should be translated.

    Returns False when the document lives under an excluded subtree,
    has no content, or — when destination_language is given — already
    has an up-to-date translation on disk. Returns True otherwise.
    """
    for folder_path in ExcludePaths:
        # Compare against the path with its language component stripped,
        # so "/Categories" excludes every <lang>/Categories/... subtree.
        if ('/' + '/'.join(source_path.split('/')[1:])).startswith(folder_path + '/'):
            return False
    # Skip empty documents; close the handle deterministically
    # (the original leaked the file object).
    with open('../content/' + source_path) as source_file:
        if not source_file.read().strip():
            return False
    if destination_language:
        destination_path = make_destination_path(source_path, destination_language)
        if isfile(destination_path) and is_translation_uptodate(source_path, destination_path):
            return False
    return True
|
||||
|
||||
def find_documents(folder_path):
    """Scan folder_path recursively and return a dict mapping each
    document needing translation to the list of languages it still
    needs translating into (possibly empty)."""
    documents_queue, documents = [], {}
    for document_path in Path(folder_path).rglob('*.*'):
        if isfile(document_path):
            # Drop the two leading components ('..', 'content') so stored
            # paths look like '<lang>/<rest>'. Assumes folder_path starts
            # with '../content/' (true for main()) — TODO confirm if
            # other callers appear.
            documents_queue.append('/'.join(str(document_path).split('/')[2:]))
    for document in documents_queue:
        if needs_translation(document):
            documents[document] = []
            # Never translate a document into its own source language.
            for destination_language in list(set(DestinationLanguages) - {get_source_language(document)}):
                if needs_translation(document, destination_language):
                    documents[document].append(destination_language)
    return documents
|
||||
|
||||
def translate_document(document_path, documents):
    """Translate one document into each pending destination language and
    write the results under ./translate/<lang>/.

    documents maps document_path -> list of destination language codes
    (as produced by find_documents). Prints per-language progress;
    failures are reported and skipped, never raised.
    """
    printf(f'* {document_path} ->')
    for destination_language in documents[document_path]:
        source_language = get_source_language(document_path)
        printf('', destination_language)
        try:
            # Read the source fresh for each language and close the
            # handle promptly (the original leaked the file object).
            with open('../content/' + document_path, 'r') as source_file:
                source_text = source_file.read()
            translated = translate(
                source_text,
                destination_language,
                source_language)
            if not len(translated.results):
                raise Exception("Unhandled error")
            printf('✅')
        except Exception as exception:
            printf('❌', exception)
            continue
        translated_text = translated.results[0].paraphrase
        text_header = translated_text.strip().splitlines()[0].strip()
        # Hugo shortcode warning readers that this is a machine translation.
        translated_preamble = ("\n\n{{< noticeAutomaticTranslation " + source_language + " >}}\n\n")
        if text_header in ["---", "+++"]:
            # Front matter present: re-insert the notice right after the
            # closing front-matter delimiter instead of before it.
            text_tokens = translated_text.split(text_header)
            translated_text = (text_header.join(text_tokens[:2]) + translated_preamble + text_header.join(text_tokens[2:]))
        else:
            translated_text = (translated_preamble + translated_text)
        destination_path = make_destination_path(document_path, destination_language)
        Path('/'.join(destination_path.split('/')[:-1])).mkdir(parents=True, exist_ok=True)
        # Write via a context manager so the data is flushed and the
        # handle closed even if write() fails.
        with open(destination_path, 'w') as destination_file:
            destination_file.write(translated_text)
    printf('\n')
|
||||
|
||||
def main():
    """Walk every language tree under ../content and translate every
    document that still has pending destination languages."""
    for source_language in listdir('../content'):
        for folder_path in IncludePaths:
            pending = find_documents('../content/' + source_language + folder_path)
            for document_path, languages in pending.items():
                if languages:
                    translate_document(document_path, pending)
|
||||
|
||||
if __name__ == "__main__":
    # Reuse the shell configuration: .Globals.sh's first line is
    # '#!/bin/sh' and the file contains a '#/bin/sh!' sentinel line;
    # everything before the sentinel is plain NAME="value" assignments,
    # which are also valid Python, so exec() imports them here.
    # (The split separator below reconstructs '#/bin/sh!' from the shebang.)
    globals_text = open(dirname(realpath(__file__)) + '/.Globals.sh').read()
    exec(globals_text.split('#' + globals_text.splitlines()[0].split('#!')[1] + '!')[0])
    main()
|
6
Scripts/Translate.Push.sh
Executable file
6
Scripts/Translate.Push.sh
Executable file
@ -0,0 +1,6 @@
|
||||
#!/bin/sh
# Commit and push the translation cache. The `|| true` guard keeps the
# build alive when there is nothing to commit or the push fails.
. "$(dirname "$(realpath "$0")")/.Globals.sh"

cd ./translate
{ git add . && git commit -m . && git push; } || true
cd ..
|
@ -1,87 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import hashlib, os, shutil, sys, time
|
||||
from pathlib import Path
|
||||
|
||||
GoogleLangs = ['af', 'sq', 'am', 'ar', 'hy', 'as', 'ay', 'az', 'bm', 'eu', 'be', 'bn', 'bho', 'bs', 'bg', 'ca', 'ceb', 'ny', 'zh', 'zh_HANT', 'co', 'hr', 'cs', 'da', 'dv', 'doi', 'nl', 'en', 'eo', 'et', 'ee', 'tl', 'fi', 'fr', 'fy', 'gl', 'ka', 'de', 'el', 'gn', 'gu', 'ht', 'ha', 'haw', 'iw', 'hi', 'hmn', 'hu', 'is', 'ig', 'ilo', 'id', 'ga', 'it', 'ja', 'jw', 'kn', 'kk', 'km', 'rw', 'gom', 'ko', 'kri', 'ku', 'ckb', 'ky', 'lo', 'la', 'lv', 'ln', 'lt', 'lg', 'lb', 'mk', 'mai', 'mg', 'ms', 'ml', 'mt', 'mi', 'mr', 'mni-Mtei', 'lus', 'mn', 'my', 'ne', 'no', 'or', 'om', 'ps', 'fa', 'pl', 'pt', 'pa', 'qu', 'ro', 'ru', 'sm', 'sa', 'gd', 'nso', 'sr', 'st', 'sn', 'sd', 'si', 'sk', 'sl', 'so', 'es', 'su', 'sw', 'sv', 'tg', 'ta', 'tt', 'te', 'th', 'ti', 'ts', 'tr', 'tk', 'ak', 'uk', 'ur', 'ug', 'uz', 'vi', 'cy', 'xh', 'yi', 'yo', 'zu'] # {LingvaURL}/api/v1/languages
|
||||
DeepLLangs = ['et', 'ja', 'lt', 'lv', 'de', 'hu', 'ru', 'zh', 'ro', 'da', 'it', 'es', 'nl', 'fr', 'sk', 'sl', 'pt', 'en', 'sv', 'fi', 'pl', 'el', 'bg', 'cs'] # All from output of `deepl --help`
|
||||
UserAgent = 'sitoctt:Translate.py'
|
||||
|
||||
# Translation engine selection and language configuration.
Engine = 'Google' # 'Google' or 'DeepL'
SourceLang = 'it'
# Full target list; SourceLang is removed from it before the main loop.
DestLangs = GoogleLangs
|
||||
|
||||
# With shutil.copytree copy only folder struct, no files; https://stackoverflow.com/a/15664273
def IgnoreFiles(Dir, Files):
    """copytree `ignore` callback: ignore every regular file so only the
    directory structure is replicated."""
    ignored = []
    for name in Files:
        if os.path.isfile(os.path.join(Dir, name)):
            ignored.append(name)
    return ignored
|
||||
|
||||
def FindTextFiles():
    # Collect all text/markup documents to translate.
    # NOTE(review): the directory loop below is commented out, so `Dir`
    # here resolves to the module-level loop variable leaked from the
    # `for Dir in ('Pages', 'Posts/')` loop that runs before this
    # function is called — meaning only the last directory is scanned.
    # Also note `('Posts/')` is a plain string, not a 1-tuple, so
    # re-enabling that loop as written would iterate characters.
    List = []
    #for Dir in ('Posts/'): #('Pages/', 'Posts/'):
    for Ext in ('htm', 'html', 'markdown', 'md', 'pug', 'txt'):
        for File in Path(Dir).rglob(f'*.{Ext}'):
            List += [File]
    return List
|
||||
|
||||
def GetMetaComment(Paragraph, Num, Count, Translation=None):
    """Build the per-paragraph HTML marker comment embedding the
    paragraph index (counted from the end) and MD5 content hashes of the
    source and, when given, its translation."""
    source_hash = hashlib.md5(Paragraph.encode()).hexdigest()
    translation_hash = hashlib.md5(Translation.encode()).hexdigest() if Translation else ""
    return f'<!-- Paragraph {abs(Count-Num)} {source_hash} {translation_hash} --->'
|
||||
|
||||
def StrReverse(Str):
    """Return Str with its characters in reverse order."""
    return Str[::-1]
|
||||
|
||||
def TryTranslate(Text):
    # Attempt one translation of Text via the configured engine; return
    # the translated string, or False on any failure (the caller retries
    # with backoff). Engine, SourceLang, Lang, LingvaURL, UserAgent and
    # Translate are module-level globals set before this is called.
    # NOTE(review): falls through to an implicit None for any Engine
    # value other than 'google'/'deepl'.
    try:
        if Engine.lower() == 'google':
            # Lingva (Google front-end) REST call; the text is URL-encoded
            # into the path, so safe="" forces '/' to be escaped too.
            return json.loads(urlopen(Request(f'{LingvaURL}/api/v1/{SourceLang}/{Lang}/{URLParse.quote(Text, safe="")}', headers={'User-Agent':UserAgent})).read())["translation"]
        elif Engine.lower() == 'deepl':
            return Translate.translate(Text)
    except Exception as e:
        print(e)
        return False
|
||||
|
||||
# Engine-specific setup: Lingva needs its base URL from argv plus the
# urllib/json machinery; DeepL needs the third-party CLI wrapper.
if Engine.lower() == 'google':
    LingvaURL = sys.argv[1]
    import json
    from urllib import parse as URLParse
    from urllib.request import urlopen, Request
elif Engine.lower() == 'deepl':
    from deepl import deepl

# Never translate into the source language.
try:
    DestLangs.remove(SourceLang)
except Exception:
    pass

for Lang in DestLangs:
    if Engine.lower() == 'deepl':
        Translate = deepl.DeepLCLI(SourceLang, Lang)

    # Replicate the directory structure (no files) under i18n/<Lang>/.
    for Dir in ('Pages', 'Posts/'):
        shutil.copytree(Dir, f'i18n/{Lang}/{Dir}', ignore=IgnoreFiles, dirs_exist_ok=True)

    for File in FindTextFiles():
        if os.path.isfile(f'i18n/{Lang}/{File}'): # Skip already translated pages - This will evolve into a skip only if not updated
            continue

        with open(File, 'r') as f:
            FullSource, Target, Trans = f.read(), '', ''
        print(f'\n{Lang}/{File}:', end='')

        # The document is reversed, split on blank lines, and each
        # paragraph re-reversed before translating; each result is
        # prepended to Trans, so the final output is in original order.
        Paragraphs = StrReverse(FullSource).split('\n\n')
        for i,Paragraph in enumerate(Paragraphs): # Assuming no single paragraph is > 5000 chars
            print(f' {i}', end='')
            Backoff = 0
            Target = StrReverse(Paragraph)
            if not Target: # There were more than 2 line breaks
                Trans += '\n\n'
                continue
            Done = ''
            # Retry with linearly growing sleep until TryTranslate
            # returns a non-falsy translation.
            Done = ''
            while not Done:
                time.sleep(0.5 + (Backoff*1.025))
                Backoff += 1.025
                Done = TryTranslate(Target)
            Trans = GetMetaComment(Target, i, len(Paragraphs), Done) + '\n' + Done + '\n\n' + Trans

        with open(f'i18n/{Lang}/{File}', 'w') as f:
            f.write(Trans)
|
Reference in New Issue
Block a user