add i18n checker and autogenerated AI translations (#2160)
* use translated string
* fix typos in lnbits/static/i18n/{it,jp,nl,we}.js
* add missing strings to cs,en,sk translations
* remove duplicates from lnbits/static/i18n/{cs,en,kr,sk}.js
* add i18n checker
* add i18n ai tool
* add autogenerated AI translations
* add i18n-ai-tool check whether variables in formatted strings are not broken
* fix issues with variables found by the script
* chore: make bundle
This commit is contained in:
parent
9f835f8350
commit
cf4d575062
22 changed files with 1485 additions and 84 deletions
123
tools/i18n-ai-tool.py
Normal file
123
tools/i18n-ai-tool.py
Normal file
|
|
@@ -0,0 +1,123 @@
|
|||
# 1. Always check the results of the procedure
|
||||
# 2. Always run "npx prettier -w lnbits/static/i18n/XX.js" to reformat the result
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
import json5
|
||||
from openai import OpenAI
|
||||
|
||||
# CLI entry: the target language code is a required first argument.
if len(sys.argv) < 2:
    # fixed: the usage string previously said "tools/i18n-tool.py", but this
    # script actually lives at tools/i18n-ai-tool.py
    print("Usage: python3 tools/i18n-ai-tool.py <code> [language]")
    sys.exit(1)
lang = sys.argv[1]
|
||||
|
||||
|
||||
def load_language(lang):
    """Load the translation dict from lnbits/static/i18n/<lang>.js.

    The file is a JS assignment of the form
    ``window.localisation.<lang> = {...}``; the leading assignment text is
    stripped and the remaining object literal is parsed with json5.

    Raises AssertionError when the file does not start with the expected
    prefix, and whatever json5 raises on a malformed object literal.
    """
    # fixed: the file handle was previously opened without `with` and never
    # closed — use a context manager so it is released deterministically
    with open(f"lnbits/static/i18n/{lang}.js", "rt") as f:
        s = f.read()
    prefix = "window.localisation.%s = {\n" % lang
    assert s.startswith(prefix)
    # keep the trailing "{\n" (hence `- 2`) so json5 sees a complete object
    s = s[len(prefix) - 2 :]
    return json5.loads(s)
|
||||
|
||||
|
||||
def save_language(lang, data):
    # Serialize a translation dict back to lnbits/static/i18n/<lang>.js in the
    # same "window.localisation.<lang> = {...}" shape that load_language reads.
    # NOTE(review): values are written unescaped; a value containing BOTH quote
    # kinds would produce broken JS — confirm translations never mix quotes.
    # NOTE(review): the literal indentation inside the write format strings may
    # have been whitespace-mangled in transit — verify against the repo; the
    # header comment says to run prettier on the result afterwards anyway.
    with open(f"lnbits/static/i18n/{lang}.js", "wt") as f:
        f.write("window.localisation.%s = {\n" % lang)
        row = 0  # entry counter so the final entry omits the trailing comma
        for k, v in data.items():
            row += 1
            f.write(" %s:\n" % k)
            # pick the quoting style that does not clash with the value itself
            if "'" in v:
                f.write(' "%s"' % v)
            else:
                f.write(" '%s'" % v)
            if row == len(data):
                f.write("\n")  # last entry: newline only, no comma
            else:
                f.write(",\n")
        f.write("}\n")
|
||||
|
||||
|
||||
def string_variables_match(str1, str2):
    """Return True when both strings embed the same multiset of %{var} placeholders."""
    placeholder = re.compile(r"%\{[a-z0-9_]*\}")
    vars1 = placeholder.findall(str1)
    vars2 = placeholder.findall(str2)
    # order is irrelevant, multiplicity is not — compare sorted lists
    return sorted(vars1) == sorted(vars2)
|
||||
|
||||
|
||||
def translate_string(lang_from, lang_to, text):
    """Translate `text` from English into `lang_to` using the OpenAI API.

    Returns the translated string, or None when the API call fails or the
    translation broke a %{variable} placeholder.  `lang_from` is currently
    unused: the prompt hard-codes English as the source language.

    Raises KeyError for an unsupported target language code, and
    AssertionError when OPENAI_API_KEY is not set.
    """
    target = {
        "de": "German",
        "es": "Spanish",
        "jp": "Japanese",  # fixed: was "Japan" — the prompt needs a language name
        "cn": "Chinese",
        "fr": "French",
        "it": "Italian",
        "pi": "Pirate",
        "nl": "Dutch",
        "we": "Welsh",
        "pl": "Polish",
        "pt": "Portuguese",
        "br": "Brazilian Portuguese",  # fixed: "Portugese" typo
        "cs": "Czech",
        "sk": "Slovak",
        "kr": "Korean",
    }[lang_to]
    assert os.getenv("OPENAI_API_KEY"), "OPENAI_API_KEY env var not set"
    client = OpenAI()
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are a language expert that speaks all languages in the world. You are about to translate text from English to another language. The text is a part of the software you are translating. If the given text contains a phrase enclosed by curly preceded with a percent sign, do not translate the given phrase, just keep it verbatim. So for example, the phrase %{amount} translated to target language should still be kept as %{amount}. Never output anything else, just the translated string.",  # noqa: E501
                },
                {
                    "role": "user",
                    "content": f"Translate the following string from English to {target}: {text}",  # noqa: E501
                },
            ],
            model="gpt-4-1106-preview",  # aka GPT-4 Turbo
        )
        translated = chat_completion.choices[0].message.content.strip()
        # return the translated string only if variables were not broken
        if string_variables_match(text, translated):
            return translated
        else:
            return None
    except Exception:
        # deliberate best-effort: any API/parse failure is reported to the
        # caller as "no translation" rather than aborting the whole run
        return None
|
||||
|
||||
|
||||
# ---- main ------------------------------------------------------------------
# Compare the target language against the English reference.  When keys are
# missing, fill them in via AI translation and rewrite the language file;
# when the file is complete, verify %{variable} placeholders string by string.
data_en = load_language("en")
data = load_language(lang)

missing = set(data_en.keys()) - set(data.keys())
print(f"Missing {len(missing)} keys in language '{lang}'")

if missing:
    # Rebuild the dict in English key order so the output file keeps a
    # stable ordering; only the keys the target lacks get translated.
    new = {}
    for key in data_en:
        if key in data:
            new[key] = data[key]
            continue
        print(f"Translating key '{key}'")
        print(data_en[key])
        translated = translate_string("en", lang, data_en[key])
        print("->")
        if translated:
            print(translated)
            new[key] = translated
        else:
            # failed or placeholder-breaking translation: key is dropped
            print("ERROR")
        print()
    save_language(lang, new)
else:
    # nothing missing — check whether variables match for each string
    for key in data_en:
        if not string_variables_match(data_en[key], data[key]):
            print(f"Variables mismatch ({key}):")
            print(data_en[key])
            print(data[key])
|
||||
77
tools/i18n-check.py
Normal file
77
tools/i18n-check.py
Normal file
|
|
@@ -0,0 +1,77 @@
|
|||
import os
|
||||
import re
|
||||
|
||||
|
||||
def get_translation_ids_from_source():
    """Collect every translation id referenced via $t(...) in templates and JS."""
    # gather all HTML/JS files under the scanned source directories
    sources = [
        os.path.join(root, name)
        for start in ("lnbits/core/templates", "lnbits/templates", "lnbits/static/js")
        for root, _, names in os.walk(start)
        for name in names
        if name.endswith((".html", ".js"))
    ]
    # match $t('...') and $t("...") calls, capturing the quoted id
    single_quoted = re.compile(r"\$t\('([^']*)'")
    double_quoted = re.compile(r'\$t\("([^"]*)"')
    ids = []
    for path in sources:
        with open(path, "rt") as fh:
            text = fh.read()
        ids.extend(single_quoted.findall(text))
        ids.extend(double_quoted.findall(text))
    return ids
|
||||
|
||||
|
||||
def get_translation_ids_for_language(language):
    # Parse the translation ids (object keys) out of a language's JS file by
    # scanning its lines rather than parsing the JS itself.  Relies entirely
    # on the file's prettier formatting: each top-level key is expected on its
    # own line, indented by exactly two spaces, followed by a colon.
    ids = []
    for line in open(f"lnbits/static/i18n/{language}.js", "rt"):
        # extract ids from lines that start with exactly 2 spaces
        # NOTE(review): per the comment the guards should be startswith("  ")
        # (two spaces) and not startswith("   ") (three) to pair with the
        # line[2:] slice below — the whitespace here looks mangled in transit;
        # verify against the repository before relying on this.
        if line.startswith(" ") and not line.startswith("  "):
            m = line[2:].split(":")[0]
            ids.append(m)
    return ids
|
||||
|
||||
|
||||
def _report_differences(label, expected, actual):
    # Print the ids present in `expected` but absent from `actual` (missing)
    # and the reverse set (extraneous), in the report format used throughout.
    missing = set(expected) - set(actual)
    extra = set(actual) - set(expected)
    if missing:
        print()
        print(f'Missing ids in language "{label}": {len(missing)}')
        for entry in sorted(missing):
            print(f" {entry}")
    if extra:
        print()
        print(f'Extraneous ids in language "{label}": {len(extra)}')
        for entry in sorted(extra):
            print(f" {entry}")


# The source code is the ground truth: "en" must cover exactly the ids the
# templates reference, and every other language must mirror "en".
src_ids = get_translation_ids_from_source()
print(f"Number of ids from source: {len(src_ids)}")

en_ids = get_translation_ids_for_language("en")
_report_differences("en", src_ids, en_ids)

# every other language file under lnbits/static/i18n is checked against "en"
languages = []
for root, _, filenames in os.walk("lnbits/static/i18n"):
    for filename in filenames:
        if filename.endswith(".js") and filename not in ("i18n.js", "en.js"):
            languages.append(filename.split(".")[0])

for code in sorted(languages):
    _report_differences(code, en_ids, get_translation_ids_for_language(code))
|
||||
Loading…
Add table
Add a link
Reference in a new issue