Spaces:
Running
Running
import os, json, re, sys | |
import aiohttp, asyncio | |
import commentjson | |
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) | |
with open("config.json", "r", encoding="utf-8") as f: | |
config = commentjson.load(f) | |
api_key = config["openai_api_key"] | |
url = config["openai_api_base"] + "/v1/chat/completions" if "openai_api_base" in config else "https://api.openai.com/v1/chat/completions" | |
def get_current_strings(): | |
pattern = r'i18n\s*\(\s*["\']([^"\']*(?:\)[^"\']*)?)["\']\s*\)' | |
# Load the .py files | |
contents = "" | |
for dirpath, dirnames, filenames in os.walk("."): | |
for filename in filenames: | |
if filename.endswith(".py"): | |
filepath = os.path.join(dirpath, filename) | |
with open(filepath, 'r', encoding='utf-8') as f: | |
contents += f.read() | |
# Matching with regular expressions | |
matches = re.findall(pattern, contents, re.DOTALL) | |
data = {match.strip('()"'): '' for match in matches} | |
fixed_data = {} # fix some keys | |
for key, value in data.items(): | |
if "](" in key and key.count("(") != key.count(")"): | |
fixed_data[key+")"] = value | |
else: | |
fixed_data[key] = value | |
return fixed_data | |
def get_locale_strings(filename): | |
try: | |
with open(filename, "r", encoding="utf-8") as f: | |
locale_strs = json.load(f) | |
except FileNotFoundError: | |
locale_strs = {} | |
return locale_strs | |
def sort_strings(existing_translations): | |
# Sort the merged data | |
sorted_translations = {} | |
# Add entries with (NOT USED) in their values | |
for key, value in sorted(existing_translations.items(), key=lambda x: x[0]): | |
if "(🔴NOT USED)" in value: | |
sorted_translations[key] = value | |
# Add entries with empty values | |
for key, value in sorted(existing_translations.items(), key=lambda x: x[0]): | |
if value == "": | |
sorted_translations[key] = value | |
# Add the rest of the entries | |
for key, value in sorted(existing_translations.items(), key=lambda x: x[0]): | |
if value != "" and "(NOT USED)" not in value: | |
sorted_translations[key] = value | |
return sorted_translations | |
async def auto_translate(str, language): | |
headers = { | |
"Content-Type": "application/json", | |
"Authorization": f"Bearer {api_key}", | |
"temperature": f"{0}", | |
} | |
payload = { | |
"model": "gpt-4o", | |
"messages": [ | |
{ | |
"role": "system", | |
"content": f"You are a translation program;\nYour job is to translate user input into {language};\nThe content you are translating is a string in the App;\nDo not explain emoji;\nIf input is only a emoji, please simply return origin emoji;\nPlease ensure that the translation results are concise and easy to understand." | |
}, | |
{"role": "user", "content": f"{str}"} | |
], | |
} | |
async with aiohttp.ClientSession() as session: | |
async with session.post(url, headers=headers, json=payload) as response: | |
data = await response.json() | |
return data["choices"][0]["message"]["content"] | |
async def main(auto=False): | |
current_strs = get_current_strings() | |
locale_files = [] | |
# 遍历locale目录下的所有json文件 | |
for dirpath, dirnames, filenames in os.walk("locale"): | |
for filename in filenames: | |
if filename.endswith(".json"): | |
locale_files.append(os.path.join(dirpath, filename)) | |
for locale_filename in locale_files: | |
if "zh_CN" in locale_filename: | |
continue | |
locale_strs = get_locale_strings(locale_filename) | |
# Add new keys | |
new_keys = [] | |
for key in current_strs: | |
if key not in locale_strs: | |
new_keys.append(key) | |
locale_strs[key] = "" | |
print(f"{locale_filename[7:-5]}'s new str: {len(new_keys)}") | |
# Add (NOT USED) to invalid keys | |
for key in locale_strs: | |
if key not in current_strs: | |
locale_strs[key] = "(🔴NOT USED)" + locale_strs[key] | |
print(f"{locale_filename[7:-5]}'s invalid str: {len(locale_strs) - len(current_strs)}") | |
locale_strs = sort_strings(locale_strs) | |
if auto: | |
tasks = [] | |
non_translated_keys = [] | |
for key in locale_strs: | |
if locale_strs[key] == "": | |
non_translated_keys.append(key) | |
tasks.append(auto_translate(key, locale_filename[7:-5])) | |
results = await asyncio.gather(*tasks) | |
for key, result in zip(non_translated_keys, results): | |
locale_strs[key] = "(🟡REVIEW NEEDED)" + result | |
print(f"{locale_filename[7:-5]}'s auto translated str: {len(non_translated_keys)}") | |
with open(locale_filename, 'w', encoding='utf-8') as f: | |
json.dump(locale_strs, f, ensure_ascii=False, indent=4) | |
if __name__ == "__main__": | |
auto = False | |
if len(sys.argv) > 1 and sys.argv[1] == "--auto": | |
auto = True | |
asyncio.run(main(auto)) | |