Spaces:
Sleeping
Sleeping
File size: 5,103 Bytes
0cc999a 8ba98ee 0cc999a 8ba98ee 0cc999a 8ba98ee 0cc999a 8ba98ee 0cc999a 8ba98ee 0cc999a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import os, json, re, sys
import aiohttp, asyncio
import commentjson
# Install asyncio's default event loop policy before any coroutine is run.
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())

# Read the API settings from config.json in the current working directory.
# commentjson is used for parsing (presumably so the file may carry comments).
with open("config.json", "r", encoding="utf-8") as f:
    config = commentjson.load(f)

api_key = config["openai_api_key"]

# Use the configured API base when one is given, otherwise the official
# OpenAI endpoint.
if "openai_api_base" in config:
    url = config["openai_api_base"] + "/v1/chat/completions"
else:
    url = "https://api.openai.com/v1/chat/completions"
def get_current_strings(root="."):
    """Collect every string literal passed to ``i18n(...)`` in .py files.

    Walks ``root`` recursively, reads each file whose name ends in ``.py``
    as UTF-8 and extracts the quoted argument of every ``i18n("...")`` or
    ``i18n('...')`` call found by a regular expression.

    Args:
        root: Directory to scan. Defaults to the current working directory.

    Returns:
        A dict whose keys are the extracted strings, in order of first
        appearance, each mapped to an empty string.
    """
    # The capture group is exactly the text between the quotes, so it is
    # used verbatim. Trimming parentheses from it would corrupt keys that
    # legitimately start or end with "(" / ")", e.g. "[docs](http://x)".
    pattern = re.compile(
        r'i18n\s*\(\s*["\']([^"\']*(?:\)[^"\']*)?)["\']\s*\)'
    )
    strings = {}
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            if not filename.endswith(".py"):
                continue
            filepath = os.path.join(dirpath, filename)
            with open(filepath, "r", encoding="utf-8") as f:
                source = f.read()
            # Search file by file so a match can never span two files and
            # no ever-growing concatenated buffer is needed.
            for literal in pattern.findall(source):
                strings.setdefault(literal, "")
    return strings
def get_locale_strings(filename):
    """Load the translations stored in one locale JSON file.

    Args:
        filename: Path of the locale file to read (UTF-8 JSON).

    Returns:
        The parsed JSON content, or an empty dict when the file does not
        exist.
    """
    try:
        handle = open(filename, "r", encoding="utf-8")
    except FileNotFoundError:
        # A locale without a file yet simply has no translations.
        return {}
    with handle:
        return json.load(handle)
def sort_strings(existing_translations):
    """Order translations so the entries needing attention come first.

    The result contains every input entry, arranged in three groups, each
    sorted by key:

      1. values containing the "(🔴NOT USED)" marker,
      2. empty values (not yet translated),
      3. all remaining values.

    Args:
        existing_translations: Mapping of source string to translation.

    Returns:
        A new dict with the same items in the order described above.
    """
    not_used_marker = "(🔴NOT USED)"

    def group_of(value):
        # The same marker decides membership of group 1 and exclusion from
        # group 3, so no entry can fall between the groups and be dropped.
        if not_used_marker in value:
            return 0
        if value == "":
            return 1
        return 2

    ordered = sorted(
        existing_translations.items(),
        key=lambda item: (group_of(item[1]), item[0]),
    )
    return dict(ordered)
async def auto_translate(str, language):
    """Translate one app string through the chat completions endpoint.

    Sends a single POST request to the module-level ``url`` authenticated
    with the module-level ``api_key``, asking the "gpt-3.5-turbo" model to
    translate ``str`` into ``language``.

    Args:
        str: The text to translate. (The name shadows the builtin; it is
            kept unchanged so existing callers keep working.)
        language: Target language, interpolated into the system prompt.

    Returns:
        The ``content`` of the first choice's message in the JSON response.

    Raises:
        KeyError / IndexError: if the response JSON does not have the
            expected ``choices[0].message.content`` structure; the response
            is not otherwise validated.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",
        # temperature is a request-body parameter of the API; sending it as
        # an HTTP header does not set it, so it belongs in the payload.
        "temperature": 0,
        "messages": [
            {
                "role": "system",
                "content": f"You are a translation program;\nYour job is to translate user input into {language};\nThe content you are translating is a string in the App;\nDo not explain emoji;\nIf input is only a emoji, please simply return origin emoji;\nPlease ensure that the translation results are concise and easy to understand."
            },
            {"role": "user", "content": f"{str}"}
        ],
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=payload) as response:
            data = await response.json()
            return data["choices"][0]["message"]["content"]
async def main(auto=False):
    """Synchronise the locale JSON files with the strings used in the code.

    For every ``.json`` file found under ``locale/`` (files whose path
    contains "zh_CN" are skipped):

      * keys used in the code but missing from the file are added with an
        empty value;
      * keys in the file that the code no longer uses get their value
        prefixed with "(🔴NOT USED)";
      * the entries are reordered with ``sort_strings``;
      * when ``auto`` is true, every empty value is filled with a machine
        translation prefixed with "(🟡REVIEW NEEDED)";
      * the result is written back to the same file.

    Args:
        auto: Whether to machine-translate untranslated entries.
    """
    not_used_marker = "(🔴NOT USED)"
    current_strs = get_current_strings()

    # Walk the locale directory and collect every JSON file in it.
    locale_files = []
    for dirpath, _dirnames, filenames in os.walk("locale"):
        for filename in filenames:
            if filename.endswith(".json"):
                locale_files.append(os.path.join(dirpath, filename))

    for locale_filename in locale_files:
        # NOTE(review): zh_CN is presumably the source language of the
        # i18n keys and therefore needs no translation file update.
        if "zh_CN" in locale_filename:
            continue

        # "locale/en_US.json" -> "en_US"; independent of path depth and
        # of the path separator.
        locale_name = os.path.splitext(os.path.basename(locale_filename))[0]
        locale_strs = get_locale_strings(locale_filename)

        # Add new keys.
        new_keys = [key for key in current_strs if key not in locale_strs]
        for key in new_keys:
            locale_strs[key] = ""
        print(f"{locale_name}'s new str: {len(new_keys)}")

        # Mark keys the code no longer uses. The marker is added only once,
        # so re-running the script does not pile up repeated prefixes.
        invalid_keys = [key for key in locale_strs if key not in current_strs]
        for key in invalid_keys:
            if not locale_strs[key].startswith(not_used_marker):
                locale_strs[key] = not_used_marker + locale_strs[key]
        print(f"{locale_name}'s invalid str: {len(invalid_keys)}")

        locale_strs = sort_strings(locale_strs)

        if auto:
            non_translated_keys = [
                key for key, value in locale_strs.items() if value == ""
            ]
            tasks = [
                auto_translate(key, locale_name) for key in non_translated_keys
            ]
            # gather() returns results in the order of the submitted tasks,
            # which lets them be zipped back onto their keys.
            results = await asyncio.gather(*tasks)
            for key, result in zip(non_translated_keys, results):
                locale_strs[key] = "(🟡REVIEW NEEDED)" + result
            print(f"{locale_name}'s auto translated str: {len(non_translated_keys)}")

        with open(locale_filename, 'w', encoding='utf-8') as f:
            json.dump(locale_strs, f, ensure_ascii=False, indent=4)
if __name__ == "__main__":
    # Machine translation is enabled only when the first command-line
    # argument is exactly "--auto".
    auto = len(sys.argv) > 1 and sys.argv[1] == "--auto"
    asyncio.run(main(auto))
|