import os, json, re, sys
import aiohttp, asyncio
import commentjson

asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
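
# Assumed shape of config.json (only the keys read below are used here; commentjson
# lets the file carry comments). The values shown are placeholders, not real settings:
#   {
#       "openai_api_key": "sk-...",        # required
#       "openai_api_base": "https://..."   # optional; defaults to https://api.openai.com
#   }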

with open("config.json", "r", encoding="utf-8") as f:
    config = commentjson.load(f)
api_key = config["openai_api_key"]
if "openai_api_base" in config:
    url = config["openai_api_base"] + "/v1/chat/completions"
else:
    url = "https://api.openai.com/v1/chat/completions"


def get_current_strings():
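    """Scan every .py file under the current directory tree and collect the string
    literals passed to i18n(...) as translation keys (values start out empty)."""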
    pattern = r'i18n\s*\(\s*["\']([^"\']*(?:\)[^"\']*)?)["\']\s*\)'

    # Load the .py files
    contents = ""
    for dirpath, dirnames, filenames in os.walk("."):
        for filename in filenames:
            if filename.endswith(".py"):
                filepath = os.path.join(dirpath, filename)
                with open(filepath, 'r', encoding='utf-8') as f:
                    contents += f.read()
    # Matching with regular expressions
    matches = re.findall(pattern, contents, re.DOTALL)
    data = {match.strip('()"'): '' for match in matches}
    fixed_data = {}     # restore the trailing ")" that strip() removed from keys containing markdown links
    for key, value in data.items():
        if "](" in key and key.count("(") != key.count(")"):
            fixed_data[key + ")"] = value
        else:
            fixed_data[key] = value

    return fixed_data


def get_locale_strings(filename):
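    """Load an existing locale JSON file, or return an empty dict if it does not exist yet."""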
    try:
        with open(filename, "r", encoding="utf-8") as f:
            locale_strs = json.load(f)
    except FileNotFoundError:
        locale_strs = {}
    return locale_strs


def sort_strings(existing_translations):
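    """Reorder the merged translations: (🔴NOT USED) entries first, then untranslated
    (empty) entries, then everything else, each group sorted by key."""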
    # Sort the merged data
    sorted_translations = {}
    # Add entries with (NOT USED) in their values
    for key, value in sorted(existing_translations.items(), key=lambda x: x[0]):
        if "(🔴NOT USED)" in value:
            sorted_translations[key] = value
    # Add entries with empty values
    for key, value in sorted(existing_translations.items(), key=lambda x: x[0]):
        if value == "":
            sorted_translations[key] = value
    # Add the rest of the entries
    for key, value in sorted(existing_translations.items(), key=lambda x: x[0]):
        if value != "" and "(🔴NOT USED)" not in value:
            sorted_translations[key] = value

    return sorted_translations


async def auto_translate(text, language):
    """Translate a single UI string into `language` via the OpenAI chat completions API."""
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "temperature": 0,  # deterministic output; temperature belongs in the request body, not the HTTP headers
        "messages": [
            {
                "role": "system",
                "content": f"You are a translation program;\nYour job is to translate user input into {language};\nThe content you are translating is a string in the App;\nDo not explain emoji;\nIf the input is only an emoji, simply return the original emoji;\nPlease ensure that the translation results are concise and easy to understand."
            },
            {"role": "user", "content": text}
        ],
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=payload) as response:
            response.raise_for_status()  # surface HTTP errors instead of failing on a missing "choices" key
            data = await response.json()
            return data["choices"][0]["message"]["content"]


async def main(auto=False):
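    """Sync every JSON file under locale/ (except zh_CN) with the i18n keys currently
    used in the code; when auto is True (the --auto flag), machine-translate the
    missing entries and mark them with (🟡REVIEW NEEDED)."""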
    current_strs = get_current_strings()
    locale_files = []
    # Walk the locale directory and collect every .json locale file
    for dirpath, dirnames, filenames in os.walk("locale"):
        for filename in filenames:
            if filename.endswith(".json"):
                locale_files.append(os.path.join(dirpath, filename))


    for locale_filename in locale_files:
        if "zh_CN" in locale_filename:
            continue
        lang = locale_filename[7:-5]    # strip the leading "locale/" and trailing ".json", e.g. "ja_JP"
        locale_strs = get_locale_strings(locale_filename)

        # Add new keys
        new_keys = []
        for key in current_strs:
            if key not in locale_strs:
                new_keys.append(key)
                locale_strs[key] = ""
        print(f"{lang}'s new str: {len(new_keys)}")
        # Add (NOT USED) to invalid keys
        for key in locale_strs:
            if key not in current_strs and not locale_strs[key].startswith("(🔴NOT USED)"):
                locale_strs[key] = "(🔴NOT USED)" + locale_strs[key]
        print(f"{lang}'s invalid str: {len(locale_strs) - len(current_strs)}")

        locale_strs = sort_strings(locale_strs)

        if auto:
            tasks = []
            non_translated_keys = []
            for key in locale_strs:
                if locale_strs[key] == "":
                    non_translated_keys.append(key)
                    tasks.append(auto_translate(key, lang))
            results = await asyncio.gather(*tasks)
            for key, result in zip(non_translated_keys, results):
                locale_strs[key] = "(🟡REVIEW NEEDED)" + result
            print(f"{lang}'s auto translated str: {len(non_translated_keys)}")

        with open(locale_filename, 'w', encoding='utf-8') as f:
            json.dump(locale_strs, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    auto = False
    if len(sys.argv) > 1 and sys.argv[1] == "--auto":
        auto = True
    asyncio.run(main(auto))