File size: 13,646 Bytes
6c448b9
c376e46
3951159
8d52819
3951159
 
 
 
c376e46
 
3951159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d52819
 
 
 
 
 
6c448b9
c376e46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c448b9
 
 
 
c376e46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3951159
c376e46
 
3951159
6c448b9
c376e46
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
import os
import json
import functools
import re
import pickle
import time

CACHE_FOLDER = "gpt_log"
blacklist = ['multi-language', 'gpt_log', '.git', 'private_upload']
LANG = "English"

if not os.path.exists(CACHE_FOLDER):
    os.makedirs(CACHE_FOLDER)


def lru_file_cache(maxsize=128, ttl=None, filename=None):
    """
    Decorator that caches a function's return value after being called with given arguments. 
    It uses a Least Recently Used (LRU) cache strategy to limit the size of the cache.
    maxsize: Maximum size of the cache. Defaults to 128.
    ttl: Time-to-Live of the cache. If a value hasn't been accessed for `ttl` seconds, it will be evicted from the cache.
    filename: Name of the file to store the cache in. If not supplied, the function name + ".cache" will be used.
    """
    cache_path = os.path.join(CACHE_FOLDER, f"{filename}.cache") if filename is not None else None

    def decorator_function(func):
        cache = {}
        _cache_info = {
            "hits": 0,
            "misses": 0,
            "maxsize": maxsize,
            "currsize": 0,
            "ttl": ttl,
            "filename": cache_path,
        }

        @functools.wraps(func)
        def wrapper_function(*args, **kwargs):
            key = str((args, frozenset(kwargs)))
            if key in cache:
                if _cache_info["ttl"] is None or (cache[key][1] + _cache_info["ttl"]) >= time.time():
                    _cache_info["hits"] += 1
                    print(f'Warning, reading cache, last read {(time.time()-cache[key][1])//60} minutes ago'); time.sleep(2)
                    cache[key][1] = time.time()
                    return cache[key][0]
                else:
                    del cache[key]

            result = func(*args, **kwargs)
            cache[key] = [result, time.time()]
            _cache_info["misses"] += 1
            _cache_info["currsize"] += 1

            if _cache_info["currsize"] > _cache_info["maxsize"]:
                oldest_key = None
                for k in cache:
                    if oldest_key is None:
                        oldest_key = k
                    elif cache[k][1] < cache[oldest_key][1]:
                        oldest_key = k
                del cache[oldest_key]
                _cache_info["currsize"] -= 1

            if cache_path is not None:
                with open(cache_path, "wb") as f:
                    pickle.dump(cache, f)

            return result

        def cache_info():
            return _cache_info

        wrapper_function.cache_info = cache_info

        if cache_path is not None and os.path.exists(cache_path):
            with open(cache_path, "rb") as f:
                cache = pickle.load(f)
            _cache_info["currsize"] = len(cache)

        return wrapper_function

    return decorator_function

def contains_chinese(string):
    """
    Returns True if the given string contains Chinese characters, False otherwise.
    """
    chinese_regex = re.compile(u'[\u4e00-\u9fff]+')
    return chinese_regex.search(string) is not None

def split_list(lst, n_each_req):
    """
    Split a list into smaller lists, each with a maximum number of elements.
    :param lst: the list to split
    :param n_each_req: the maximum number of elements in each sub-list
    :return: a list of sub-lists
    """
    result = []
    for i in range(0, len(lst), n_each_req):
        result.append(lst[i:i + n_each_req])
    return result

def map_to_json(map, language):
    dict_ = read_map_from_json(language)
    dict_.update(map)
    with open(f'docs/translate_{language.lower()}.json', 'w', encoding='utf8') as f:
        json.dump(dict_, f, indent=4, ensure_ascii=False)

def read_map_from_json(language):
    if os.path.exists(f'docs/translate_{language.lower()}.json'):
        with open(f'docs/translate_{language.lower()}.json', 'r', encoding='utf8') as f: 
            return json.load(f)
    return {}

cached_translation = {}
cached_translation = read_map_from_json(language=LANG)

@lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
def trans(word_to_translate, language, special=False):
    if len(word_to_translate) == 0: return {}
    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from toolbox import get_conf, ChatBotWithCookies
    proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
        get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
    llm_kwargs = {
        'api_key': API_KEY,
        'llm_model': LLM_MODEL,
        'top_p':1.0, 
        'max_length': None,
        'temperature':0.0,
    }
    N_EACH_REQ = 16
    word_to_translate_split = split_list(word_to_translate, N_EACH_REQ)
    inputs_array = [str(s) for s in word_to_translate_split]
    inputs_show_user_array = inputs_array
    history_array = [[] for _ in inputs_array]
    if special: #  to English using CamelCase Naming Convention
        sys_prompt_array = [f"Translate following names to English with CamelCase naming convention. Keep original format" for _ in inputs_array]
    else:
        sys_prompt_array = [f"Translate following sentences to {LANG}. Keep original format." for _ in inputs_array]
    chatbot = ChatBotWithCookies(llm_kwargs)
    gpt_say_generator = request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array, 
        inputs_show_user_array, 
        llm_kwargs, 
        chatbot, 
        history_array, 
        sys_prompt_array, 
    )
    while True:
        try:
            gpt_say = next(gpt_say_generator)
            print(gpt_say[1][0][1])
        except StopIteration as e:
            result = e.value
            break
    translated_result = {}
    for i, r in enumerate(result):
        if i%2 == 1:
            try:
                res_before_trans = eval(result[i-1])
                res_after_trans = eval(result[i])
                for a,b in zip(res_before_trans, res_after_trans):
                    translated_result[a] = b
            except:
                try:
                    res_before_trans = eval(result[i-1])
                    result[i] = result[i].strip('[\']')
                    res_after_trans = [s for s in result[i].split("', '")]
                    for a,b in zip(res_before_trans, res_after_trans):
                        translated_result[a] = b
                except:
                    res_before_trans = eval(result[i-1])
                    for a in res_before_trans:
                        translated_result[a] = None
    return translated_result

def step_1_core_key_translate():
    def extract_chinese_characters(file_path):
        syntax = []
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
            import ast
            root = ast.parse(content)
            for node in ast.walk(root):
                if isinstance(node, ast.Name):
                    if contains_chinese(node.id): syntax.append(node.id)
                if isinstance(node, ast.Import):
                    for n in node.names:
                        if contains_chinese(n.name): syntax.append(n.name)
                elif isinstance(node, ast.ImportFrom):
                    for n in node.names:
                        if contains_chinese(n.name): syntax.append(n.name)
                        for k in node.module.split('.'):
                            if contains_chinese(k): syntax.append(k)
            return syntax

    def extract_chinese_characters_from_directory(directory_path):
        chinese_characters = []
        for root, dirs, files in os.walk(directory_path):
            if any([b in root for b in blacklist]):
                continue
            for file in files:
                if file.endswith('.py'):
                    file_path = os.path.join(root, file)
                    chinese_characters.extend(extract_chinese_characters(file_path))
        return chinese_characters

    directory_path = './'
    chinese_core_names = extract_chinese_characters_from_directory(directory_path)
    chinese_core_keys = [name for name in chinese_core_names]
    chinese_core_keys_norepeat = []
    for d in chinese_core_keys:
        if d not in chinese_core_keys_norepeat: chinese_core_keys_norepeat.append(d)
    need_translate = []
    cached_translation = read_map_from_json(language=LANG)
    cached_translation_keys = list(cached_translation.keys())
    for d in chinese_core_keys_norepeat:
        if d not in cached_translation_keys: 
            need_translate.append(d)

    need_translate_mapping = trans(need_translate, language=LANG, special=True)
    map_to_json(need_translate_mapping, language=LANG)
    cached_translation = read_map_from_json(language=LANG)
    cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))

    chinese_core_keys_norepeat_mapping = {}
    for k in chinese_core_keys_norepeat:
        chinese_core_keys_norepeat_mapping.update({k:cached_translation[k]})

    # ===============================================
    # copy
    # ===============================================
    def copy_source_code():

        from toolbox import get_conf
        import shutil
        import os
        try: shutil.rmtree(f'./multi-language/{LANG}/')
        except: pass
        os.makedirs(f'./multi-language', exist_ok=True)
        backup_dir = f'./multi-language/{LANG}/'
        shutil.copytree('./', backup_dir, ignore=lambda x, y: blacklist)
    copy_source_code()

    # ===============================================
    # primary key replace
    # ===============================================
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)
                syntax = []
                # read again
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                
                for k, v in chinese_core_keys_norepeat_mapping.items():
                    content = content.replace(k, v)

                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)


def step_2_core_key_translate():

    # =================================================================================================
    # step2 
    # =================================================================================================
    def get_strings(node):
        strings = []

        # recursively traverse the AST
        for child in ast.iter_child_nodes(node):
            if isinstance(child, ast.Str):
                if contains_chinese(child.s):
                    string_ = child.s.strip().strip(',').strip().strip('.').strip()
                    if string_.startswith('[Local Message]'):
                        string_ = string_.replace('[Local Message]', '')
                        string_ = string_.strip().strip(',').strip().strip('.').strip()
                    strings.append([
                        string_, 
                        child.lineno*10000+child.col_offset
                    ])
            elif isinstance(child, ast.AST):
                strings.extend(get_strings(child))

        return strings

    string_literals = []
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)
                syntax = []
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                    import ast
                    tree = ast.parse(content)
                    res = get_strings(tree)
                    string_literals.extend(res)

    chinese_literal_names = []
    chinese_literal_names_norepeat = []
    for string, offset in string_literals:
        chinese_literal_names.append(string)
    chinese_literal_names_norepeat = []
    for d in chinese_literal_names:
        if d not in chinese_literal_names_norepeat: chinese_literal_names_norepeat.append(d)
    need_translate = []
    cached_translation = read_map_from_json(language=LANG)
    cached_translation_keys = list(cached_translation.keys())
    for d in chinese_literal_names_norepeat:
        if d not in cached_translation_keys: 
            need_translate.append(d)


    up = trans(need_translate, language=LANG, special=False)
    map_to_json(up, language=LANG)
    cached_translation = read_map_from_json(language=LANG)
    cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))

    # ===============================================
    # literal key replace
    # ===============================================
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)
                syntax = []
                # read again
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                
                for k, v in cached_translation.items():
                    content = content.replace(k, v)

                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)


step_1_core_key_translate()
step_2_core_key_translate()