qingxu99 committed on
Commit
3951159
1 Parent(s): 6c448b9
Files changed (3) hide show
  1. .gitignore +2 -1
  2. config.py +1 -1
  3. multi_language.py +171 -2
.gitignore CHANGED
@@ -146,4 +146,5 @@ debug*
146
  private*
147
  crazy_functions/test_project/pdf_and_word
148
  crazy_functions/test_samples
149
- request_llm/jittorllms
 
 
146
  private*
147
  crazy_functions/test_project/pdf_and_word
148
  crazy_functions/test_samples
149
+ request_llm/jittorllms
150
+ multi-language
config.py CHANGED
@@ -54,7 +54,7 @@ LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
54
  # 设置gradio的并行线程数(不需要修改)
55
  CONCURRENT_COUNT = 100
56
 
57
- # 加一个看板娘装饰
58
  ADD_WAIFU = False
59
 
60
  # 设置用户名和密码(不需要修改)(相关功能不稳定,与gradio版本和网络都相关,如果本地使用不建议加这个)
 
54
  # 设置gradio的并行线程数(不需要修改)
55
  CONCURRENT_COUNT = 100
56
 
57
+ # 加一个live2d装饰
58
  ADD_WAIFU = False
59
 
60
  # 设置用户名和密码(不需要修改)(相关功能不稳定,与gradio版本和网络都相关,如果本地使用不建议加这个)
multi_language.py CHANGED
@@ -1,4 +1,84 @@
1
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def extract_chinese_characters(file_path):
4
  with open(file_path, 'r', encoding='utf-8') as f:
@@ -29,6 +109,95 @@ directory_path = './'
29
  chinese_characters = extract_chinese_characters_from_directory(directory_path)
30
  word_to_translate = {}
31
  for d in chinese_characters:
32
- word_to_translate[d['word']] = "Translation"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
- print('All Chinese characters:', chinese_characters)
 
1
  import os
2
+ import functools
3
+ import os
4
+ import pickle
5
+ import time
6
+
7
# Directory that holds the on-disk caches written by `lru_file_cache`.
CACHE_FOLDER = "gpt_log"

# exist_ok=True creates the folder only when missing and avoids the race
# between a separate existence check and the creation call.
os.makedirs(CACHE_FOLDER, exist_ok=True)
11
+
12
+
13
def lru_file_cache(maxsize=128, ttl=None, filename=None):
    """
    Decorator that caches a function's return value, keyed by its call arguments.

    When more than `maxsize` entries exist, the least recently read entries are evicted.
    A cache hit prints a warning and pauses for two seconds before returning.

    maxsize: Maximum number of cached entries. Defaults to 128.
    ttl: Time-to-live in seconds. An entry that has not been read for more than `ttl`
        seconds is dropped and recomputed on the next call. None disables expiry.
    filename: Base name of the cache file, stored as "<CACHE_FOLDER>/<filename>.cache".
        If None, the cache is kept in memory only and nothing is read from or written to disk.

    The wrapped function gains a `cache_info()` function returning a dict with the keys
    "hits", "misses", "maxsize", "currsize", "ttl" and "filename".
    """
    cache_path = os.path.join(CACHE_FOLDER, f"{filename}.cache") if filename is not None else None

    def decorator_function(func):
        cache = {}
        _cache_info = {
            "hits": 0,
            "misses": 0,
            "maxsize": maxsize,
            "currsize": 0,
            "ttl": ttl,
            "filename": cache_path,
        }

        # Restore entries persisted by a previous run.
        if cache_path is not None and os.path.exists(cache_path):
            # NOTE(security): pickle.load can execute arbitrary code; only load
            # cache files that this program wrote itself.
            with open(cache_path, "rb") as f:
                cache = pickle.load(f)
            _cache_info["currsize"] = len(cache)

        @functools.wraps(func)
        def wrapper_function(*args, **kwargs):
            # Include keyword *values* (sorted by name) so that calls differing only
            # in a keyword value get distinct, run-to-run stable keys.
            key = str((args, sorted(kwargs.items())))
            if key in cache:
                value, last_read = cache[key]
                if _cache_info["ttl"] is None or (last_read + _cache_info["ttl"]) >= time.time():
                    _cache_info["hits"] += 1
                    print(f'Warning, reading cache, last read {(time.time()-last_read)//60} minutes ago')
                    time.sleep(2)
                    cache[key][1] = time.time()
                    return value
                # Entry expired: drop it and fall through to recompute.
                del cache[key]

            result = func(*args, **kwargs)
            cache[key] = [result, time.time()]
            _cache_info["misses"] += 1

            # Evict the least recently read entries until the cache fits again.
            while cache and len(cache) > _cache_info["maxsize"]:
                oldest_key = min(cache, key=lambda k: cache[k][1])
                del cache[oldest_key]
            # Derive the size from the dict itself so it cannot drift.
            _cache_info["currsize"] = len(cache)

            if cache_path is not None:
                with open(cache_path, "wb") as f:
                    pickle.dump(cache, f)

            return result

        def cache_info():
            return _cache_info

        wrapper_function.cache_info = cache_info

        return wrapper_function

    return decorator_function
80
+
81
+
82
 
83
  def extract_chinese_characters(file_path):
84
  with open(file_path, 'r', encoding='utf-8') as f:
 
109
  chinese_characters = extract_chinese_characters_from_directory(directory_path)
110
  word_to_translate = {}
111
  for d in chinese_characters:
112
+ word_to_translate[d['word']] = "TRANS"
113
+
114
def break_dictionary(d, n):
    """
    Split dictionary `d` into a list of dictionaries holding at most `n` items each.

    Items keep their insertion order; only the last chunk may hold fewer than `n`
    items. An empty `d` yields an empty list.

    Raises ValueError if `n` is smaller than 1.
    """
    if n < 1:
        # Without this check n == 0 fails with an opaque ZeroDivisionError and
        # a negative n silently returns [] (dropping every item).
        raise ValueError(f"chunk size must be at least 1, got {n}")
    items = list(d.items())
    return [dict(items[start:start + n]) for start in range(0, len(items), n)]
118
+
119
+ N_EACH_REQ = 50
120
+ word_to_translate_split = break_dictionary(word_to_translate, N_EACH_REQ)
121
+ LANG = "English"
122
+
123
@lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
def trans(words):
    """
    Return the translation of `words` as the text of a Python dict literal.

    Placeholder: the model request is currently disabled, so this always
    returns the empty dict literal '{}'.
    """
    # Disabled implementation, kept for reference:
    #
    # from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    # from toolbox import get_conf, ChatBotWithCookies
    # proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
    #     get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
    # llm_kwargs = {
    #     'api_key': API_KEY,
    #     'llm_model': LLM_MODEL,
    #     'top_p':1.0,
    #     'max_length': None,
    #     'temperature':0.0,
    # }
    # plugin_kwargs = {}
    # chatbot = ChatBotWithCookies(llm_kwargs)
    # history = []
    # for gpt_say in request_gpt_model_in_new_thread_with_ui_alive(
    #     inputs=words, inputs_show_user=words,
    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
    #     sys_prompt=f"Translate following words to {LANG}, replace `TRANS` with translated result."
    # ):
    #     gpt_say = gpt_say[1][0][1]
    # return gpt_say
    return '{}'
147
+
148
def _parse_translation(res):
    """Parse a translation reply into a dict; return {} if it is not a dict literal."""
    import ast
    try:
        # NOTE(security): the reply comes from a language model, so it is parsed
        # with ast.literal_eval (literals only) instead of eval, which would
        # execute arbitrary code contained in the reply.
        parsed = ast.literal_eval(res)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        print('Unexpected output.')
        return {}
    if not isinstance(parsed, dict):
        print('Unexpected output.')
        return {}
    return parsed


# Merge the translations of every chunk into one word -> translation mapping.
translated_result = {}
for d in word_to_translate_split:
    res = trans(str(d))
    # A reply that cannot be parsed contributes nothing, instead of re-applying
    # the previous chunk's result or failing on an undefined name.
    translated_result.update(_parse_translation(res))
157
+
158
+ print('All Chinese characters:', chinese_characters)
159
+
160
+
161
+ # =================== create copy =====================
162
def copy_source_code():
    """
    Create a fresh copy of the current directory under ./multi-language/<LANG>/.

    Any existing copy for the current LANG is removed first. Entries named
    'multi-language', 'gpt_log', '.git' or 'private_upload' are not copied.
    """
    import shutil
    import os
    backup_dir = f'./multi-language/{LANG}/'
    # Best effort: the previous copy may not exist yet, so removal errors are ignored.
    shutil.rmtree(backup_dir, ignore_errors=True)
    os.makedirs(f'./multi-language', exist_ok=True)
    # The output folder itself is excluded so the copy never recurses into itself.
    shutil.copytree(
        './',
        backup_dir,
        ignore=shutil.ignore_patterns('multi-language', 'gpt_log', '.git', 'private_upload'),
    )
176
+ copy_source_code()
177
+
178
+
179
# Point each record at its file inside the copied tree and attach the
# translation of its word (None when no translation is available).
for d in chinese_characters:
    d['file'] = f'./multi-language/{LANG}/' + d['file']
    d['trans'] = translated_result.get(d['word'])

# Order the records from the longest word to the shortest.
# NOTE(review): presumably so longer phrases are handled before their substrings - confirm.
chinese_characters = sorted(chinese_characters, key=lambda x: len(x['word']), reverse=True)
187
+ for d in chinese_characters:
188
+ if d['trans'] is None:
189
+ continue
190
+
191
+
192
+
193
+ with open(d['file'], 'r', encoding='utf-8') as f:
194
+ content = f.read()
195
+
196
+ content.replace(d['word'], d['trans'])
197
+ substring = d['trans']
198
+ substring_start_index = content.find(substring)
199
+ substring_end_index = substring_start_index + len(substring) - 1
200
+ if content[substring_start_index].isalpha() or content[substring_start_index].isdigit():
201
+ content = content[:substring_start_index+1]
202
+
203