Spaces:
Sleeping
Sleeping
translate not fin
Browse files- docs/translate_english.json +0 -0
- multi_language.py +252 -113
- theme.py +3 -8
docs/translate_english.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
multi_language.py
CHANGED
@@ -1,10 +1,13 @@
|
|
1 |
import os
|
|
|
2 |
import functools
|
3 |
import re
|
4 |
import pickle
|
5 |
import time
|
6 |
|
7 |
CACHE_FOLDER = "gpt_log"
|
|
|
|
|
8 |
|
9 |
if not os.path.exists(CACHE_FOLDER):
|
10 |
os.makedirs(CACHE_FOLDER)
|
@@ -78,7 +81,6 @@ def lru_file_cache(maxsize=128, ttl=None, filename=None):
|
|
78 |
|
79 |
return decorator_function
|
80 |
|
81 |
-
|
82 |
def contains_chinese(string):
|
83 |
"""
|
84 |
Returns True if the given string contains Chinese characters, False otherwise.
|
@@ -86,122 +88,259 @@ def contains_chinese(string):
|
|
86 |
chinese_regex = re.compile(u'[\u4e00-\u9fff]+')
|
87 |
return chinese_regex.search(string) is not None
|
88 |
|
89 |
-
def
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
for root, dirs, files in os.walk(directory_path):
|
106 |
for file in files:
|
107 |
if file.endswith('.py'):
|
108 |
file_path = os.path.join(root, file)
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
content.replace(d['word'], d['trans'])
|
201 |
-
substring = d['trans']
|
202 |
-
substring_start_index = content.find(substring)
|
203 |
-
substring_end_index = substring_start_index + len(substring) - 1
|
204 |
-
if content[substring_start_index].isalpha() or content[substring_start_index].isdigit():
|
205 |
-
content = content[:substring_start_index+1]
|
206 |
|
207 |
|
|
|
|
|
|
1 |
import os
|
2 |
+
import json
|
3 |
import functools
|
4 |
import re
|
5 |
import pickle
|
6 |
import time
|
7 |
|
8 |
CACHE_FOLDER = "gpt_log"
|
9 |
+
blacklist = ['multi-language', 'gpt_log', '.git', 'private_upload']
|
10 |
+
LANG = "English"
|
11 |
|
12 |
if not os.path.exists(CACHE_FOLDER):
|
13 |
os.makedirs(CACHE_FOLDER)
|
|
|
81 |
|
82 |
return decorator_function
|
83 |
|
|
|
84 |
def contains_chinese(string):
|
85 |
"""
|
86 |
Returns True if the given string contains Chinese characters, False otherwise.
|
|
|
88 |
chinese_regex = re.compile(u'[\u4e00-\u9fff]+')
|
89 |
return chinese_regex.search(string) is not None
|
90 |
|
91 |
+
def split_list(lst, n_each_req):
    """
    Split a list into smaller lists, each with a maximum number of elements.

    :param lst: the list to split
    :param n_each_req: the maximum number of elements in each sub-list; must be >= 1
    :return: a list of sub-lists, in the original order (empty when ``lst`` is empty)
    :raises ValueError: if ``n_each_req`` is smaller than 1
    """
    if n_each_req < 1:
        # A negative step makes range() empty, which would silently drop every element.
        raise ValueError(f"n_each_req must be a positive integer, got {n_each_req!r}")
    return [lst[i:i + n_each_req] for i in range(0, len(lst), n_each_req)]
|
102 |
+
|
103 |
+
def map_to_json(map, language):
    """
    Merge translations into the JSON translation file of ``language``.

    The entries already stored are loaded with ``read_map_from_json``, updated
    with ``map`` (new values win on key collisions) and written back to
    ``docs/translate_<language in lower case>.json``.

    :param map: dict of translations to merge; the name shadows the builtin but
                is kept so existing callers keep working
    :param language: language name, lower-cased to build the file name
    """
    json_path = f'docs/translate_{language.lower()}.json'
    merged = read_map_from_json(language)
    merged.update(map)
    # open(..., 'w') does not create missing parent directories.
    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    with open(json_path, 'w', encoding='utf8') as f:
        json.dump(merged, f, indent=4, ensure_ascii=False)
|
108 |
+
|
109 |
+
def read_map_from_json(language):
    """
    Load the stored translation map of ``language``.

    :param language: language name, lower-cased to build the file name
    :return: the parsed content of ``docs/translate_<language in lower case>.json``
             (callers treat it as a dict), or an empty dict when the file does
             not exist
    """
    json_path = f'docs/translate_{language.lower()}.json'
    # Open directly instead of checking existence first: one path lookup and
    # no window in which the file can disappear between the check and the open.
    try:
        with open(json_path, 'r', encoding='utf8') as f:
            return json.load(f)
    except FileNotFoundError:
        return {}
|
114 |
+
|
115 |
+
# Translation map for LANG, loaded from its JSON file once at import time
# (an empty dict when that file does not exist yet).
cached_translation = read_map_from_json(language=LANG)
|
117 |
+
|
118 |
+
@lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
def trans(word_to_translate, language, special=False):
    """
    Translate a list of strings with the configured LLM, in batches.

    The input is cut into batches of ``N_EACH_REQ`` strings; each batch is sent
    as the ``str()`` of a Python list and the reply is expected to be a Python
    list literal of the same length.

    :param word_to_translate: list of strings to translate
    :param language: target language named in the prompt when ``special`` is False
    :param special: when True, ask for English CamelCase names instead
    :return: dict mapping source strings to their translations; every string of
             a batch maps to None when the reply for that batch cannot be
             interpreted at all
    """
    if not word_to_translate: return {}
    import ast
    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from toolbox import get_conf, ChatBotWithCookies
    proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
        get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
    llm_kwargs = {
        'api_key': API_KEY,
        'llm_model': LLM_MODEL,
        'top_p':1.0,
        'max_length': None,
        'temperature':0.0,
    }
    N_EACH_REQ = 16
    word_to_translate_split = split_list(word_to_translate, N_EACH_REQ)
    inputs_array = [str(s) for s in word_to_translate_split]
    inputs_show_user_array = inputs_array
    history_array = [[] for _ in inputs_array]
    if special: # to English using CamelCase Naming Convention
        sys_prompt_array = [f"Translate following names to English with CamelCase naming convention. Keep original format" for _ in inputs_array]
    else:
        # Honour the `language` argument instead of the module-level LANG.
        sys_prompt_array = [f"Translate following sentences to {language}. Keep original format." for _ in inputs_array]
    chatbot = ChatBotWithCookies(llm_kwargs)
    gpt_say_generator = request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array,
        inputs_show_user_array,
        llm_kwargs,
        chatbot,
        history_array,
        sys_prompt_array,
    )
    # Drain the generator by hand: its final result is only available as the
    # value carried by StopIteration.
    while True:
        try:
            gpt_say = next(gpt_say_generator)
            print(gpt_say[1][0][1])
        except StopIteration as e:
            result = e.value
            break

    translated_result = {}
    # `result` is read as a flat sequence [request_0, reply_0, request_1, reply_1, ...].
    for i in range(1, len(result), 2):
        # The request is the str() of a list built above, so it parses as a literal.
        res_before_trans = ast.literal_eval(result[i - 1])
        reply = result[i]
        try:
            # SECURITY: the reply is model output and must not be executed.
            # ast.literal_eval only accepts Python literals, unlike eval().
            res_after_trans = ast.literal_eval(reply)
            if not isinstance(res_after_trans, (list, tuple)):
                # e.g. a bare string would otherwise be zipped character by character
                raise ValueError("reply is not a list literal")
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            try:
                # Fallback for replies that look like a list but are not valid
                # literals (e.g. unescaped quotes): strip the outer brackets and
                # quotes, then split on the item separator.
                res_after_trans = reply.strip('[\']').split("', '")
            except AttributeError:
                # The reply is not even a string; mark the whole batch as failed.
                res_after_trans = None
        if res_after_trans is None:
            for a in res_before_trans:
                translated_result[a] = None
        else:
            for a, b in zip(res_before_trans, res_after_trans):
                translated_result[a] = b
    return translated_result
|
177 |
+
|
178 |
+
def step_1_core_key_translate():
    """
    Step 1: translate Chinese identifiers and rewrite a copy of the source tree.

    1. Walk ``./`` (skipping directories whose path contains a ``blacklist``
       entry) and collect names containing Chinese characters from variable
       references, imported names and imported module paths of every ``.py`` file.
    2. Translate the names missing from the JSON translation file with
       ``trans(..., special=True)`` and persist them with ``map_to_json``.
    3. Copy the source tree to ``./multi-language/<LANG>/`` (replacing any
       previous copy) and substitute every collected name in the copied
       ``.py`` files by plain text replacement.
    """
    import ast
    import shutil

    def extract_chinese_characters(file_path):
        # Collect Chinese identifiers used in one Python file.
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        syntax = []
        for node in ast.walk(ast.parse(content)):
            if isinstance(node, ast.Name):
                if contains_chinese(node.id): syntax.append(node.id)
            elif isinstance(node, ast.Import):
                for n in node.names:
                    if contains_chinese(n.name): syntax.append(n.name)
            elif isinstance(node, ast.ImportFrom):
                for n in node.names:
                    if contains_chinese(n.name): syntax.append(n.name)
                # `module` is None for relative imports such as `from . import x`.
                if node.module is not None:
                    for k in node.module.split('.'):
                        if contains_chinese(k): syntax.append(k)
        return syntax

    def extract_chinese_characters_from_directory(directory_path):
        # Collect Chinese identifiers from every .py file below directory_path.
        chinese_characters = []
        for root, dirs, files in os.walk(directory_path):
            if any(b in root for b in blacklist):
                continue
            for file in files:
                if file.endswith('.py'):
                    file_path = os.path.join(root, file)
                    chinese_characters.extend(extract_chinese_characters(file_path))
        return chinese_characters

    directory_path = './'
    chinese_core_names = extract_chinese_characters_from_directory(directory_path)
    # Remove duplicates while keeping first-seen order.
    chinese_core_keys_norepeat = list(dict.fromkeys(chinese_core_names))

    cached_translation = read_map_from_json(language=LANG)
    need_translate = [d for d in chinese_core_keys_norepeat if d not in cached_translation]

    need_translate_mapping = trans(need_translate, language=LANG, special=True)
    map_to_json(need_translate_mapping, language=LANG)
    cached_translation = read_map_from_json(language=LANG)

    # Replace longer names first so that a short name which is a substring of a
    # longer one cannot clobber part of it.
    chinese_core_keys_norepeat_mapping = {}
    for k in sorted(chinese_core_keys_norepeat, key=len, reverse=True):
        v = cached_translation.get(k)
        if not isinstance(v, str):
            # Translation failed (stored as null) or is missing: leave the name as is.
            print(f'[multi_language] no translation available for {k!r}, skipped')
            continue
        chinese_core_keys_norepeat_mapping[k] = v

    # ===============================================
    # copy
    # ===============================================
    def copy_source_code():
        # Recreate ./multi-language/<LANG>/ as a fresh copy of the source tree.
        backup_dir = f'./multi-language/{LANG}/'
        # Best effort: the directory does not exist on the first run.
        shutil.rmtree(backup_dir, ignore_errors=True)
        os.makedirs(f'./multi-language', exist_ok=True)
        shutil.copytree('./', backup_dir, ignore=lambda x, y: blacklist)
    copy_source_code()

    # ===============================================
    # primary key replace
    # ===============================================
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if not file.endswith('.py'):
                continue
            file_path = os.path.join(root, file)
            # read again
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            for k, v in chinese_core_keys_norepeat_mapping.items():
                content = content.replace(k, v)

            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
|
264 |
+
|
265 |
+
|
266 |
+
def step_2_core_key_translate():
    """
    Step 2: translate Chinese string literals in the copied source tree.

    1. Parse every ``.py`` file below ``./multi-language/<LANG>/`` and collect
       the string literals containing Chinese characters (trimmed of
       surrounding whitespace, ',' and '.', and of a leading '[Local Message]').
    2. Translate the strings missing from the JSON translation file with
       ``trans(..., special=False)`` and persist them with ``map_to_json``.
    3. Replace every key of the translation file by its translation in those
       files, longest key first, by plain text replacement.
    """
    import ast

    # =================================================================================================
    # step2
    # =================================================================================================
    def get_strings(node):
        # Return [text, position] pairs for the Chinese string literals below node.
        strings = []

        # recursively traverse the AST
        for child in ast.iter_child_nodes(node):
            # ast.Str is deprecated since Python 3.8 and removed in 3.12;
            # string literals are ast.Constant nodes holding a str value.
            if isinstance(child, ast.Constant) and isinstance(child.value, str):
                if contains_chinese(child.value):
                    string_ = child.value.strip().strip(',').strip().strip('.').strip()
                    if string_.startswith('[Local Message]'):
                        string_ = string_.replace('[Local Message]', '')
                        string_ = string_.strip().strip(',').strip().strip('.').strip()
                    strings.append([
                        string_,
                        child.lineno*10000+child.col_offset
                    ])
            else:
                strings.extend(get_strings(child))

        return strings

    string_literals = []
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                tree = ast.parse(content)
                string_literals.extend(get_strings(tree))

    # Remove duplicates while keeping first-seen order; the position is not needed here.
    chinese_literal_names_norepeat = list(dict.fromkeys(string for string, _ in string_literals))

    cached_translation = read_map_from_json(language=LANG)
    need_translate = [d for d in chinese_literal_names_norepeat if d not in cached_translation]

    up = trans(need_translate, language=LANG, special=False)
    map_to_json(up, language=LANG)
    cached_translation = read_map_from_json(language=LANG)
    # Longest keys first, so a short key cannot clobber part of a longer one.
    cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))

    # ===============================================
    # literal key replace
    # ===============================================
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if not file.endswith('.py'):
                continue
            file_path = os.path.join(root, file)
            # read again
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            for k, v in cached_translation.items():
                # Failed translations are stored as null and would make
                # str.replace raise; an empty key would match everywhere.
                if not k or not isinstance(v, str):
                    continue
                content = content.replace(k, v)

            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
|
344 |
|
345 |
+
step_1_core_key_translate()
|
346 |
+
step_2_core_key_translate()
|
theme.py
CHANGED
@@ -103,35 +103,30 @@ def adjust_theme():
|
|
103 |
|
104 |
|
105 |
advanced_css = """
|
106 |
-
/* 设置表格的外边距为1em,内部单元格之间边框合并,空单元格显示. */
|
107 |
.markdown-body table {
|
108 |
margin: 1em 0;
|
109 |
border-collapse: collapse;
|
110 |
empty-cells: show;
|
111 |
}
|
112 |
|
113 |
-
/* 设置表格单元格的内边距为5px,边框粗细为1.2px,颜色为--border-color-primary. */
|
114 |
.markdown-body th, .markdown-body td {
|
115 |
border: 1.2px solid var(--border-color-primary);
|
116 |
padding: 5px;
|
117 |
}
|
118 |
|
119 |
-
/* 设置表头背景颜色为rgba(175,184,193,0.2),透明度为0.2. */
|
120 |
.markdown-body thead {
|
121 |
background-color: rgba(175,184,193,0.2);
|
122 |
}
|
123 |
|
124 |
-
/* 设置表头单元格的内边距为0.5em和0.2em. */
|
125 |
.markdown-body thead th {
|
126 |
padding: .5em .2em;
|
127 |
}
|
128 |
|
129 |
-
/* 去掉列表前缀的默认间距,使其与文本线对齐. */
|
130 |
.markdown-body ol, .markdown-body ul {
|
131 |
padding-inline-start: 2em !important;
|
132 |
}
|
133 |
|
134 |
-
/*
|
135 |
[class *= "message"] {
|
136 |
border-radius: var(--radius-xl) !important;
|
137 |
/* padding: var(--spacing-xl) !important; */
|
@@ -151,7 +146,7 @@ advanced_css = """
|
|
151 |
border-bottom-right-radius: 0 !important;
|
152 |
}
|
153 |
|
154 |
-
/*
|
155 |
.markdown-body code {
|
156 |
display: inline;
|
157 |
white-space: break-spaces;
|
@@ -171,7 +166,7 @@ advanced_css = """
|
|
171 |
background-color: rgba(175,184,193,0.2);
|
172 |
}
|
173 |
|
174 |
-
/*
|
175 |
.markdown-body pre code {
|
176 |
display: block;
|
177 |
overflow: auto;
|
|
|
103 |
|
104 |
|
105 |
advanced_css = """
|
|
|
106 |
.markdown-body table {
|
107 |
margin: 1em 0;
|
108 |
border-collapse: collapse;
|
109 |
empty-cells: show;
|
110 |
}
|
111 |
|
|
|
112 |
.markdown-body th, .markdown-body td {
|
113 |
border: 1.2px solid var(--border-color-primary);
|
114 |
padding: 5px;
|
115 |
}
|
116 |
|
|
|
117 |
.markdown-body thead {
|
118 |
background-color: rgba(175,184,193,0.2);
|
119 |
}
|
120 |
|
|
|
121 |
.markdown-body thead th {
|
122 |
padding: .5em .2em;
|
123 |
}
|
124 |
|
|
|
125 |
.markdown-body ol, .markdown-body ul {
|
126 |
padding-inline-start: 2em !important;
|
127 |
}
|
128 |
|
129 |
+
/* chat box. */
|
130 |
[class *= "message"] {
|
131 |
border-radius: var(--radius-xl) !important;
|
132 |
/* padding: var(--spacing-xl) !important; */
|
|
|
146 |
border-bottom-right-radius: 0 !important;
|
147 |
}
|
148 |
|
149 |
+
/* linein code block. */
|
150 |
.markdown-body code {
|
151 |
display: inline;
|
152 |
white-space: break-spaces;
|
|
|
166 |
background-color: rgba(175,184,193,0.2);
|
167 |
}
|
168 |
|
169 |
+
/* code block css */
|
170 |
.markdown-body pre code {
|
171 |
display: block;
|
172 |
overflow: auto;
|