qingxu99 committed on
Commit
c376e46
1 Parent(s): 8d52819

translate not fin

Browse files
Files changed (3) hide show
  1. docs/translate_english.json +0 -0
  2. multi_language.py +252 -113
  3. theme.py +3 -8
docs/translate_english.json ADDED
The diff for this file is too large to render. See raw diff
 
multi_language.py CHANGED
@@ -1,10 +1,13 @@
1
  import os
 
2
  import functools
3
  import re
4
  import pickle
5
  import time
6
 
7
  CACHE_FOLDER = "gpt_log"
 
 
8
 
9
  if not os.path.exists(CACHE_FOLDER):
10
  os.makedirs(CACHE_FOLDER)
@@ -78,7 +81,6 @@ def lru_file_cache(maxsize=128, ttl=None, filename=None):
78
 
79
  return decorator_function
80
 
81
-
82
  def contains_chinese(string):
83
  """
84
  Returns True if the given string contains Chinese characters, False otherwise.
@@ -86,122 +88,259 @@ def contains_chinese(string):
86
  chinese_regex = re.compile(u'[\u4e00-\u9fff]+')
87
  return chinese_regex.search(string) is not None
88
 
89
- def extract_chinese_characters(file_path):
90
- syntax = []
91
- with open(file_path, 'r', encoding='utf-8') as f:
92
- content = f.read()
93
- import ast
94
- root = ast.parse(content)
95
- for node in ast.walk(root):
96
- if isinstance(node, ast.Name):
97
- if contains_chinese(node.id):
98
- print(node.id)
99
- syntax.append(node)
100
-
101
- return syntax
102
-
103
- def extract_chinese_characters_from_directory(directory_path):
104
- chinese_characters = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  for root, dirs, files in os.walk(directory_path):
106
  for file in files:
107
  if file.endswith('.py'):
108
  file_path = os.path.join(root, file)
109
- chinese_characters.extend(extract_chinese_characters(file_path))
110
- return chinese_characters
111
-
112
- directory_path = './'
113
- chinese_characters = extract_chinese_characters_from_directory(directory_path)
114
- word_to_translate = {}
115
- for d in chinese_characters:
116
- word_to_translate[d['word']] = "TRANS"
117
-
118
- def break_dictionary(d, n):
119
- items = list(d.items())
120
- num_dicts = (len(items) + n - 1) // n
121
- return [{k: v for k, v in items[i*n:(i+1)*n]} for i in range(num_dicts)]
122
-
123
- N_EACH_REQ = 50
124
- word_to_translate_split = break_dictionary(word_to_translate, N_EACH_REQ)
125
- LANG = "English"
126
-
127
- @lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
128
- def trans(words):
129
- # from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
130
- # from toolbox import get_conf, ChatBotWithCookies
131
- # proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
132
- # get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
133
- # llm_kwargs = {
134
- # 'api_key': API_KEY,
135
- # 'llm_model': LLM_MODEL,
136
- # 'top_p':1.0,
137
- # 'max_length': None,
138
- # 'temperature':0.0,
139
- # }
140
- # plugin_kwargs = {}
141
- # chatbot = ChatBotWithCookies(llm_kwargs)
142
- # history = []
143
- # for gpt_say in request_gpt_model_in_new_thread_with_ui_alive(
144
- # inputs=words, inputs_show_user=words,
145
- # llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
146
- # sys_prompt=f"Translate following words to {LANG}, replace `TRANS` with translated result."
147
- # ):
148
- # gpt_say = gpt_say[1][0][1]
149
- # return gpt_say
150
- return '{}'
151
-
152
- translated_result = {}
153
- for d in word_to_translate_split:
154
- res = trans(str(d))
155
- try:
156
- # convert translated result back to python dictionary
157
- res_dict = eval(res)
158
- except:
159
- print('Unexpected output.')
160
- translated_result.update(res_dict)
161
-
162
- print('All Chinese characters:', chinese_characters)
163
-
164
-
165
- # =================== create copy =====================
166
- def copy_source_code():
167
- """
168
- 一键更新协议:备份和下载
169
- """
170
- from toolbox import get_conf
171
- import shutil
172
- import os
173
- import requests
174
- import zipfile
175
- try: shutil.rmtree(f'./multi-language/{LANG}/')
176
- except: pass
177
- os.makedirs(f'./multi-language', exist_ok=True)
178
- backup_dir = f'./multi-language/{LANG}/'
179
- shutil.copytree('./', backup_dir, ignore=lambda x, y: ['multi-language', 'gpt_log', '.git', 'private_upload'])
180
- copy_source_code()
181
-
182
-
183
- for d in chinese_characters:
184
- d['file'] = f'./multi-language/{LANG}/' + d['file']
185
- if d['word'] in translated_result:
186
- d['trans'] = translated_result[d['word']]
187
- else:
188
- d['trans'] = None
189
-
190
- chinese_characters = sorted(chinese_characters, key=lambda x: len(x['word']), reverse=True)
191
- for d in chinese_characters:
192
- if d['trans'] is None:
193
- continue
194
-
195
-
196
 
197
- with open(d['file'], 'r', encoding='utf-8') as f:
198
- content = f.read()
199
-
200
- content.replace(d['word'], d['trans'])
201
- substring = d['trans']
202
- substring_start_index = content.find(substring)
203
- substring_end_index = substring_start_index + len(substring) - 1
204
- if content[substring_start_index].isalpha() or content[substring_start_index].isdigit():
205
- content = content[:substring_start_index+1]
206
 
207
 
 
 
 
1
  import os
2
+ import json
3
  import functools
4
  import re
5
  import pickle
6
  import time
7
 
8
  CACHE_FOLDER = "gpt_log"
9
+ blacklist = ['multi-language', 'gpt_log', '.git', 'private_upload']
10
+ LANG = "English"
11
 
12
  if not os.path.exists(CACHE_FOLDER):
13
  os.makedirs(CACHE_FOLDER)
 
81
 
82
  return decorator_function
83
 
 
84
  def contains_chinese(string):
85
  """
86
  Returns True if the given string contains Chinese characters, False otherwise.
 
88
  chinese_regex = re.compile(u'[\u4e00-\u9fff]+')
89
  return chinese_regex.search(string) is not None
90
 
91
+ def split_list(lst, n_each_req):
92
+ """
93
+ Split a list into smaller lists, each with a maximum number of elements.
94
+ :param lst: the list to split
95
+ :param n_each_req: the maximum number of elements in each sub-list
96
+ :return: a list of sub-lists
97
+ """
98
+ result = []
99
+ for i in range(0, len(lst), n_each_req):
100
+ result.append(lst[i:i + n_each_req])
101
+ return result
102
+
103
+ def map_to_json(map, language):
104
+ dict_ = read_map_from_json(language)
105
+ dict_.update(map)
106
+ with open(f'docs/translate_{language.lower()}.json', 'w', encoding='utf8') as f:
107
+ json.dump(dict_, f, indent=4, ensure_ascii=False)
108
+
109
+ def read_map_from_json(language):
110
+ if os.path.exists(f'docs/translate_{language.lower()}.json'):
111
+ with open(f'docs/translate_{language.lower()}.json', 'r', encoding='utf8') as f:
112
+ return json.load(f)
113
+ return {}
114
+
115
+ cached_translation = {}
116
+ cached_translation = read_map_from_json(language=LANG)
117
+
118
+ @lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
119
+ def trans(word_to_translate, language, special=False):
120
+ if len(word_to_translate) == 0: return {}
121
+ from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
122
+ from toolbox import get_conf, ChatBotWithCookies
123
+ proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
124
+ get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
125
+ llm_kwargs = {
126
+ 'api_key': API_KEY,
127
+ 'llm_model': LLM_MODEL,
128
+ 'top_p':1.0,
129
+ 'max_length': None,
130
+ 'temperature':0.0,
131
+ }
132
+ N_EACH_REQ = 16
133
+ word_to_translate_split = split_list(word_to_translate, N_EACH_REQ)
134
+ inputs_array = [str(s) for s in word_to_translate_split]
135
+ inputs_show_user_array = inputs_array
136
+ history_array = [[] for _ in inputs_array]
137
+ if special: # to English using CamelCase Naming Convention
138
+ sys_prompt_array = [f"Translate following names to English with CamelCase naming convention. Keep original format" for _ in inputs_array]
139
+ else:
140
+ sys_prompt_array = [f"Translate following sentences to {LANG}. Keep original format." for _ in inputs_array]
141
+ chatbot = ChatBotWithCookies(llm_kwargs)
142
+ gpt_say_generator = request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
143
+ inputs_array,
144
+ inputs_show_user_array,
145
+ llm_kwargs,
146
+ chatbot,
147
+ history_array,
148
+ sys_prompt_array,
149
+ )
150
+ while True:
151
+ try:
152
+ gpt_say = next(gpt_say_generator)
153
+ print(gpt_say[1][0][1])
154
+ except StopIteration as e:
155
+ result = e.value
156
+ break
157
+ translated_result = {}
158
+ for i, r in enumerate(result):
159
+ if i%2 == 1:
160
+ try:
161
+ res_before_trans = eval(result[i-1])
162
+ res_after_trans = eval(result[i])
163
+ for a,b in zip(res_before_trans, res_after_trans):
164
+ translated_result[a] = b
165
+ except:
166
+ try:
167
+ res_before_trans = eval(result[i-1])
168
+ result[i] = result[i].strip('[\']')
169
+ res_after_trans = [s for s in result[i].split("', '")]
170
+ for a,b in zip(res_before_trans, res_after_trans):
171
+ translated_result[a] = b
172
+ except:
173
+ res_before_trans = eval(result[i-1])
174
+ for a in res_before_trans:
175
+ translated_result[a] = None
176
+ return translated_result
177
+
178
+ def step_1_core_key_translate():
179
+ def extract_chinese_characters(file_path):
180
+ syntax = []
181
+ with open(file_path, 'r', encoding='utf-8') as f:
182
+ content = f.read()
183
+ import ast
184
+ root = ast.parse(content)
185
+ for node in ast.walk(root):
186
+ if isinstance(node, ast.Name):
187
+ if contains_chinese(node.id): syntax.append(node.id)
188
+ if isinstance(node, ast.Import):
189
+ for n in node.names:
190
+ if contains_chinese(n.name): syntax.append(n.name)
191
+ elif isinstance(node, ast.ImportFrom):
192
+ for n in node.names:
193
+ if contains_chinese(n.name): syntax.append(n.name)
194
+ for k in node.module.split('.'):
195
+ if contains_chinese(k): syntax.append(k)
196
+ return syntax
197
+
198
+ def extract_chinese_characters_from_directory(directory_path):
199
+ chinese_characters = []
200
+ for root, dirs, files in os.walk(directory_path):
201
+ if any([b in root for b in blacklist]):
202
+ continue
203
+ for file in files:
204
+ if file.endswith('.py'):
205
+ file_path = os.path.join(root, file)
206
+ chinese_characters.extend(extract_chinese_characters(file_path))
207
+ return chinese_characters
208
+
209
+ directory_path = './'
210
+ chinese_core_names = extract_chinese_characters_from_directory(directory_path)
211
+ chinese_core_keys = [name for name in chinese_core_names]
212
+ chinese_core_keys_norepeat = []
213
+ for d in chinese_core_keys:
214
+ if d not in chinese_core_keys_norepeat: chinese_core_keys_norepeat.append(d)
215
+ need_translate = []
216
+ cached_translation = read_map_from_json(language=LANG)
217
+ cached_translation_keys = list(cached_translation.keys())
218
+ for d in chinese_core_keys_norepeat:
219
+ if d not in cached_translation_keys:
220
+ need_translate.append(d)
221
+
222
+ need_translate_mapping = trans(need_translate, language=LANG, special=True)
223
+ map_to_json(need_translate_mapping, language=LANG)
224
+ cached_translation = read_map_from_json(language=LANG)
225
+ cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))
226
+
227
+ chinese_core_keys_norepeat_mapping = {}
228
+ for k in chinese_core_keys_norepeat:
229
+ chinese_core_keys_norepeat_mapping.update({k:cached_translation[k]})
230
+
231
+ # ===============================================
232
+ # copy
233
+ # ===============================================
234
+ def copy_source_code():
235
+
236
+ from toolbox import get_conf
237
+ import shutil
238
+ import os
239
+ try: shutil.rmtree(f'./multi-language/{LANG}/')
240
+ except: pass
241
+ os.makedirs(f'./multi-language', exist_ok=True)
242
+ backup_dir = f'./multi-language/{LANG}/'
243
+ shutil.copytree('./', backup_dir, ignore=lambda x, y: blacklist)
244
+ copy_source_code()
245
+
246
+ # ===============================================
247
+ # primary key replace
248
+ # ===============================================
249
+ directory_path = f'./multi-language/{LANG}/'
250
  for root, dirs, files in os.walk(directory_path):
251
  for file in files:
252
  if file.endswith('.py'):
253
  file_path = os.path.join(root, file)
254
+ syntax = []
255
+ # read again
256
+ with open(file_path, 'r', encoding='utf-8') as f:
257
+ content = f.read()
258
+
259
+ for k, v in chinese_core_keys_norepeat_mapping.items():
260
+ content = content.replace(k, v)
261
+
262
+ with open(file_path, 'w', encoding='utf-8') as f:
263
+ f.write(content)
264
+
265
+
266
+ def step_2_core_key_translate():
267
+
268
+ # =================================================================================================
269
+ # step2
270
+ # =================================================================================================
271
+ def get_strings(node):
272
+ strings = []
273
+
274
+ # recursively traverse the AST
275
+ for child in ast.iter_child_nodes(node):
276
+ if isinstance(child, ast.Str):
277
+ if contains_chinese(child.s):
278
+ string_ = child.s.strip().strip(',').strip().strip('.').strip()
279
+ if string_.startswith('[Local Message]'):
280
+ string_ = string_.replace('[Local Message]', '')
281
+ string_ = string_.strip().strip(',').strip().strip('.').strip()
282
+ strings.append([
283
+ string_,
284
+ child.lineno*10000+child.col_offset
285
+ ])
286
+ elif isinstance(child, ast.AST):
287
+ strings.extend(get_strings(child))
288
+
289
+ return strings
290
+
291
+ string_literals = []
292
+ directory_path = f'./multi-language/{LANG}/'
293
+ for root, dirs, files in os.walk(directory_path):
294
+ for file in files:
295
+ if file.endswith('.py'):
296
+ file_path = os.path.join(root, file)
297
+ syntax = []
298
+ with open(file_path, 'r', encoding='utf-8') as f:
299
+ content = f.read()
300
+ import ast
301
+ tree = ast.parse(content)
302
+ res = get_strings(tree)
303
+ string_literals.extend(res)
304
+
305
+ chinese_literal_names = []
306
+ chinese_literal_names_norepeat = []
307
+ for string, offset in string_literals:
308
+ chinese_literal_names.append(string)
309
+ chinese_literal_names_norepeat = []
310
+ for d in chinese_literal_names:
311
+ if d not in chinese_literal_names_norepeat: chinese_literal_names_norepeat.append(d)
312
+ need_translate = []
313
+ cached_translation = read_map_from_json(language=LANG)
314
+ cached_translation_keys = list(cached_translation.keys())
315
+ for d in chinese_literal_names_norepeat:
316
+ if d not in cached_translation_keys:
317
+ need_translate.append(d)
318
+
319
+
320
+ up = trans(need_translate, language=LANG, special=False)
321
+ map_to_json(up, language=LANG)
322
+ cached_translation = read_map_from_json(language=LANG)
323
+ cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))
324
+
325
+ # ===============================================
326
+ # literal key replace
327
+ # ===============================================
328
+ directory_path = f'./multi-language/{LANG}/'
329
+ for root, dirs, files in os.walk(directory_path):
330
+ for file in files:
331
+ if file.endswith('.py'):
332
+ file_path = os.path.join(root, file)
333
+ syntax = []
334
+ # read again
335
+ with open(file_path, 'r', encoding='utf-8') as f:
336
+ content = f.read()
337
+
338
+ for k, v in cached_translation.items():
339
+ content = content.replace(k, v)
 
340
 
341
+ with open(file_path, 'w', encoding='utf-8') as f:
342
+ f.write(content)
 
 
 
 
 
 
 
343
 
344
 
345
+ step_1_core_key_translate()
346
+ step_2_core_key_translate()
theme.py CHANGED
@@ -103,35 +103,30 @@ def adjust_theme():
103
 
104
 
105
  advanced_css = """
106
- /* 设置表格的外边距为1em,内部单元格之间边框合并,空单元格显示. */
107
  .markdown-body table {
108
  margin: 1em 0;
109
  border-collapse: collapse;
110
  empty-cells: show;
111
  }
112
 
113
- /* 设置表格单元格的内边距为5px,边框粗细为1.2px,颜色为--border-color-primary. */
114
  .markdown-body th, .markdown-body td {
115
  border: 1.2px solid var(--border-color-primary);
116
  padding: 5px;
117
  }
118
 
119
- /* 设置表头背景颜色为rgba(175,184,193,0.2),透明度为0.2. */
120
  .markdown-body thead {
121
  background-color: rgba(175,184,193,0.2);
122
  }
123
 
124
- /* 设置表头单元格的内边距为0.5em和0.2em. */
125
  .markdown-body thead th {
126
  padding: .5em .2em;
127
  }
128
 
129
- /* 去掉列表前缀的默认间距,使其与文本线对齐. */
130
  .markdown-body ol, .markdown-body ul {
131
  padding-inline-start: 2em !important;
132
  }
133
 
134
- /* 设定聊天气泡的样式,包括圆角、最大宽度和阴影等. */
135
  [class *= "message"] {
136
  border-radius: var(--radius-xl) !important;
137
  /* padding: var(--spacing-xl) !important; */
@@ -151,7 +146,7 @@ advanced_css = """
151
  border-bottom-right-radius: 0 !important;
152
  }
153
 
154
- /* 行内代码的背景设为淡灰色,设定圆角和间距. */
155
  .markdown-body code {
156
  display: inline;
157
  white-space: break-spaces;
@@ -171,7 +166,7 @@ advanced_css = """
171
  background-color: rgba(175,184,193,0.2);
172
  }
173
 
174
- /* 设定代码块的样式,包括背景颜色、内、外边距、圆角。 */
175
  .markdown-body pre code {
176
  display: block;
177
  overflow: auto;
 
103
 
104
 
105
  advanced_css = """
 
106
  .markdown-body table {
107
  margin: 1em 0;
108
  border-collapse: collapse;
109
  empty-cells: show;
110
  }
111
 
 
112
  .markdown-body th, .markdown-body td {
113
  border: 1.2px solid var(--border-color-primary);
114
  padding: 5px;
115
  }
116
 
 
117
  .markdown-body thead {
118
  background-color: rgba(175,184,193,0.2);
119
  }
120
 
 
121
  .markdown-body thead th {
122
  padding: .5em .2em;
123
  }
124
 
 
125
  .markdown-body ol, .markdown-body ul {
126
  padding-inline-start: 2em !important;
127
  }
128
 
129
+ /* chat box. */
130
  [class *= "message"] {
131
  border-radius: var(--radius-xl) !important;
132
  /* padding: var(--spacing-xl) !important; */
 
146
  border-bottom-right-radius: 0 !important;
147
  }
148
 
149
+ /* linein code block. */
150
  .markdown-body code {
151
  display: inline;
152
  white-space: break-spaces;
 
166
  background-color: rgba(175,184,193,0.2);
167
  }
168
 
169
+ /* code block css */
170
  .markdown-body pre code {
171
  display: block;
172
  overflow: auto;