alienet commited on
Commit
aa28c7f
·
1 Parent(s): 8053048
EasyTranslator.py ADDED
@@ -0,0 +1,607 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from os import path as osp
3
+ import json
4
+ from utils import *
5
+ from themes import *
6
+
7
# Initialization
# `id` refers to the identifier of a line of dialogue; it is a string key of the text JSON.
# `idx` refers to the position of a line in file order: 0, 1, 2, ...
config_path = osp.join(osp.dirname(osp.abspath(__file__)), "./config.json")
args = load_config(config_path)
if_save_id_immediately = bool(int(args["if_save_id_immediately"]))
moyu_mode = bool(int(args["moyu_mode"]))
path = args["file_path"]
abs_path = smart_path(path)
replace_dict_path = smart_path(args["replace_dict_path"])
name_dict_path = smart_path(args["name_dict_path"])
# Ids whose "text_CN" was edited since the last save.
altered_text_finals = set()

# Start from empty state so the handlers below never hit an undefined global
# when the configured text file does not exist yet.
dic = {}
id_lis = []
idx_dic = {}
id_idx = 0
if osp.exists(abs_path):
    with open(abs_path, "r", encoding="utf8") as json_file:
        dic = json.load(json_file)
    id_lis = list(dic.keys())
    idx_dic = {id_: idx for idx, id_ in enumerate(id_lis)}
    # Resume at the last edited line when it is still present in the file.
    id_idx = idx_dic.get(args["last_edited_id"], 0)


def _load_pair_dict(dict_path, allow_empty_key=True):
    """Read a "<key> <value>" per-line text file into a dict.

    Lines without a space separator (including blank lines) are skipped
    instead of raising IndexError. Only the first two space-separated
    fields of a line are used.

    Args:
        dict_path: Path of the dictionary file; a missing file yields {}.
        allow_empty_key: When False, lines whose key is empty are skipped.

    Returns:
        dict mapping the first field of each line to the second.
    """
    pairs = {}
    if not osp.exists(dict_path):
        return pairs
    with open(dict_path, "r", encoding="utf-8") as f:
        for line in f:
            item = line.rstrip("\n").split(" ")
            if len(item) < 2:
                continue
            if not item[0] and not allow_empty_key:
                continue
            pairs[item[0]] = item[1]
    return pairs


# Dict for replacement
# An empty key is rejected: str.replace("", value) would insert `value`
# between every character of the text.
replace_dic = _load_pair_dict(replace_dict_path, allow_empty_key=False)

# Dict for name
name_dic = _load_pair_dict(name_dict_path)
52
+
53
+ # Translate
54
def gpt_translate(text, text_id):
    """Translate ``text`` through the GPT helper and cache the result.

    Newlines in ``text`` are replaced by spaces before the prompt is built
    from the configured prefix and postfix. The translation is stored in
    ``dic[text_id]["gpt3"]`` only when the request succeeded and ``text``
    still equals the stored source text, so translating an edited copy of
    the source does not overwrite the cached translation.

    Args:
        text: Source text shown in the editor.
        text_id: Id of the line being edited; may be empty or unknown.

    Returns:
        The first value returned by ``get_gpt_completion``.
    """
    text = text.replace("\n", " ")
    settings = args["openai_api_settings"]
    prompt = settings["prompt_prefix"] + text + settings["prompt_postfix"]
    translation, if_succ = get_gpt_completion(prompt, api_key=settings["openai_api_key"])
    # An empty/unknown id used to raise KeyError; now the translation is
    # simply returned without being cached.
    entry = dic.get(text_id)
    if if_succ and entry is not None and entry["text"].replace("\n", " ") == text:
        entry["gpt3"] = translation
    return translation
61
+
62
def baidu_translate(text, text_id):
    """Translate ``text`` through the Baidu helper and cache the result.

    Newlines in ``text`` are replaced by spaces before the request. The
    translation is stored in ``dic[text_id]["baidu"]`` only when ``text``
    still equals the stored source text.

    Args:
        text: Source text shown in the editor.
        text_id: Id of the line being edited; may be empty or unknown.

    Returns:
        The value returned by ``get_baidu_completion``.
    """
    text = text.replace("\n", " ")
    settings = args["baidu_api_settings"]
    translation = get_baidu_completion(
        text,
        api_id=settings["api_id"],
        api_key=settings["api_key"],
        from_lang=settings["from_lang"],
        to_lang=settings["to_lang"],
    )
    # An empty/unknown id used to raise KeyError; now the translation is
    # simply returned without being cached.
    entry = dic.get(text_id)
    if entry is not None and entry["text"].replace("\n", " ") == text:
        entry["baidu"] = translation
    return translation
72
+
73
def batch_translate(radio, check, text_start_id, text_end_id, progress=gr.Progress()):
    """Machine-translate every line from ``text_start_id`` to ``text_end_id``.

    Args:
        radio: Selected backend, "Gpt3" or "Baidu".
        check: When truthy, the JSON file is saved after the batch.
        text_start_id: Id of the first line (inclusive).
        text_end_id: Id of the last line (inclusive).
        progress: Gradio progress tracker.

    Returns:
        A summary string for the progress label, or None when the request
        was rejected (a warning is shown instead).
    """
    progress(0, desc="Starting...")
    # idx_dic holds the same ids as id_lis but gives O(1) membership tests.
    if (text_start_id not in idx_dic or text_end_id not in idx_dic
            or idx_dic[text_start_id] > idx_dic[text_end_id]):
        gr.Warning("找不到指定序号, 或id前后顺序错误")
        return
    translators = {"Gpt3": gpt_translate, "Baidu": baidu_translate}
    translate = translators.get(radio)
    if translate is None:
        # Previously nothing was translated yet success was reported.
        gr.Warning("请先选择翻译接口")
        return
    start = idx_dic[text_start_id]
    end = idx_dic[text_end_id] + 1
    for key in progress.tqdm(id_lis[start:end]):
        translate(dic[key]['text'], key)
        time.sleep(0.1)  # short pause between consecutive API requests
    if check:
        save_json(show_info=False)
    gr.Info(f"批量机翻成功, 共完成{end-start}句翻译")
    return f"已完成{end-start}句翻译"
93
+
94
+ # Other actions
95
def last_text():
    """Move the cursor to the previous line (if any) and return its id."""
    global id_idx
    if id_idx >= 1:
        id_idx = id_idx - 1
    return id_lis[id_idx]
100
+
101
def next_text():
    """Move the cursor to the next line (if any) and return its id."""
    global id_idx
    if id_idx + 1 < len(id_lis):
        id_idx = id_idx + 1
    return id_lis[id_idx]
106
+
107
def replace(text_gpt, text_baidu, text_final, text_id, check_file=True):
    """Apply the replacement dictionary to the three translation texts.

    Args:
        text_gpt: Current GPT translation.
        text_baidu: Current Baidu translation.
        text_final: Current manual translation.
        text_id: Id of the line; when empty the current cursor line is used.
        check_file: When true, the replacement dictionary file is re-read
            first so edits made while the program runs take effect.

    Returns:
        Tuple ``(text_gpt, text_baidu, text_final)`` after replacement. The
        values are also written back to ``dic[text_id]`` when the id exists.
    """
    if not text_id:
        text_id = id_lis[id_idx]
    if check_file and osp.exists(replace_dict_path):
        with open(replace_dict_path, "r", encoding="utf-8") as f:
            for line in f:
                item = line.rstrip("\n").split(" ")
                # Skip blank/space-less lines (they used to raise IndexError)
                # and empty keys (str.replace("", v) would insert v between
                # every character).
                if len(item) < 2 or not item[0]:
                    continue
                replace_dic[item[0]] = item[1]
    for key, value in replace_dic.items():
        text_gpt = text_gpt.replace(key, value)
        text_baidu = text_baidu.replace(key, value)
        text_final = text_final.replace(key, value)
    entry = dic.get(text_id)
    if entry is not None:
        entry["gpt3"] = text_gpt
        entry["baidu"] = text_baidu
        entry["text_CN"] = text_final
    return text_gpt, text_baidu, text_final
126
+
127
def change_id(text_id):
    """Switch the editor to ``text_id`` and return the values for its widgets.

    Missing "gpt3", "baidu" and "text_CN" fields are created as empty
    strings, the name dictionary is seeded with the original name, and the
    replacement dictionary is applied to the stored translations (without
    re-reading the dictionary file). The id is persisted to the config when
    immediate saving is enabled.

    Returns:
        Tuple of (file path, text, name, name_CN, gpt3, baidu, text_CN);
        the last six are empty strings when the id is empty or unknown.
    """
    global id_idx
    if not text_id or text_id not in idx_dic:
        return args["file_path"], "", "", "", "", "", ""
    id_idx = idx_dic[text_id]
    entry = dic[text_id]
    for field in ("gpt3", "baidu", "text_CN"):
        entry.setdefault(field, "")
    name = entry["name"]
    name_dic.setdefault(name, name)
    entry["name_CN"] = name_dic[name]
    replace(entry["gpt3"], entry["baidu"], entry["text_CN"], text_id, False)
    if if_save_id_immediately:
        args["last_edited_id"] = text_id
        save_config(args, config_path)
    return (
        args["file_path"],
        entry["text"],
        name,
        name_dic[name],
        entry["gpt3"],
        entry["baidu"],
        entry["text_CN"],
    )
146
+
147
def change_final(text, text_id):
    """Store an edited manual translation and mark the line as altered.

    Does nothing when the id is empty/unknown or the text is unchanged.
    """
    if not text_id or text_id not in idx_dic:
        return
    entry = dic[text_id]
    if entry["text_CN"] == text:
        return
    entry["text_CN"] = text
    altered_text_finals.add(text_id)
153
+
154
def change_name(name, name_cn, text_id):
    """Record ``name_cn`` as the translation of ``name``.

    Updates both the name dictionary and the current line's "name_CN".
    Does nothing when the id is empty or unknown.
    """
    if text_id and text_id in idx_dic:
        name_dic[name] = name_cn
        dic[text_id]["name_CN"] = name_cn
159
+
160
def save_json(show_info=True):
    """Write the in-memory text dictionary and the name dictionary to disk.

    The target is resolved from the current ``path`` on every call, because
    ``load_last_position`` rebinds ``path`` and ``dic`` when the user opens
    another file. Writing to the ``abs_path`` computed at startup would put
    the new file's content into the previously opened file.

    Args:
        show_info: When true, show a toast with the number of altered lines.
    """
    global altered_text_finals
    with open(smart_path(path), "w", encoding="utf8") as json_file:
        json.dump(dic, json_file, indent=1, ensure_ascii=False)
    # The name dictionary is only rewritten when its file already exists.
    if osp.exists(name_dict_path):
        with open(name_dict_path, "w", encoding="utf-8") as f:
            f.writelines(f"{key} {value}\n" for key, value in name_dic.items())
    if show_info:
        gr.Info(f"JSON保存成功, 共更新{len(altered_text_finals)}句译文")
    altered_text_finals = set()
171
+
172
def save_last_position(text_id):
    """Persist ``text_id`` as the last edited id in the config file."""
    args.update({"last_edited_id": text_id})
    save_config(args, config_path)
176
+
177
def load_last_position(text_path):
    """Load ``text_path`` (when it differs from the open file) and return the last edited id.

    When the path changes, the text dictionary and its id/index tables are
    rebuilt, the cursor is reset to the first line and the new path is
    written to the config. ``abs_path`` is refreshed as well so that code
    saving through it targets the newly opened file rather than the one
    opened at startup.

    Args:
        text_path: Path typed into the "File Path" box.

    Returns:
        ``args["last_edited_id"]``.

    Raises:
        gr.Error: If the file does not exist.
    """
    global id_idx, id_lis, idx_dic, path, dic, abs_path
    resolved = smart_path(text_path)
    if not osp.exists(resolved):
        raise gr.Error("文件不存在")
    if path != text_path:
        path = text_path
        abs_path = resolved
        with open(resolved, "r", encoding="utf8") as json_file:
            dic = json.load(json_file)
        id_lis = list(dic.keys())
        idx_dic = {id_: idx for idx, id_ in enumerate(id_lis)}
        id_idx = 0
        args["file_path"] = path
        save_config(args, config_path)
    return args["last_edited_id"]
193
+
194
def submit_api(baidu_api_id, baidu_api_key, from_lang, to_lang, openai_api_key, prefix, postfix, target_id):
    """Store the API settings form into ``args`` and save the config.

    Empty Baidu fields and an empty OpenAI key leave the stored value
    untouched; prompt prefix/postfix and the target id are always written.
    """
    baidu_settings = args["baidu_api_settings"]
    baidu_form = (
        ("api_id", baidu_api_id),
        ("api_key", baidu_api_key),
        ("from_lang", from_lang),
        ("to_lang", to_lang),
    )
    for field, value in baidu_form:
        if value != "":
            baidu_settings[field] = value
    openai_settings = args["openai_api_settings"]
    if openai_api_key != "":
        openai_settings["openai_api_key"] = openai_api_key
    openai_settings["prompt_prefix"] = prefix
    openai_settings["prompt_postfix"] = postfix
    args["target_id"] = target_id
    save_config(args, config_path)
211
+
212
def refresh_context(refresh_id, length, context_type):
    """Build the preview table around ``refresh_id``.

    Args:
        refresh_id: Id of the line to centre the preview on.
        length: Number of neighbouring lines to include (int or numeric str).
        context_type: "上下文" (both sides), "上文" (before) or "下文"
            (after); any other value is treated as both sides.

    Returns:
        ``(rows, current_id)`` where each row is
        ``[id, name, name_CN, text, text_CN]``. The centred id is wrapped in
        ``**`` and altered translations are prefixed with ``*``.
        ``current_id`` is the id under the editor cursor ("" when no lines
        are loaded).
    """
    current_id = id_lis[id_idx] if id_lis else ""
    if not refresh_id or refresh_id not in idx_dic:
        return [], current_id
    try:
        # Negative lengths are clamped so the centred line is always shown.
        length = max(int(length), 0)
    except (TypeError, ValueError):
        gr.Warning("上下文长度必须为整数")
        return [], current_id
    idx = idx_dic[refresh_id]
    if context_type == "上文":
        ids = id_lis[max(idx - length, 0):idx + 1]
    elif context_type == "下文":
        ids = id_lis[idx:idx + length + 1]
    else:
        # "上下文"; also the fallback for an unset radio, which used to
        # leave `ids` unbound.
        ids = id_lis[max(idx - length, 0):idx + length + 1]
    data = []
    for i in ids:
        entry = dic[i]
        name_dic.setdefault(entry["name"], entry["name"])
        entry["name_CN"] = name_dic[entry["name"]]
        entry.setdefault('text_CN', "")
        row = [i, entry['name'], entry['name_CN'], entry['text'], entry['text_CN']]
        if i == refresh_id:
            row[0] = f"**{i}**"
        if i in altered_text_finals:
            row[4] = f"*{row[4]}"
        data.append(row)
    return data, current_id
235
+
236
def save_context(data, refresh_id, if_save=False):
    """Write translations edited in the preview table back into ``dic``.

    Args:
        data: Table with at least the columns 'id' and 'text_CN'
            (indexable by column name, as the handler receives it from the
            preview dataframe).
        refresh_id: Unused; kept for the event signature.
        if_save: When true, the JSON file is saved afterwards.
    """
    altered = 0
    for raw_id, text_cn in zip(data['id'], data['text_CN']):
        # Strip the "**id**" highlight added by the preview.
        text_id = str(raw_id).replace("*", "")
        if text_id not in dic:
            # Empty or user-added rows used to raise KeyError.
            continue
        if not isinstance(text_cn, str):
            # None / NaN cells mean "no translation".
            text_cn = "" if text_cn is None or text_cn != text_cn else str(text_cn)
        # Drop the "*" marker the preview puts in front of altered lines.
        if text_id in altered_text_finals and text_cn and text_cn[0] == "*":
            text_cn = text_cn[1:]
        if dic[text_id].get('text_CN', "") != text_cn:
            altered += 1
            altered_text_finals.add(text_id)
            dic[text_id]['text_CN'] = text_cn
    gr.Info(f"已修改{altered}条译文")
    if if_save:
        save_json()
252
+
253
+ # Derive text
254
def derive_text(radio_type, text_start_id, text_end_id, text_seperator_long, text_seperator_short, output_txt_path):
    """Export the lines from ``text_start_id`` to ``text_end_id`` to a .txt file.

    Entries that have never been opened in the editor may lack "name_CN" or
    "text_CN"; they are exported with the name dictionary translation (or
    the original name) and an empty translation instead of raising KeyError.

    Args:
        radio_type: One of "双语|人名文本", "中文|人名文本",
            "中文|单次人名文本", "中文|纯文本".
        text_start_id: Id of the first exported line (inclusive).
        text_end_id: Id of the last exported line (inclusive).
        text_seperator_long: Separator written before each record.
        text_seperator_short: Separator between source and translation in
            the bilingual mode.
        output_txt_path: Destination path; must end in ".txt".
    """
    output_txt_path = smart_path(output_txt_path)
    if not output_txt_path.endswith(".txt"):
        gr.Warning("输出路径错误")
        return
    if (text_start_id not in idx_dic or text_end_id not in idx_dic
            or idx_dic[text_start_id] > idx_dic[text_end_id]):
        gr.Warning("找不到指定序号, 或id前后顺序错误")
        return
    export_types = ("双语|人名文本", "中文|人名文本", "中文|单次人名文本", "中文|纯文本")
    if radio_type not in export_types:
        # Previously nothing was written yet success was reported.
        gr.Warning("请先选择导出类型")
        return
    lis = id_lis[idx_dic[text_start_id]:idx_dic[text_end_id] + 1]

    chunks = []
    seen_names = set()
    for key in lis:
        entry = dic[key]
        name = entry.get("name", "")
        text = entry.get("text", "")
        name_cn = entry.get("name_CN", name_dic.get(name, name))
        text_cn = entry.get("text_CN", "")
        if radio_type == "双语|人名文本":
            chunks.append(
                f"{text_seperator_long}\n{name}\n\n{text}\n\n"
                f"{text_seperator_short}\n{name_cn}\n\n{text_cn}\n\n"
            )
        elif radio_type == "中文|人名文本":
            chunks.append(f"{text_seperator_long}\n{name_cn}\n\n{text_cn}\n\n")
        elif radio_type == "中文|单次人名文本":
            # A speaker's name is printed only the first time it appears.
            # NOTE(review): one blank line after every record is assumed.
            if name_cn not in seen_names:
                seen_names.add(name_cn)
                chunks.append(f"{name_cn}: {text_cn}\n\n")
            else:
                chunks.append(f"{text_cn}\n\n")
        else:  # "中文|纯文本"
            chunks.append(f"{text_cn}\n\n")

    with open(output_txt_path, "w", encoding="utf-8") as f:
        f.write("".join(chunks))
    gr.Info(f"Txt导出成功, 共导出{len(lis)}条记录")
307
+
308
def get_remaining_text_num():
    """Return the progress label: lines left between the cursor and the target id."""
    target = args["target_id"]
    if target not in id_lis:
        return "目标剩余???条"
    rem = idx_dic[target] - id_idx
    return f"目标剩余{rem}条"
316
+
317
def merge_json(merged_path, file_merging_json, text_start_id, text_end_id, type):
    """Merge a range of entries from an uploaded JSON into the file at ``merged_path``.

    Args:
        merged_path: Path of the JSON file that receives the merge.
        file_merging_json: Uploaded file (an object with ``.name`` or a
            plain path string).
        text_start_id: First id of the range (inclusive), looked up in the
            target file.
        text_end_id: Last id of the range (inclusive).
        type: "仅人工翻译" copies only "text_CN"; any other value replaces
            the whole entry.

    Ids of the range that are missing from the uploaded file (or, in
    "仅人工翻译" mode, have no "text_CN") are left untouched instead of
    raising KeyError.
    """
    merged_path = smart_path(merged_path)
    if not osp.exists(merged_path):
        gr.Warning("路径不存在")
        return
    if file_merging_json is None:
        gr.Warning("请先上传待合并的json文件")
        return
    with open(merged_path, "r", encoding="utf8") as json_file:
        dic_merge = json.load(json_file)
    id_lis_merge = list(dic_merge)
    idx_dic_merge = {id_: idx for idx, id_ in enumerate(id_lis_merge)}
    if (text_start_id not in idx_dic_merge or text_end_id not in idx_dic_merge
            or idx_dic_merge[text_start_id] > idx_dic_merge[text_end_id]):
        gr.Warning("找不到指定序号, 或id前后顺序错误")
        return
    upload_path = getattr(file_merging_json, "name", file_merging_json)
    with open(upload_path, "r", encoding="utf8") as json_file:
        dic_new = json.load(json_file)

    updated = 0
    first = idx_dic_merge[text_start_id]
    last = idx_dic_merge[text_end_id]
    for merge_id in id_lis_merge[first:last + 1]:
        new_entry = dic_new.get(merge_id)
        if new_entry is None:
            continue
        if type == "仅人工翻译":
            if 'text_CN' not in new_entry:
                continue
            dic_merge[merge_id]['text_CN'] = new_entry['text_CN']
        else:
            dic_merge[merge_id] = new_entry
        updated += 1

    # NOTE(review): if merged_path is the file currently open in the editor,
    # the in-memory `dic` is not refreshed here, so a later "SAVE FILE" would
    # overwrite this merge. Confirm the intended workflow.
    with open(merged_path, "w", encoding="utf8") as json_file:
        json.dump(dic_merge, json_file, indent=1, ensure_ascii=False)
    gr.Info(f"合并成功,共更新{updated}条译文")
343
+
344
def output_json(merged_path, text_start_id, text_end_id):
    """Export the entries from ``text_start_id`` to ``text_end_id`` as a small JSON file.

    The file is written next to ``merged_path`` and named
    ``small_<basename of merged_path>``. The name used to be derived from
    the editor's global ``path``, which is wrong whenever the "File Path"
    on the merge tab points at a different file.

    Returns:
        Path of the written file, or None when the request was rejected.
    """
    merged_path = smart_path(merged_path)
    if not osp.exists(merged_path):
        gr.Warning("路径不存在")
        return
    with open(merged_path, "r", encoding="utf8") as json_file:
        dic_merge = json.load(json_file)
    id_lis_merge = list(dic_merge)
    idx_dic_merge = {id_: idx for idx, id_ in enumerate(id_lis_merge)}
    if (text_start_id not in idx_dic_merge or text_end_id not in idx_dic_merge
            or idx_dic_merge[text_start_id] > idx_dic_merge[text_end_id]):
        gr.Warning("找不到指定序号, 或id前后顺序错误")
        return
    first = idx_dic_merge[text_start_id]
    last = idx_dic_merge[text_end_id]
    dic_new = {out_id: dic_merge[out_id] for out_id in id_lis_merge[first:last + 1]}
    new_path = osp.join(osp.dirname(merged_path), "small_" + osp.basename(merged_path))
    with open(new_path, "w", encoding="utf8") as json_file:
        json.dump(dic_new, json_file, indent=1, ensure_ascii=False)
    return new_path
366
+
367
# Keyboard shortcuts injected into the page <head>: Shift + key clicks the
# button whose elem_id is listed below. A missing button (no such elem_id on
# the page) or an event without a string `key` is ignored instead of
# throwing a TypeError in the browser.
shortcut_js = """
<script>
function shortcuts(e) {
    var targets = {
        "s": "button_save",
        "w": "button_up",
        "x": "button_down",
        "r": "button_replace",
        "g": "button_translate_gpt",
        "b": "button_translate_baidu"
    };
    if (!e.shiftKey || typeof e.key !== "string") {
        return;
    }
    var key = e.key.toLowerCase();
    if (!Object.prototype.hasOwnProperty.call(targets, key)) {
        return;
    }
    var button = document.getElementById(targets[key]);
    if (button) {
        button.click();
    }
}
document.addEventListener('keyup', shortcuts, false);
</script>
"""
394
+
395
# NOTE(review): the nesting of rows/columns below is reconstructed; confirm
# the layout against a running instance.
with gr.Blocks(theme=Theme1(), head=shortcut_js) as demo:
    gr.Markdown("# <center>EasyTranslator v1.0.6</center> ", visible=True)
    # Text editing tab
    with gr.Tab("文本编辑"):
        gr.Markdown("## 文本编辑及保存区")
        with gr.Row():
            text_file_path = gr.Textbox(label="File Path", value=args["file_path"])
            text_id = gr.Textbox(label="Text id", show_copy_button=True)
            button_load_pos = gr.Button("LOAD last edited position")
            if not if_save_id_immediately:
                button_save_pos = gr.Button("SAVE last edited position")
        with gr.Row():
            if not moyu_mode:
                # Full-screen mode
                with gr.Column():
                    text_name = gr.Textbox(label="Name")
                    text_text = gr.Textbox(label="Text", lines=10, show_copy_button=True)
                    button_save = gr.Button("SAVE FILE", scale=2, elem_id="button_save")
                with gr.Column():
                    text_name_cn = gr.Textbox(label="Name_CN")
                    with gr.Row():
                        text_gpt = gr.Textbox(label="GPT", lines=3, show_copy_button=True, interactive=True)
                        button_translate_gpt = gr.Button("Translate(GPT)", elem_id="button_translate_gpt")
                    with gr.Row():
                        text_baidu = gr.Textbox(label="Baidu", lines=3, show_copy_button=True, interactive=True)
                        button_translate_baidu = gr.Button("Translate(Baidu)", elem_id="button_translate_baidu")
                    text_final = gr.Textbox(label="Text_CN", lines=3, show_copy_button=True, interactive=True)
                    with gr.Row():
                        button_up = gr.Button("↑", elem_id="button_up")
                        button_down = gr.Button("↓", elem_id="button_down")
                        button_replace = gr.Button("Replace", elem_id="button_replace")
            else:
                # Compact ("moyu") mode. The buttons carry the same elem_ids
                # as in full-screen mode so the keyboard shortcuts work here too.
                with gr.Column():
                    button_save = gr.Button("SAVE FILE", scale=2, elem_id="button_save")
                    text_name = gr.Textbox(label="Name")
                    text_name_cn = gr.Textbox(label="Name_CN")
                with gr.Column():
                    with gr.Row():
                        text_gpt = gr.Textbox(label="GPT", lines=3, show_copy_button=True, interactive=True)
                        button_translate_gpt = gr.Button("Translate(GPT)", elem_id="button_translate_gpt")
                    with gr.Row():
                        text_baidu = gr.Textbox(label="Baidu", lines=3, show_copy_button=True, interactive=True)
                        button_translate_baidu = gr.Button("Translate(Baidu)", elem_id="button_translate_baidu")
                    text_text = gr.Textbox(label="Text", lines=3, show_copy_button=True)
                    text_final = gr.Textbox(label="Text_CN", lines=3, show_copy_button=True, interactive=True)
                    with gr.Row():
                        button_up = gr.Button("↑", elem_id="button_up")
                        button_down = gr.Button("↓", elem_id="button_down")
                        button_replace = gr.Button("Replace", elem_id="button_replace")
        label_remaining_text = gr.Label(label="进度", value="目标剩余???条")
        gr.Markdown("## 批量机翻区")
        with gr.Row():
            text_translate_start_id = gr.Textbox(label="起始句id")
            text_translate_end_id = gr.Textbox(label="结束句id")
        with gr.Row():
            radio_translator = gr.Radio(choices=["Baidu", "Gpt3"], label="接口")
            label_progress = gr.Label(label="进度条", value="")
        checkbox_if_save_translation = gr.Checkbox(value=False, label="翻译完成后直接保存JSON")
        button_batch_translate = gr.Button("批量翻译")

    # Preview and export tab
    tab_context = gr.Tab("文本预览及导出")
    with tab_context:
        gr.Markdown("## 上下文预览区")
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    text_refresh_id = gr.Textbox(label="编号", value=args["last_edited_id"])
                    text_context_length = gr.Textbox(label="上下文长度", value=args["context_half_length"])
                radio_context_type = gr.Radio(choices=["上下文", "上文", "下文"], label="预览模式", value="下文")
            with gr.Column():
                with gr.Row():
                    button_refresh = gr.Button("Refresh")
                    button_save_context = gr.Button("Save Changes")
                checkbox_if_save_context = gr.Checkbox(value=False, label="修改直接保存JSON")
        dataframe_context = gr.DataFrame(headers=['id', 'name', 'name_CN', 'text', 'text_CN'],
                                         interactive=True)
        gr.Markdown("## 文档导出区")
        radio_type = gr.Radio(choices=["中文|纯文本", "中文|单次人名文本", "中文|人名文本", "双语|人名文本"], label="导出类型")
        with gr.Row():
            text_derive_start_id = gr.Textbox(label="起始句id")
            text_derive_end_id = gr.Textbox(label="结束句id")
        with gr.Row():
            text_seperator_long = gr.Textbox(label="句间分隔符(长)", value=args["seperator_long"])
            text_seperator_short = gr.Textbox(label="双语间分隔符(短)", value=args["seperator_short"])
        text_output_path = gr.Textbox(label="输出文件路径", value=args["output_txt_path"])
        button_derive_text = gr.Button("导出文本")

    # File conversion tab
    with gr.Tab("文件转换"):
        gr.Markdown("## CSV to JSON(支持批量上传)")
        gr.Markdown("准备好台词csv文件(至少包含正序排列的台词)并将台词列命名为text,如自带角色名则将此列命名为name,如自带id则将此列命名为id。"
                    "在此处上传csv文件,保存生成的json文件,之后在主界面输入json文件路径即可使用。")
        with gr.Row():
            with gr.Column():
                file_target_csv = gr.File(file_types=["csv"], file_count="multiple", label="Input CSV")
                with gr.Row():
                    text_text_column = gr.Textbox(label="text列名", value=args["csv_column_name"]["text"])
                    text_name_column = gr.Textbox(label="name列名", value=args["csv_column_name"]["name"])
                    text_id_column = gr.Textbox(label="id列名(optional)", value=args["csv_column_name"]["id"], placeholder="若不指定或找不到指定列,程序会自动编号")
                button_convert2json = gr.Button("Convert")
            file_result_json = gr.File(file_types=["json"], label="Output JSON", interactive=False)
        gr.Markdown("## JSON to CSV(支持批量上传)")
        with gr.Row():
            with gr.Column():
                file_target_json = gr.File(file_types=["json"], file_count="multiple", label="Input JSON")
                button_convert2csv = gr.Button("Convert")
            # "jcsv" in the original was a typo for "csv".
            file_result_csv = gr.File(file_types=["csv"], label="Output CSV", interactive=False)

    # File merging tab
    with gr.Tab("文件合并"):
        gr.Markdown("## 合并JSON文件")
        gr.Markdown("将两个json文件中的译文合并,方便多人协作。使用方法为上传部分翻译后的json文件,指定起止id。"
                    "程序会用【上传文件】中,从起始句id到结束句id的全部内容,覆盖【指定地址】中的json文件从起始句id到结束句id的全部内容。"
                    "若起止id顺序颠倒或不存在,按钮不会作用。请仔细检查并做好备份!!")
        with gr.Column():
            text_merged_path = gr.Textbox(label="File Path", value=args["file_path"])
            file_merging_json = gr.File(file_types=["json"], file_count="single", label="File to be merged")
            with gr.Row():
                text_merge_start_id = gr.Textbox(label="起始句id", value="")
                text_merge_end_id = gr.Textbox(label="结束句id", value="")
            radio_merge_type = gr.Radio(choices=["仅人工翻译", "全部替换"], label="合并模式", value="仅人工翻译")
            button_merge = gr.Button("Merge")

        gr.Markdown("## 导出JSON文件")
        gr.Markdown("支持导出起止id范围的小型json文件,以减少协作时的传输负担。使用上面File Path的指定地址。")
        with gr.Row():
            text_output_start_id = gr.Textbox(label="起始句id", value="")
            text_output_end_id = gr.Textbox(label="结束句id", value="")
        button_output = gr.Button("Output")
        file_output_json = gr.File(file_types=["json"], label="Output JSON", interactive=False)

    # API settings tab
    with gr.Tab("API Settings"):
        gr.Markdown("## 百度 API")
        text_baidu_api_id = gr.Textbox(label="Baidu API Id", value=args["baidu_api_settings"]["api_id"])
        text_baidu_api_key = gr.Textbox(label="Baidu API Key", value=args["baidu_api_settings"]["api_key"])
        with gr.Row():
            text_from_lang = gr.Textbox(label="From Lang", value=args["baidu_api_settings"]["from_lang"])
            text_to_lang = gr.Textbox(label="To Lang", value=args["baidu_api_settings"]["to_lang"])
        gr.Markdown("## OPENAI API")
        text_openai_api = gr.Textbox(label="OPENAI API Key", value=args["openai_api_settings"]["openai_api_key"])
        with gr.Row():
            text_prefix = gr.Textbox(label="Prompt Prefix", value=args["openai_api_settings"]["prompt_prefix"])
            text_postfix = gr.Textbox(label="Prompt Postfix", value=args["openai_api_settings"]["prompt_postfix"])
        gr.Markdown("## 目标id")
        text_target_id = gr.Textbox(label="Target Id", value=args["target_id"])
        button_api_submit = gr.Button("Submit")

    # Tab events
    tab_context.select(refresh_context, inputs=[text_id, text_context_length, radio_context_type], outputs=[dataframe_context, text_refresh_id])

    # Textbox events
    text_id.change(change_id, inputs=[text_id],
                   outputs=[text_file_path, text_text, text_name, text_name_cn, text_gpt, text_baidu, text_final])
    text_id.change(get_remaining_text_num, inputs=None, outputs=[label_remaining_text])
    text_final.change(change_final, inputs=[text_final, text_id])
    text_name_cn.change(change_name, inputs=[text_name, text_name_cn, text_id])

    # Button events
    # - text editing tab
    button_load_pos.click(load_last_position, inputs=text_file_path, outputs=text_id)
    if not if_save_id_immediately:
        button_save_pos.click(save_last_position, inputs=[text_id])
    button_up.click(last_text, outputs=text_id)
    button_down.click(next_text, outputs=text_id)
    button_translate_gpt.click(gpt_translate,
                               inputs=[text_text, text_id], outputs=text_gpt)
    button_translate_baidu.click(baidu_translate,
                                 inputs=[text_text, text_id], outputs=text_baidu)
    button_replace.click(replace,
                         inputs=[text_gpt, text_baidu, text_final, text_id],
                         outputs=[text_gpt, text_baidu, text_final])
    button_save.click(save_json)

    button_batch_translate.click(batch_translate, inputs=[radio_translator, checkbox_if_save_translation, text_translate_start_id, text_translate_end_id],
                                 outputs=[label_progress])

    # - preview and export tab
    button_refresh.click(refresh_context, inputs=[text_refresh_id, text_context_length, radio_context_type], outputs=[dataframe_context, text_id])
    button_save_context.click(save_context, inputs=[dataframe_context, text_refresh_id, checkbox_if_save_context])
    button_derive_text.click(derive_text,
                             inputs=[radio_type, text_derive_start_id, text_derive_end_id,
                                     text_seperator_long, text_seperator_short, text_output_path])

    # - file conversion tab
    button_convert2json.click(convert_to_json,
                              inputs=[file_target_csv, text_text_column, text_name_column, text_id_column],
                              outputs=file_result_json)
    button_convert2csv.click(convert_to_csv,
                             inputs=file_target_json,
                             outputs=file_result_csv)

    # - file merging tab
    button_merge.click(merge_json, inputs=[text_merged_path, file_merging_json, text_merge_start_id, text_merge_end_id, radio_merge_type])
    button_output.click(output_json, inputs=[text_merged_path, text_output_start_id, text_output_end_id], outputs=file_output_json)

    # - API settings tab
    button_api_submit.click(submit_api,
                            inputs=[text_baidu_api_id, text_baidu_api_key, text_from_lang, text_to_lang,
                                    text_openai_api, text_prefix, text_postfix, text_target_id])

demo.queue()

if __name__ == "__main__":
    demo.launch(show_error=True)
README.md CHANGED
@@ -10,4 +10,125 @@ pinned: false
10
  license: apache-2.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: apache-2.0
11
  ---
12
 
13
+ # EasyTranslator v1.0.6
14
+ 基于gradio的汉化辅助工具
15
+ ## v1.0.6更新内容
16
+ 1. 更新文件合并功能,方便多人协作。在文件合并页中可依照指示将两个json文件合并,同步人工翻译进度。并支持导出小规模json文件方便传输。
17
+
18
+ ## v1.0.5更新内容
19
+ 1. 支持键盘快捷键<br>
20
+ shift+w: ↑<br>
21
+ shift+x: ↓<br>
22
+ shift+s: save json<br>
23
+ shift+r: replace<br>
24
+ shift+g: gpt translate<br>
25
+ shift+b: baidu translate<br>
26
+
27
+ ## v1.0.4更新内容
28
+ 1. 追加摸鱼模式, 将必要组件集中在半个屏幕内。在`config.json`中`moyu_mode`设为1开启, 设为0关闭
29
+ 2. 加入对GPT翻译的超时检测, 时间上限在`config.json`的`openai_api_settings`中的`time_limit`处设置, 单位为秒。若请求超时, 会打印超时提示, 但不会报错。
30
+ 3. GPT翻译现在将不返回重复结果
31
+
32
+ ## v1.0.3更新内容
33
+ 1. 支持预览页直接修改译文, 建议保存JSON后再使用此功能
34
+ 2. 可选是否即时更新上次编辑id
35
+
36
+ `config.json`中设置`"if_save_id_immediately"`参数, 若为1则逻辑与之前一样, 在切换id时立刻保存进`config.json`;若为0则会显示保存编辑id按钮`SAVE last edited position`, 在点击后存入`config.json`。
37
+
38
+ ## v1.0.2更新内容
39
+ 1. 支持批量机翻
40
+
41
+ ## v1.0.1更新内容
42
+ 1. 优化文件读取逻辑
43
+ 2. 增加错误提示、警告等。保存JSON成功时会提示更新的译文条数
44
+ 3. 允许自定义传输到gpt的prompt、自定义百度翻译的原文及目标语言
45
+ 4. 追加上下文预览功能, 并允许自定义预览条数和编号。指定id将会以双星号标记, 修改过的译文将会在前面加星号标记
46
+ 5. 优化按钮手感
47
+
48
+ ## 特性
49
+ 1. 一键机翻接口, 提供复制到剪贴板按钮
50
+ 2. 便捷的上下句切换, 直接跳转功能
51
+ 3. 记忆上次编辑位置功能
52
+ 4. 人名翻译记忆功能, 一次修改将会同步到全体。人名词典在程序启动时读取并在保存JSON文件时保存。开启程序时可以直接改`name_cn`, 关闭程序后可以修改人名词典。下次开启程序时人名词典中的内容将会覆盖JSON文件中的`name_cn`。
53
+ 5. 文本翻译记忆功能, 机翻/修改后只要不关闭程序, 切换上下句, 刷新网页都不会影响
54
+ 6. 译文缓存。相对地原文不会缓存, 所以手滑改或删掉只要切换或者刷新即可恢复。因此想查看原文具体某个词的翻译也可以直接编辑原文再机翻, 不会影响原文本。
55
+ 7. 一键替换功能, 用于专有名词错译的情况。会将机翻及手翻文本中的对象全部替换。替换词典可以在运行中直接更改, 不用重开程序。
56
+ 8. 便利的api key管理及prompt修改等
57
+ 9. 提供JSON文件与CSV文件互转
58
+ 10. 上下文预览功能
59
+ <br><br>
60
+
61
+ ## 使用
62
+ 至少需要安装python3(作者使用的版本是3.10, 其它版本尚未测试)
63
+ ***
64
+ ### Install
65
+ ```
66
+ git clone https://github.com/alienet1109/EasyTranslator.git
67
+ ```
68
+ 不想安git可以直接下载压缩包
69
+ ***
70
+ ### Preparation
71
+ #### 1. 安装依赖
72
+ ```
73
+ pip install -r requirements.txt
74
+ ```
75
+ #### 2. 文本准备
76
+ 需要使用者自行准备原文本json文件, 或使用本程序将原文本csv文件转换为json文件 \
77
+ csv文件格式要求为:
78
+ * 至少包含人名列、文本列, 按顺序排列的表格
79
+
80
+ 只有文本没有人名也可以使用, 在csv里新建空列'name'即可。\
81
+ 若不指定id列名, 程序会自动生成id。 \
82
+ 可以指定人名和文本的列名, 将会分别以'name'、'text'为键输入json文件;其它列将会以原列名为键输入, 以防数据丢失。\
83
+ 生成json文件后, 下载, 然后输入其路径(不一定要与代码同一文件夹)即可使用。
84
+
85
+ json文件格式要求为:
86
+ * 由key为id, value为{'name':'原文人名','text':'原文文本'}的键值对组成, 按文本顺序正序排序的字典。
87
+
88
+ 运行途中会频繁修改json文件, 所以最好做好备份。\
89
+ 可以随时在页面中修改json文件路径, 修改前务必保存, 修改后请按Load按钮以同步更新否则不知道会有什么bug。\
90
+ 上次编辑文本编号将会重置, 路径与编号将直接更新至config文件。
91
+
92
+ #### 3. 修改配置文件`config.json`
93
+ * 必须:
94
+ 1. 设置文本文件`file_path`及人名词典`name_dict_path`的路径(推荐使用绝对路径)。之后结果会直接保存至对应路径。
95
+
96
+ * 可选:
97
+ 1. 设置替换词典`replace_dict_path`路径, 如不使用此功能则不需要;
98
+ 2. 可设置api key和分隔符等, 也可以直接在程序更改。程序中的修改会改变预设api key, 但不会改变预设的分隔符。
99
+ ***
100
+ ### Run
101
+ 直接点开`EasyTranslator.py`或在文件夹下执行命令:
102
+ ```
103
+ python EasyTranslator.py
104
+ ```
105
+ 然后在网页中打开程序给出的网址(eg: http://127.0.0.1:7860 )
106
+ <br><br>
107
+
108
+ ## 演示
109
+ 摸鱼模式 \
110
+ ![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/moyu_mode.png) \
111
+ 批量翻译 \
112
+ ![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/batch_translate.gif) \
113
+ 上下文预览\
114
+ ![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/context_preview.gif)
115
+
116
+ ![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/id%20search.gif)
117
+
118
+ ![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/name.gif)
119
+
120
+ ![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/last%26next%20text.gif)
121
+
122
+ ![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/replace.gif)
123
+
124
+ ![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/api%20key%20setting.gif)
125
+
126
+ ![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/derive%20text.gif)
127
+
128
+ ![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/part_translate.gif)
129
+
130
+ ## 计划追加功能
131
+ 1. 可选主题
132
+ 2. 追加翻译接口
133
+ 3. 追加文本输出格式
134
+ 4. 发生修改时直接存入小规模临时文件, 防止数据丢失
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "moyu_mode": "0",
3
+ "if_save_id_immediately": "1",
4
+ "last_edited_id": "100001001",
5
+ "target_id": "100001005",
6
+ "file_path": "./example_text.json",
7
+ "context_half_length": "10",
8
+ "name_dict_path": "./example_name_dict.txt",
9
+ "replace_dict_path": "./example_replace_dict.txt",
10
+ "output_txt_path": "./output.txt",
11
+ "seperator_long": "===============================",
12
+ "seperator_short": "---------------------",
13
+ "csv_column_name": {
14
+ "id": "",
15
+ "text": "text",
16
+ "name": "name"
17
+ },
18
+ "baidu_api_settings": {
19
+ "api_id": "YOUR BAIDU API ID",
20
+ "api_key": "YOUR BAIDU API KEY",
21
+ "from_lang": "jp",
22
+ "to_lang": "zh"
23
+ },
24
+ "openai_api_settings": {
25
+ "openai_api_key": "YOUR OPENAI API KEY",
26
+ "prompt_prefix": "翻译为中文:",
27
+ "prompt_postfix": "",
28
+ "time_limit": "15"
29
+ }
30
+ }
example_name_dict.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {原文人名} {译文人名}
2
+ キム 金
example_replace_dict.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ {旧词} {新词}
example_text.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "100001001": {
3
+ "name": "高橋",
4
+ "text": "キムさん、こちらは山田さんです。山田さん、こちらはキムさんです。",
5
+ "name_CN": "高橋",
6
+ "gpt3": "",
7
+ "baidu": "",
8
+ "text_CN": ""
9
+ },
10
+ "100001002": {
11
+ "name": "山田",
12
+ "text": "山田です。はじめまして、どうぞよろしく。",
13
+ "name_CN": "山田"
14
+ },
15
+ "100001003": {
16
+ "name": "キム",
17
+ "text": "私はキムです。こちらこそ、どうぞよろしくお願いいたします。"
18
+ },
19
+ "100001004": {
20
+ "name": "山田",
21
+ "text": "キムさん、お仕事は。"
22
+ },
23
+ "100001005": {
24
+ "name": "キム",
25
+ "text": "学生です。"
26
+ }
27
+ }
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ openai>=1.0
themes.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from typing import Iterable
3
+ import gradio as gr
4
+ from gradio.themes.base import Base
5
+ from gradio.themes.default import Default
6
+ from gradio.themes.utils import colors, fonts, sizes
7
+ import time
8
+
9
+
10
class Theme1(Base):
    """Custom Gradio theme derived from ``gradio.themes.base.Base``.

    All constructor arguments are keyword-only and are forwarded unchanged
    to ``Base.__init__``; afterwards two theme variables are overridden via
    ``set``.
    """

    def __init__(
        self,
        *,
        # Colour palettes: emerald as primary, blue as secondary, gray as neutral.
        primary_hue: colors.Color | str = colors.emerald,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = colors.gray,
        # Medium spacing and corner radius, large text.
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_lg,
        # Font stack for regular text: a Google font followed by generic fallbacks.
        font: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "sans-serif",
        ),
        # Font stack for monospaced text.
        font_mono: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        # Hand every option straight to the base theme.
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,

        )
        # Override individual theme variables; the commented-out entries are
        # options that are currently disabled.
        super().set(
            input_background_fill="*neutral_100",
            block_title_text_weight="600",
            # button_shadow_active="*neutral_400 0px 0px 2px 2px",
            # block_border_width="3px",
            # button_large_padding="32px",
            # button_secondary_background_fill_hover="*neutral_300",
        )
54
+
utils.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import requests
3
+ import random
4
+ import json
5
+ from hashlib import md5
6
+ from os import path as osp
7
+ import csv
8
+ import threading
9
+
10
def load_config(filepath):
    """Read and decode the JSON configuration file at ``filepath``.

    Args:
        filepath: Path of a UTF-8 encoded JSON file.

    Returns:
        The decoded JSON content.
    """
    with open(filepath, "r", encoding="utf-8") as config_file:
        return json.load(config_file)
14
+
15
def save_config(args, filepath):
    """Write ``args`` to ``filepath`` as JSON.

    The file is overwritten, indented by one space, and non-ASCII
    characters are written as-is rather than escaped.

    Args:
        args: JSON-serialisable settings object.
        filepath: Destination path.
    """
    with open(filepath, mode="w", encoding="utf8") as out:
        json.dump(args, out, indent=1, ensure_ascii=False)
19
+
20
def smart_path(path):
    """Resolve ``path`` relative to the directory containing this module.

    Args:
        path: An absolute or relative file path.

    Returns:
        ``path`` unchanged when it is absolute, otherwise ``path`` joined
        onto the directory of this source file.
    """
    if osp.isabs(path):
        return path
    module_dir = osp.dirname(osp.abspath(__file__))
    return osp.join(module_dir, path)
26
# Load the settings once, at import time, from the config.json located in the
# same directory as this module.
args = load_config(smart_path("./config.json"))
27
+
28
# Baidu preparation
# `url` is the full address of the Baidu translation endpoint, assembled from
# host and path.
# NOTE(review): the host is addressed over plain HTTP, so the app id and
# request signature travel unencrypted -- confirm whether an HTTPS endpoint
# should be used instead.
endpoint = "http://api.fanyi.baidu.com"
path = "/api/trans/vip/translate"
url = endpoint + path
# Header sent with each Baidu request.
headers = {"Content-Type": "application/x-www-form-urlencoded"}
33
+ # Generate salt and sign
34
def make_md5(s, encoding="utf-8"):
    """Return the hexadecimal MD5 digest of ``s``.

    Args:
        s: Text to hash.
        encoding: Codec used to turn ``s`` into bytes before hashing.

    Returns:
        The 32-character lowercase hex digest.
    """
    hasher = md5()
    hasher.update(s.encode(encoding))
    return hasher.hexdigest()
36
+
37
def get_baidu_completion(text, api_id, api_key, from_lang, to_lang, timeout=30):
    """Translate ``text`` through the Baidu translation endpoint.

    Args:
        text: Source text to translate.
        api_id: Baidu application id.
        api_key: Baidu secret key used to sign the request.
        from_lang: Source language code.
        to_lang: Target language code.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``dst`` field of the first entry in the response's
        ``trans_result`` list.

    Raises:
        KeyError: If the response carries no ``trans_result``; the message
            contains the full response payload for diagnosis.
    """
    salt = random.randint(32768, 65536)
    # The signature is the MD5 of app id + text + salt + secret key.
    sign = make_md5(api_id + text + str(salt) + api_key)
    payload = {
        "appid": api_id,
        "q": text,
        "from": from_lang,
        "to": to_lang,
        "salt": salt,
        "sign": sign,
    }
    # Without a timeout a stalled connection would block the caller forever.
    r = requests.post(url, params=payload, headers=headers, timeout=timeout)
    result = r.json()
    if "trans_result" not in result:
        # Keep the exception type callers already see, but say what came back.
        raise KeyError(f"trans_result missing from Baidu response: {result!r}")
    return result["trans_result"][0]["dst"]
44
+
45
# OPENAI preparation
# API key taken from the "openai_api_settings" section of the loaded config.
openai_api_key = args["openai_api_settings"]["openai_api_key"]
# The time limit is stored as a string in config.json, so convert it to a float.
time_limit = float(args["openai_api_settings"]["time_limit"])
# Module-level client created once at import time with the configured key.
client = openai.OpenAI(api_key = openai_api_key)
49
class GPTThread(threading.Thread):
    """Thread that issues one chat-completion request and stores the reply.

    After the thread finishes, ``result`` holds the content of the first
    returned choice; until then it is the empty string.
    """

    def __init__(self, model, messages, temperature):
        """Remember the request parameters; nothing is sent until ``start()``."""
        threading.Thread.__init__(self)
        self.model = model
        self.messages = messages
        self.temperature = temperature
        self.result = ""

    def terminate(self):
        """Set ``_running`` to False.

        ``run`` never reads this flag, so a request that is already in
        flight is not interrupted by calling this method.
        """
        self._running = False

    def run(self):
        """Send the request through the module-level client and keep the reply."""
        completion = client.chat.completions.create(
            model=self.model,
            messages=self.messages,
            temperature=self.temperature,
        )
        first_choice = completion.choices[0]
        self.result = first_choice.message.content
65
+
66
def get_gpt_completion(prompt, model="gpt-3.5-turbo", api_key=openai_api_key):
    """Request a chat completion for ``prompt``, bounded by the configured time limit.

    The request runs in a ``GPTThread`` with a random temperature in [0, 1].
    The caller waits at most ``time_limit`` seconds (the module-level value
    read from config.json) for it to finish.

    Args:
        prompt: Text sent as the single user message.
        model: Name of the chat model to query.
        api_key: Accepted for interface compatibility; the request itself
            goes through the module-level client.

    Returns:
        ``(reply_text, True)`` when the request finished in time, otherwise
        ``("TimeoutError", False)``.
    """
    messages = [{"role": "user", "content": prompt}]
    temperature = random.uniform(0, 1)
    thread = GPTThread(model, messages, temperature)
    thread.start()
    # Honour the user-configured limit instead of a hard-coded 10 seconds.
    thread.join(time_limit)
    if thread.is_alive():
        thread.terminate()
        print("请求超时")
        return "TimeoutError", False
    return thread.result, True
78
+
79
def left_pad_zero(number, digit):
    """Return ``number`` as text, left-padded with ``"0"`` to ``digit`` characters.

    Text that is already ``digit`` characters or longer is returned
    unchanged; it is never truncated.

    Args:
        number: Value to render with ``str``.
        digit: Minimum width of the returned string.

    Returns:
        The zero-padded string.
    """
    return str(number).rjust(digit, "0")
84
+
85
def generate_ids(num: int):
    """Build ``num`` sequential, zero-padded id strings starting at 0.

    Every id is padded to one character more than the number of digits
    in ``num``.

    Args:
        num: How many ids to generate.

    Returns:
        A list of ``num`` strings, e.g. ``["00", "01", "02"]`` for ``num=3``.
    """
    width = len(str(num)) + 1
    return [str(index).rjust(width, "0") for index in range(num)]
91
+
92
def convert_to_json(files, text_col, name_col, id_col):
    """Convert CSV files into JSON files keyed by line id.

    Each CSV row becomes ``{"name": ..., "text": ...}`` plus every remaining
    column under its own header. Rows are keyed by the ``id_col`` value when
    that column exists; otherwise sequential zero-padded ids are generated.
    The JSON file is written next to the CSV with the extension replaced by
    ``.json``.

    Args:
        files: Iterable of objects whose ``name`` attribute is a CSV path.
        text_col: Header of the column holding the text.
        name_col: Header of the column holding the speaker name.
        id_col: Header of the id column; ids are generated when it is absent.

    Returns:
        List of paths of the JSON files written, in input order.

    Raises:
        KeyError: If ``text_col`` or ``name_col`` is missing from a row.
    """
    out_files = []
    for file_target in files:
        dic = {}
        path = file_target.name
        # splitext handles any extension length, unlike slicing off 4 characters.
        new_path = osp.splitext(path)[0] + ".json"
        with open(path, "r", encoding="utf-8") as f:
            reader = csv.DictReader(f)
            # An empty file has no header row; treat it as having no columns.
            fieldnames = reader.fieldnames or []
            if id_col in fieldnames:
                keyed_rows = ((row[id_col], row) for row in reader)
            else:
                # Size the generated ids by the physical line count (header
                # included) so their zero-padding width stays the same as before.
                with open(path, "r", encoding="utf-8") as counter:
                    line_num = sum(1 for _ in counter)
                keyed_rows = zip(generate_ids(line_num), reader)
            reserved = (name_col, text_col, id_col)
            for key, row in keyed_rows:
                entry = {"name": row[name_col], "text": row[text_col]}
                for field in fieldnames:
                    if field not in reserved:
                        entry[field] = row[field]
                dic[key] = entry
        with open(new_path, "w", encoding="utf-8") as f2:
            json.dump(dic, f2, indent=1, ensure_ascii=False)
        out_files.append(new_path)
    return out_files
126
+
127
def convert_to_csv(files):
    """Convert JSON files keyed by line id into CSV files.

    Each top-level key becomes the ``id`` column of one row. The remaining
    columns are the union of all entry fields in first-seen order, and
    fields missing from an entry are written as empty strings. The CSV is
    written next to the JSON file with the extension replaced by ``.csv``.

    Args:
        files: Iterable of objects whose ``name`` attribute is a JSON path.

    Returns:
        List of paths of the CSV files written, in input order.
    """
    out_files = []
    for file_target in files:
        path = file_target.name
        # Slicing off 4 characters turned "x.json" into "x..csv"; splitext
        # removes the real extension whatever its length.
        new_path = osp.splitext(path)[0] + ".csv"
        with open(path, "r", encoding="utf-8") as f:
            dic = json.load(f)
        # "id" always comes first and only once, even when entries already
        # carry their own "id" field.
        field_names = ["id"]
        for value in dic.values():
            for field in value:
                if field not in field_names:
                    field_names.append(field)
        rows = []
        for key, value in dic.items():
            row = {field: value.get(field, "") for field in field_names}
            # The dictionary key is the authoritative id of the row.
            row["id"] = key
            rows.append(row)
        with open(new_path, "w", encoding="utf-8", newline="") as f2:
            writer = csv.DictWriter(f2, fieldnames=field_names)
            writer.writeheader()
            writer.writerows(rows)
        out_files.append(new_path)
    return out_files
153
+