Spaces:
Sleeping
Sleeping
first
Browse files- EasyTranslator.py +607 -0
- README.md +122 -1
- config.json +30 -0
- example_name_dict.txt +2 -0
- example_replace_dict.txt +1 -0
- example_text.json +27 -0
- requirements.txt +1 -0
- themes.py +54 -0
- utils.py +153 -0
EasyTranslator.py
ADDED
@@ -0,0 +1,607 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from os import path as osp
|
3 |
+
import json
|
4 |
+
from utils import *
|
5 |
+
from themes import *
|
6 |
+
|
7 |
+
# Initialization
# "id" is a line's identifier: a string key of the project JSON file.
# "idx" is the line's sequential position in that file: 0, 1, 2, ...
config_path = osp.join(osp.dirname(osp.abspath(__file__)),"./config.json")
args = load_config(config_path)
if_save_id_immediately = bool(int(args["if_save_id_immediately"]))
moyu_mode = bool(int(args["moyu_mode"]))
path = args["file_path"]
abs_path = smart_path(path)
replace_dict_path = smart_path(args["replace_dict_path"])
name_dict_path = smart_path(args["name_dict_path"])
# Ids whose final translation was edited since the last save.
altered_text_finals = set()

# Start from an empty project so the names below always exist, even when the
# configured JSON file is missing (callbacks would otherwise hit NameError).
dic = {}
id_lis = []
idx_dic = {}
id_idx = 0
if osp.exists(abs_path):
    with open(abs_path, "r", encoding="utf8") as json_file:
        dic = json.load(json_file)
    id_lis = list(dic.keys())
    idx_dic = {id_: idx for idx, id_ in enumerate(id_lis)}
    # Resume at the last edited line when it still exists in the file.
    id_idx = idx_dic.get(args["last_edited_id"], 0)


def _load_pair_dict(dict_path, allow_empty_key=True):
    """Read a "key value" text file (one space-separated pair per line).

    Lines without a space separator (including blank lines) are skipped
    instead of raising IndexError. Only the first two space-separated fields
    of a line are used.

    Args:
        dict_path: Path of the dictionary file; a missing file yields ``{}``.
        allow_empty_key: When False, pairs whose key is the empty string are
            skipped as well.

    Returns:
        A dict mapping each key to its value.
    """
    pairs = {}
    if not osp.exists(dict_path):
        return pairs
    with open(dict_path, "r", encoding="utf-8") as f:
        for line in f:
            item = line.rstrip("\n").split(" ")
            if len(item) < 2:
                continue
            if not item[0] and not allow_empty_key:
                continue
            pairs[item[0]] = item[1]
    return pairs


# Dict for replacement
# An empty key would make str.replace insert the value between every
# character, so such entries are ignored.
replace_dic = _load_pair_dict(replace_dict_path, allow_empty_key=False)

# Dict for name
# Empty keys are kept here: a line may have an empty speaker name.
name_dic = _load_pair_dict(name_dict_path)
|
52 |
+
|
53 |
+
# Translate
|
54 |
+
def gpt_translate(text,text_id):
    """Translate ``text`` through the OpenAI helper and cache the result.

    Newlines in ``text`` are replaced by spaces before the prompt is built
    from the configured prefix and postfix.

    Args:
        text: Source text to translate.
        text_id: Id of the line the text belongs to.

    Returns:
        Whatever ``get_gpt_completion`` returned as the translation.
    """
    text = text.replace("\n"," ")
    settings = args["openai_api_settings"]
    prompt = settings["prompt_prefix"] + text + settings["prompt_postfix"]
    translation, if_succ = get_gpt_completion(prompt, api_key = settings["openai_api_key"])
    # Only cache when the request succeeded and the text still matches the
    # stored line; an unknown/empty id is tolerated instead of raising KeyError.
    entry = dic.get(text_id)
    if entry is not None and if_succ and entry["text"].replace("\n"," ") == text:
        entry["gpt3"] = translation
    return translation
|
61 |
+
|
62 |
+
def baidu_translate(text,text_id):
    """Translate ``text`` through the Baidu helper and cache the result.

    Newlines in ``text`` are replaced by spaces before the request is made.

    Args:
        text: Source text to translate.
        text_id: Id of the line the text belongs to.

    Returns:
        Whatever ``get_baidu_completion`` returned as the translation.
    """
    text = text.replace("\n"," ")
    settings = args["baidu_api_settings"]
    translation = get_baidu_completion(
        text,
        api_id = settings["api_id"],
        api_key = settings["api_key"],
        from_lang = settings["from_lang"],
        to_lang = settings["to_lang"],
    )
    # Only cache when the text still matches the stored line; an unknown or
    # empty id is tolerated instead of raising KeyError.
    entry = dic.get(text_id)
    if entry is not None and entry["text"].replace("\n"," ") == text:
        entry["baidu"] = translation
    return translation
|
72 |
+
|
73 |
+
def batch_translate(radio, check, text_start_id,text_end_id,progress=gr.Progress()):
    """Machine-translate every line from ``text_start_id`` to ``text_end_id``.

    Args:
        radio: Selected backend, ``"Gpt3"`` or ``"Baidu"``.
        check: When truthy, the JSON file is saved after translating.
        text_start_id: Id of the first line (inclusive).
        text_end_id: Id of the last line (inclusive).
        progress: Gradio progress tracker.

    Returns:
        A status message with the number of translated lines, or ``None``
        when the ids or the backend selection are invalid.
    """
    # Imported locally so the function does not rely on ``time`` leaking in
    # through one of the star imports at the top of the file.
    import time

    progress(0, desc="Starting...")
    if (text_start_id not in idx_dic or text_end_id not in idx_dic
            or idx_dic[text_start_id] > idx_dic[text_end_id]):
        gr.Warning("找不到指定序号, 或id前后顺序错误")
        return
    translators = {"Gpt3": gpt_translate, "Baidu": baidu_translate}
    translate = translators.get(radio)
    if translate is None:
        # Previously this fell through and reported success without
        # translating anything.
        gr.Warning("请先选择翻译接口")
        return
    start = idx_dic[text_start_id]
    end = idx_dic[text_end_id] + 1
    for key in progress.tqdm(id_lis[start:end]):
        translate(dic[key]['text'],key)
        time.sleep(0.1)  # short pause between consecutive API requests
    if check:
        save_json(show_info=False)
    gr.Info(f"批量机翻成功, 共完成{end-start}句翻译")
    return f"已完成{end-start}句翻译"
|
93 |
+
|
94 |
+
# Other actions
|
95 |
+
def last_text():
    """Move the cursor to the previous line and return its id.

    The cursor stays on the first line when it is already there.

    Returns:
        The id at the new cursor position, or ``""`` when no lines are loaded
        (previously an IndexError).
    """
    global id_idx
    if not id_lis:
        return ""
    if id_idx > 0:
        id_idx -= 1
    return id_lis[id_idx]
|
100 |
+
|
101 |
+
def next_text():
    """Move the cursor to the next line and return its id.

    The cursor stays on the last line when it is already there.

    Returns:
        The id at the new cursor position, or ``""`` when no lines are loaded
        (previously an IndexError).
    """
    global id_idx
    if not id_lis:
        return ""
    if id_idx < len(id_lis) - 1:
        id_idx += 1
    return id_lis[id_idx]
|
106 |
+
|
107 |
+
def replace(text_gpt,text_baidu,text_final,text_id, check_file = True):
    """Apply the replacement dictionary to the three translation texts.

    Args:
        text_gpt: GPT translation text.
        text_baidu: Baidu translation text.
        text_final: Final (manual) translation text.
        text_id: Id of the line being edited; when empty, the line at the
            current cursor is used.
        check_file: When True, the replacement dictionary file is re-read
            first and merged into the in-memory dictionary.

    Returns:
        The tuple ``(text_gpt, text_baidu, text_final)`` after replacement.
        The results are also stored in the line's entry when the id is known.
    """
    if not text_id and id_lis:
        text_id = id_lis[id_idx]
    if check_file and osp.exists(replace_dict_path):
        with open(replace_dict_path, "r", encoding="utf-8") as f:
            for line in f:
                item = line.rstrip("\n").split(" ")
                # Skip blank/malformed lines (previously an IndexError) and
                # empty keys, which would make str.replace insert the value
                # between every character.
                if len(item) < 2 or not item[0]:
                    continue
                replace_dic[item[0]] = item[1]
    for key, value in replace_dic.items():
        text_gpt = text_gpt.replace(key, value)
        text_baidu = text_baidu.replace(key, value)
        text_final = text_final.replace(key, value)
    entry = dic.get(text_id)
    if entry is not None:
        entry["gpt3"] = text_gpt
        entry["baidu"] = text_baidu
        entry["text_CN"] = text_final
    return text_gpt,text_baidu,text_final
|
126 |
+
|
127 |
+
def change_id(text_id):
    """Switch the editor to line ``text_id`` and return its field values.

    Missing translation fields are initialised to ``""``, the speaker name is
    registered in the name dictionary if it is new, and the in-memory
    replacement dictionary is applied to the stored translations.

    Args:
        text_id: Id of the line to show.

    Returns:
        A 7-tuple: file path, source text, speaker name, translated speaker
        name, GPT translation, Baidu translation, final translation. For an
        empty or unknown id all fields except the file path are ``""``.
    """
    global id_idx
    if not text_id or text_id not in idx_dic:
        return args["file_path"],"","","","","",""
    id_idx = idx_dic[text_id]
    entry = dic[text_id]
    for field in ("gpt3", "baidu", "text_CN"):
        entry.setdefault(field, "")
    speaker = entry["name"]
    # An unseen speaker maps to itself until the user provides a translation.
    name_dic.setdefault(speaker, speaker)
    entry["name_CN"] = name_dic[speaker]
    # check_file=False: use the in-memory dictionary without re-reading the file.
    replace(entry["gpt3"], entry["baidu"], entry["text_CN"], text_id, False)
    if if_save_id_immediately:
        args["last_edited_id"] = text_id
        save_config(args,config_path)
    return (
        args["file_path"],
        entry["text"],
        speaker,
        name_dic[speaker],
        entry["gpt3"],
        entry["baidu"],
        entry["text_CN"],
    )
|
146 |
+
|
147 |
+
def change_final(text,text_id):
    """Store an edited final translation and mark the line as altered.

    Args:
        text: Current content of the final-translation box.
        text_id: Id of the line being edited; empty/unknown ids are ignored.
    """
    if not text_id or text_id not in idx_dic:
        return
    entry = dic[text_id]
    # A line without a translation yet counts as "" rather than raising KeyError.
    if text != entry.get("text_CN", ""):
        entry["text_CN"] = text
        altered_text_finals.add(text_id)
|
153 |
+
|
154 |
+
def change_name(name,name_cn,text_id):
    """Record ``name_cn`` as the translation of speaker ``name``.

    The pair is stored in the name dictionary and on the current line's entry.
    Empty or unknown ids are ignored.

    Args:
        name: Original speaker name.
        name_cn: Translated speaker name.
        text_id: Id of the line being edited.
    """
    if text_id and text_id in idx_dic:
        name_dic[name] = name_cn
        dic[text_id]["name_CN"] = name_cn
|
159 |
+
|
160 |
+
def save_json(show_info = True):
    """Write the project JSON and the name dictionary back to disk.

    The name dictionary is only written when its file already exists. The set
    of altered lines is reset after saving.

    Args:
        show_info: When True, show a toast with the number of lines whose
            final translation changed since the last save.
    """
    global altered_text_finals
    # Resolve the target from the *current* ``path``: the ``abs_path`` computed
    # at start-up goes stale once another file has been loaded, and saving to
    # it would overwrite the original file with the new file's data.
    target_path = smart_path(path)
    # Serialise first so a serialisation error cannot leave a truncated file.
    payload = json.dumps(dic, indent = 1, ensure_ascii = False)
    with open(target_path, "w", encoding="utf8") as json_file:
        json_file.write(payload)
    if osp.exists(name_dict_path):
        with open(name_dict_path, "w", encoding="utf-8") as f:
            f.writelines(f"{key} {value}\n" for key, value in name_dic.items())
    if show_info:
        gr.Info(f"JSON保存成功, 共更新{len(altered_text_finals)}句译文")
    altered_text_finals = set()
|
171 |
+
|
172 |
+
def save_last_position(text_id):
    """Remember ``text_id`` as the last edited line and persist the config.

    Args:
        text_id: Id to store under ``last_edited_id``.
    """
    args["last_edited_id"] = text_id
    save_config(args, config_path)
|
176 |
+
|
177 |
+
def load_last_position(text_path):
    """Load ``text_path`` if it differs from the open file; return the saved id.

    Args:
        text_path: Path of the project JSON file, as entered in the UI.

    Returns:
        The ``last_edited_id`` stored in the configuration.

    Raises:
        gr.Error: If the file does not exist.
    """
    global id_idx, id_lis, idx_dic, path, abs_path, dic
    new_abs_path = smart_path(text_path)
    if not osp.exists(new_abs_path):
        raise gr.Error("文件不存在")
    if path != text_path:
        # Parse before touching any state, so a broken file leaves the
        # previously loaded project fully intact.
        with open(new_abs_path, "r", encoding="utf8") as json_file:
            new_dic = json.load(json_file)
        dic = new_dic
        id_lis = list(dic.keys())
        idx_dic = {id_: idx for idx, id_ in enumerate(id_lis)}
        id_idx = 0
        path = text_path
        # Keep the resolved path in sync so code that writes to ``abs_path``
        # targets the newly loaded file instead of the previous one.
        abs_path = new_abs_path
        # Edit markers belong to the previous file.
        altered_text_finals.clear()
        args["file_path"] = path
        save_config(args,config_path)
    return args["last_edited_id"]
|
193 |
+
|
194 |
+
def submit_api(baidu_api_id, baidu_api_key, from_lang, to_lang, openai_api_key,prefix,postfix,target_id):
    """Store the API settings from the settings tab and persist the config.

    Baidu fields and the OpenAI key are only overwritten when the submitted
    value is not the empty string; prompt prefix, prompt postfix and target id
    are always overwritten.
    """
    baidu_fields = (
        ("api_id", baidu_api_id),
        ("api_key", baidu_api_key),
        ("from_lang", from_lang),
        ("to_lang", to_lang),
    )
    for field, submitted in baidu_fields:
        if submitted != "":
            args["baidu_api_settings"][field] = submitted
    if openai_api_key != "":
        args["openai_api_settings"]["openai_api_key"] = openai_api_key
    openai_settings = args["openai_api_settings"]
    openai_settings["prompt_prefix"] = prefix
    openai_settings["prompt_postfix"] = postfix
    args["target_id"] = target_id
    save_config(args, config_path)
|
211 |
+
|
212 |
+
def refresh_context(refresh_id,length,context_type):
    """Build the preview table around line ``refresh_id``.

    Args:
        refresh_id: Id of the line to centre the preview on.
        length: Number of neighbouring lines to include (int or numeric
            string); negative values are treated as 0.
        context_type: ``"上下文"`` (both sides), ``"上文"`` (preceding lines)
            or ``"下文"`` (following lines). Any other value falls back to
            both sides.

    Returns:
        ``(rows, current_id)``. Each row is
        ``[id, name, name_CN, text, text_CN]``; the centre line's id is wrapped
        in ``**`` and edited-but-unsaved translations are prefixed with ``*``.
        ``current_id`` is the id at the editor cursor (``""`` when no lines
        are loaded).
    """
    current_id = id_lis[id_idx] if id_lis else ""
    if not refresh_id or refresh_id not in idx_dic:
        return [], current_id
    try:
        length = max(int(length), 0)
    except (TypeError, ValueError):
        gr.Warning("上下文长度必须为整数")
        return [], current_id
    idx = idx_dic[refresh_id]
    if context_type == "上文":
        ids = id_lis[max(idx-length, 0):idx+1]
    elif context_type == "下文":
        ids = id_lis[idx:idx+length+1]
    else:
        # "上下文", and the fallback for an unexpected mode (which previously
        # left ``ids`` unbound).
        ids = id_lis[max(idx-length, 0):idx+length+1]
    data = []
    for i in ids:
        entry = dic[i]
        name = entry["name"]
        # An unseen speaker maps to itself until the user translates it.
        name_dic.setdefault(name, name)
        entry["name_CN"] = name_dic[name]
        entry.setdefault('text_CN', "")
        row = [i, name, entry['name_CN'], entry['text'], entry['text_CN']]
        if i == refresh_id:
            row[0] = f"**{i}**"
        if i in altered_text_finals:
            row[4] = f"*{row[4]}"
        data.append(row)
    return data, current_id
|
235 |
+
|
236 |
+
def save_context(data, refresh_id, if_save = False):
    """Write translations edited in the preview table back to the project.

    Args:
        data: Preview table with at least the columns ``id`` and ``text_CN``
            (presumably the pandas DataFrame delivered by the Gradio
            DataFrame component; confirm against the Gradio version in use).
        refresh_id: Unused; kept because the UI passes it.
        if_save: When truthy, the JSON file is saved afterwards.
    """
    altered = 0
    for raw_id, text_cn in zip(data['id'], data['text_CN']):
        # Empty placeholder rows and ids that are not part of the project are
        # skipped (previously a KeyError/AttributeError).
        if not isinstance(raw_id, str):
            continue
        text_id = raw_id.replace("*","")  # drop the "**id**" highlight marker
        entry = dic.get(text_id)
        if entry is None:
            continue
        if not isinstance(text_cn, str):
            # None / NaN cells count as empty text; other scalars are stringified.
            is_missing = text_cn is None or (isinstance(text_cn, float) and text_cn != text_cn)
            text_cn = "" if is_missing else str(text_cn)
        # Strip the "*" marker the preview adds to edited-but-unsaved lines.
        if text_id in altered_text_finals and text_cn[:1] == "*":
            text_cn = text_cn[1:]
        if entry.get('text_CN', "") != text_cn:
            altered += 1
            altered_text_finals.add(text_id)
            entry['text_CN'] = text_cn
    gr.Info(f"已修改{altered}条译文")
    if if_save:
        save_json()
|
252 |
+
|
253 |
+
# Derive text
|
254 |
+
def derive_text(radio_type, text_start_id, text_end_id,text_seperator_long,text_seperator_short, output_txt_path):
    """Export the lines from ``text_start_id`` to ``text_end_id`` as a txt file.

    Args:
        radio_type: Export layout: ``"双语|人名文本"`` (bilingual with names),
            ``"中文|人名文本"`` (translation with names), ``"中文|单次人名文本"``
            (translation, each name printed only on its first occurrence) or
            ``"中文|纯文本"`` (translation only).
        text_start_id: Id of the first line (inclusive).
        text_end_id: Id of the last line (inclusive).
        text_seperator_long: Separator written before every entry.
        text_seperator_short: Separator between source and translation in the
            bilingual layout.
        output_txt_path: Target path; must end in ``.txt``.
    """
    output_txt_path = smart_path(output_txt_path)
    if output_txt_path[-4:] != ".txt":
        gr.Warning("输出路径错误")
        return
    if (text_start_id not in idx_dic or text_end_id not in idx_dic
            or idx_dic[text_start_id] > idx_dic[text_end_id]):
        gr.Warning("找不到指定序号, 或id前后顺序错误")
        return
    lis = id_lis[idx_dic[text_start_id]:idx_dic[text_end_id] + 1]

    def name_cn_of(key):
        # Lines never opened in the editor have no "name_CN" yet; fall back
        # to the name dictionary, then to the original name.
        entry = dic[key]
        name = entry.get("name", "")
        return entry.get("name_CN", name_dic.get(name, name))

    def text_cn_of(key):
        # Untranslated lines export as empty text instead of raising KeyError.
        return dic[key].get("text_CN", "")

    # The whole document is assembled in memory and written once, so an error
    # while formatting can no longer leave a half-written file behind.
    # NOTE(review): the per-entry blank lines follow the most plausible reading
    # of the original, whose indentation was not available; confirm.
    chunks = []
    if radio_type == "双语|人名文本":
        for key in lis:
            chunks.append(text_seperator_long+"\n")
            chunks.append(dic[key].get("name", "")+"\n")
            chunks.append("\n")
            chunks.append(dic[key]["text"]+"\n")
            chunks.append("\n")
            chunks.append(text_seperator_short+"\n")
            chunks.append(name_cn_of(key)+"\n\n")
            chunks.append(text_cn_of(key)+"\n")
            chunks.append("\n")
    elif radio_type == "中文|人名文本":
        for key in lis:
            chunks.append(text_seperator_long+"\n")
            chunks.append(name_cn_of(key)+"\n\n")
            chunks.append(text_cn_of(key)+"\n")
            chunks.append("\n")
    elif radio_type == "中文|单次人名文本":
        seen_names = set()
        for key in lis:
            name = name_cn_of(key)
            if name not in seen_names:
                seen_names.add(name)
                chunks.append(name + ": "+ text_cn_of(key)+"\n")
            else:
                chunks.append(text_cn_of(key)+"\n")
            chunks.append("\n")
    elif radio_type == "中文|纯文本":
        for key in lis:
            chunks.append(text_cn_of(key)+"\n")
            chunks.append("\n")
    else:
        # No layout selected: previously reported success without writing.
        gr.Warning("请先选择导出类型")
        return
    with open(output_txt_path, "w", encoding="utf-8") as f:
        f.write("".join(chunks))
    # Shown for every layout (two layouts used to return before this line).
    gr.Info(f"Txt导出成功, 共导出{len(lis)}条记录")
|
307 |
+
|
308 |
+
def get_remaining_text_num():
    """Return the progress label: lines left between the cursor and the target.

    Returns:
        ``"目标剩余N条"`` when the configured ``target_id`` exists in the loaded
        file, otherwise ``"目标剩余???条"``.
    """
    # Dict lookup instead of a linear scan of the id list; a config without
    # "target_id" is treated like an unknown target.
    target_idx = idx_dic.get(args.get("target_id"))
    if target_idx is None:
        return "目标剩余???条"
    return f"目标剩余{target_idx - id_idx}条"
|
316 |
+
|
317 |
+
def merge_json(merged_path,file_merging_json,text_start_id,text_end_id,type):
    """Copy a range of entries from an uploaded JSON into the file at ``merged_path``.

    Args:
        merged_path: Path of the JSON file that receives the entries.
        file_merging_json: Uploaded file supplying the entries.
        text_start_id: Id of the first entry to merge (inclusive).
        text_end_id: Id of the last entry to merge (inclusive).
        type: ``"仅人工翻译"`` copies only ``text_CN``; any other value
            replaces the whole entry.

    NOTE(review): only the file on disk is updated. If ``merged_path`` is the
    file currently open in the editor, the in-memory copy is not refreshed and
    a later save may overwrite the merge; confirm the intended workflow.
    """
    merged_path = smart_path(merged_path)
    if not osp.exists(merged_path):
        gr.Warning("路径不存在")
        return
    if file_merging_json is None:
        # Nothing uploaded (previously an AttributeError).
        gr.Warning("请先上传待合并的json文件")
        return
    with open(merged_path, "r", encoding="utf8") as json_file:
        dic_merge = json.load(json_file)
    id_lis_merge = list(dic_merge.keys())
    idx_dic_merge = {id_: idx for idx, id_ in enumerate(id_lis_merge)}
    if (text_start_id not in idx_dic_merge or text_end_id not in idx_dic_merge
            or idx_dic_merge[text_start_id] > idx_dic_merge[text_end_id]):
        gr.Warning("找不到指定序号, 或id前后顺序错误")
        return
    # Accept both an upload object exposing ``.name`` and a plain path string.
    upload_path = getattr(file_merging_json, "name", file_merging_json)
    with open(upload_path, "r", encoding="utf8") as json_file:
        dic_new = json.load(json_file)
    only_manual = type == "仅人工翻译"
    merged = 0
    first = idx_dic_merge[text_start_id]
    last = idx_dic_merge[text_end_id]
    for text_id in id_lis_merge[first:last + 1]:
        new_entry = dic_new.get(text_id)
        # The upload may cover only part of the range; missing entries are
        # skipped instead of aborting the merge with a KeyError.
        if new_entry is None:
            continue
        if only_manual:
            if 'text_CN' not in new_entry:
                continue
            dic_merge[text_id]['text_CN'] = new_entry['text_CN']
        else:
            dic_merge[text_id] = new_entry
        merged += 1
    with open(merged_path, "w", encoding="utf8") as json_file:
        json.dump(dic_merge,json_file,indent = 1,ensure_ascii = False)
    gr.Info(f"合并成功,共更新{merged}条译文")
|
343 |
+
|
344 |
+
def output_json(merged_path,text_start_id,text_end_id):
    """Export a range of entries of ``merged_path`` into a smaller JSON file.

    The export is written next to the source file as ``small_<source name>``.

    Args:
        merged_path: Path of the JSON file to export from.
        text_start_id: Id of the first entry (inclusive).
        text_end_id: Id of the last entry (inclusive).

    Returns:
        Path of the written file, or ``None`` when the inputs are invalid.
    """
    merged_path = smart_path(merged_path)
    if not osp.exists(merged_path):
        gr.Warning("路径不存在")
        return
    with open(merged_path, "r", encoding="utf8") as json_file:
        dic_merge = json.load(json_file)
    id_lis_merge = list(dic_merge.keys())
    idx_dic_merge = {id_: idx for idx, id_ in enumerate(id_lis_merge)}
    if (text_start_id not in idx_dic_merge or text_end_id not in idx_dic_merge
            or idx_dic_merge[text_start_id] > idx_dic_merge[text_end_id]):
        gr.Warning("找不到指定序号, 或id前后顺序错误")
        return
    first = idx_dic_merge[text_start_id]
    last = idx_dic_merge[text_end_id]
    dic_new = {text_id: dic_merge[text_id] for text_id in id_lis_merge[first:last + 1]}
    # Name the export after the file actually exported. The original used the
    # global ``path`` of the file open in the editor, which differs as soon as
    # another path is entered on this tab.
    name = "small_" + osp.basename(merged_path)
    new_path = osp.join(osp.dirname(merged_path), name)
    with open(new_path, "w", encoding="utf8") as json_file:
        json.dump(dic_new,json_file,indent = 1,ensure_ascii = False)
    return new_path
|
366 |
+
|
367 |
+
# Keyboard shortcuts injected into the page <head>: Shift+<key> clicks the
# button with the matching element id. Missing buttons and key events without
# a string `key` are ignored instead of throwing in the browser console.
shortcut_js = """
<script>
const SHORTCUT_TARGETS = {
    s: "button_save",
    w: "button_up",
    x: "button_down",
    r: "button_replace",
    g: "button_translate_gpt",
    b: "button_translate_baidu"
};

function shortcuts(e) {
    if (!e.shiftKey || typeof e.key !== "string") {
        return;
    }
    const targetId = SHORTCUT_TARGETS[e.key.toLowerCase()];
    if (typeof targetId !== "string") {
        return;
    }
    const button = document.getElementById(targetId);
    if (button) {
        button.click();
    }
}
document.addEventListener('keyup', shortcuts, false);
</script>
"""
|
394 |
+
|
395 |
+
# UI definition.
# NOTE(review): the nesting of layout containers below is reconstructed from
# the statement order only (the original indentation was not available);
# verify the layout visually.
with gr.Blocks(theme=Theme1(),head=shortcut_js) as demo:
    gr.Markdown("# <center>EasyTranslator v1.0.6</center> ",visible=True)
    # Text editing tab
    with gr.Tab("文本编辑"):
        gr.Markdown("## 文本编辑及保存区")
        with gr.Row():
            text_file_path = gr.Textbox(label = "File Path", value = args["file_path"])
            text_id = gr.Textbox(label = "Text id",show_copy_button=True)
            button_load_pos = gr.Button("LOAD last edited position")
            # The manual "save position" button only exists when the position
            # is not saved automatically on every id change.
            if not if_save_id_immediately:
                button_save_pos = gr.Button("SAVE last edited position")
        with gr.Row():
            if not moyu_mode:
                # Full-screen mode
                with gr.Column():
                    text_name = gr.Textbox(label = "Name")
                    text_text = gr.Textbox(label = "Text", lines=10,show_copy_button=True)
                    button_save = gr.Button("SAVE FILE",scale= 2,elem_id = "button_save")
                with gr.Column():
                    text_name_cn = gr.Textbox(label = "Name_CN")
                    with gr.Row():
                        text_gpt = gr.Textbox(label = "GPT", lines=3,show_copy_button=True,interactive = True)
                        button_translate_gpt = gr.Button("Translate(GPT)",elem_id = "button_translate_gpt")
                    with gr.Row():
                        text_baidu = gr.Textbox(label = "Baidu", lines=3,show_copy_button=True,interactive = True)
                        button_translate_baidu = gr.Button("Translate(Baidu)",elem_id = "button_translate_baidu")
                    text_final = gr.Textbox(label = "Text_CN", lines=3,show_copy_button=True,interactive = True)
                    with gr.Row():
                        button_up = gr.Button("↑",elem_id = "button_up")
                        button_down = gr.Button("↓",elem_id = "button_down")
                        button_replace = gr.Button("Replace",elem_id = "button_replace")
            else:
                # Compact ("moyu") mode. The buttons carry the same element
                # ids as in full-screen mode so the keyboard shortcuts work
                # here as well.
                with gr.Column():
                    button_save = gr.Button("SAVE FILE",scale= 2,elem_id = "button_save")
                    text_name = gr.Textbox(label = "Name")
                    text_name_cn = gr.Textbox(label = "Name_CN")
                with gr.Column():
                    with gr.Row():
                        text_gpt = gr.Textbox(label = "GPT", lines=3,show_copy_button=True,interactive = True)
                        button_translate_gpt = gr.Button("Translate(GPT)",elem_id = "button_translate_gpt")
                    with gr.Row():
                        text_baidu = gr.Textbox(label = "Baidu", lines=3,show_copy_button=True,interactive = True)
                        button_translate_baidu = gr.Button("Translate(Baidu)",elem_id = "button_translate_baidu")
                    text_text = gr.Textbox(label = "Text", lines=3,show_copy_button=True)
                    text_final = gr.Textbox(label = "Text_CN", lines=3,show_copy_button=True,interactive = True)
                    with gr.Row():
                        button_up = gr.Button("↑",elem_id = "button_up")
                        button_down = gr.Button("↓",elem_id = "button_down")
                        button_replace = gr.Button("Replace",elem_id = "button_replace")
        label_remaining_text = gr.Label(label="进度",value = "目标剩余???条")
        gr.Markdown("## 批量机翻区")
        with gr.Row():
            text_translate_start_id = gr.Textbox(label = "起始句id")
            text_translate_end_id = gr.Textbox(label = "结束句id")
        with gr.Row():
            radio_translator = gr.Radio(choices = ["Baidu","Gpt3"],label = "接口")
            label_progress = gr.Label(label = "进度条",value="")
        checkbox_if_save_translation = gr.Checkbox(value= False, label = "翻译完成后直接保存JSON")
        button_batch_translate = gr.Button("批量翻译")

    # Preview and export tab (kept in a variable so its select event can be wired)
    tab_context = gr.Tab("文本预览及导出")
    with tab_context:
        gr.Markdown("## 上下文预览区")
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    text_refresh_id = gr.Textbox(label = "编号", value = args["last_edited_id"])
                    text_context_length = gr.Textbox(label = "上下文长度", value = args["context_half_length"])
                radio_context_type = gr.Radio(choices = ["上下文","上文", "下文"], label = "预览模式",value="下文")
            with gr.Column():
                with gr.Row():
                    button_refresh = gr.Button("Refresh")
                    button_save_context = gr.Button("Save Changes")
                checkbox_if_save_context = gr.Checkbox(value= False, label = "修改直接保存JSON")
        dataframe_context = gr.DataFrame(headers=['id','name','name_CN','text','text_CN'],
                                         interactive=True)
        gr.Markdown("## 文档导出区")
        radio_type = gr.Radio(choices = ["中文|纯文本","中文|单次人名文本", "中文|人名文本", "双语|人名文本"],label = "导出类型")
        with gr.Row():
            text_derive_start_id = gr.Textbox(label = "起始句id")
            text_derive_end_id = gr.Textbox(label = "结束句id")
        with gr.Row():
            text_seperator_long = gr.Textbox(label = "句间分隔符(长)", value = args["seperator_long"])
            text_seperator_short = gr.Textbox(label = "双语间分隔符(短)", value = args["seperator_short"])
        text_output_path = gr.Textbox(label = "输出文件路径", value = args["output_txt_path"])
        button_derive_text = gr.Button("导出文本")

    # File conversion tab
    with gr.Tab("文件转换"):
        gr.Markdown("## CSV to JSON(支持批量上传)")
        gr.Markdown(
            "准备好台词csv文件(至少包含正序排列的台词)并将台词列命名为text,如自带角色名则将此列命名为name,如自带id则将此列命名为id。"
            "在此处上传csv文件,保存生成的json文件,之后在主界面输入json文件路径即可使用。"
        )
        with gr.Row():
            with gr.Column():
                file_target_csv = gr.File(file_types=["csv"],file_count = "multiple", label="Input CSV")
                with gr.Row():
                    text_text_column = gr.Textbox(label="text列名",value = args["csv_column_name"]["text"])
                    text_name_column = gr.Textbox(label="name列名",value = args["csv_column_name"]["name"])
                    text_id_column = gr.Textbox(label="id列名(optional)",value = args["csv_column_name"]["id"],placeholder = "若不指定或找不到指定列,程序会自动编号")
                button_convert2json = gr.Button("Convert")
            file_result_json = gr.File(file_types=["json"],label="Output JSON",interactive=False)
        gr.Markdown("## JSON to CSV(支持批量上传)")
        with gr.Row():
            with gr.Column():
                file_target_json = gr.File(file_types=["json"],file_count = "multiple",label="Input JSON")
                button_convert2csv = gr.Button("Convert")
            # "csv" (was the typo "jcsv"), matching the other File components.
            file_result_csv = gr.File(file_types=["csv"],label="Output CSV",interactive=False)

    # File merging tab
    with gr.Tab("文件合并"):
        gr.Markdown("## 合并JSON文件")
        gr.Markdown(
            "将两个json文件中的译文合并,方便多人协作。使用方法为上传部分翻译后的json文件,指定起止id。"
            "程序会用【上传文件】中,从起始句id到结束句id的全部内容,覆盖【指定地址】中的json文件从起始句id到结束句id的全部内容。"
            "若起止id顺序颠倒或不存在,按钮不会作用。请仔细检查并做好备份!!"
        )
        with gr.Column():
            text_merged_path = gr.Textbox(label = "File Path", value = args["file_path"])
            file_merging_json = gr.File(file_types=["json"],file_count = "single", label="File to be merged")
            with gr.Row():
                text_merge_start_id = gr.Textbox(label="起始句id",value = "")
                text_merge_end_id = gr.Textbox(label="结束句id",value = "")
            radio_merge_type = gr.Radio(choices = ["仅人工翻译","全部替换"], label = "合并模式",value="仅人工翻译")
            button_merge = gr.Button("Merge")

        gr.Markdown("## 导出JSON文件")
        gr.Markdown("支持导出起止id范围的小型json文件,以减少协作时的传输负担。使用上面File Path的指定地址。")
        with gr.Row():
            text_output_start_id = gr.Textbox(label="起始句id",value = "")
            text_output_end_id = gr.Textbox(label="结束句id",value = "")
        button_output = gr.Button("Output")
        file_output_json = gr.File(file_types=["json"],label="Output JSON",interactive=False)

    # API settings tab
    with gr.Tab("API Settings"):
        gr.Markdown("## 百度 API")
        text_baidu_api_id = gr.Textbox(label="Baidu API Id",value = args["baidu_api_settings"]["api_id"])
        text_baidu_api_key = gr.Textbox(label="Baidu API Key", value = args["baidu_api_settings"]["api_key"])
        with gr.Row():
            text_from_lang = gr.Textbox(label="From Lang",value = args["baidu_api_settings"]["from_lang"])
            text_to_lang = gr.Textbox(label="To Lang",value = args["baidu_api_settings"]["to_lang"])
        gr.Markdown("## OPENAI API")
        text_openai_api = gr.Textbox(label="OPENAI API Key",value = args["openai_api_settings"]["openai_api_key"])
        with gr.Row():
            text_prefix = gr.Textbox(label="Prompt Prefix",value = args["openai_api_settings"]["prompt_prefix"])
            text_postfix = gr.Textbox(label="Prompt Postfix",value = args["openai_api_settings"]["prompt_postfix"])
        gr.Markdown("## 目标id")
        text_target_id = gr.Textbox(label="Target Id",value = args["target_id"])
        button_api_submit = gr.Button("Submit")

    # Tab events: opening the preview tab refreshes it around the current line.
    tab_context.select(refresh_context, inputs=[text_id,text_context_length,radio_context_type],outputs=[dataframe_context,text_refresh_id])

    # Textbox events
    text_id.change(change_id, inputs = [text_id],
                   outputs = [text_file_path,text_text,text_name,text_name_cn,text_gpt,text_baidu,text_final])
    text_id.change(get_remaining_text_num,inputs = None, outputs= [label_remaining_text])
    text_final.change(change_final,inputs = [text_final,text_id])
    text_name_cn.change(change_name,inputs = [text_name,text_name_cn,text_id])

    # Button events
    # - Text editing tab
    button_load_pos.click(load_last_position,inputs=text_file_path, outputs = text_id)
    if not if_save_id_immediately:
        button_save_pos.click(save_last_position, inputs = [text_id])
    button_up.click(last_text, outputs = text_id)
    button_down.click(next_text, outputs = text_id)
    button_translate_gpt.click(gpt_translate,
                               inputs=[text_text,text_id], outputs=text_gpt)
    button_translate_baidu.click(baidu_translate,
                                 inputs=[text_text,text_id], outputs=text_baidu)
    button_replace.click(replace,
                         inputs = [text_gpt,text_baidu,text_final,text_id],
                         outputs=[text_gpt,text_baidu,text_final])
    button_save.click(save_json)

    button_batch_translate.click(batch_translate, inputs = [radio_translator,checkbox_if_save_translation,text_translate_start_id,text_translate_end_id],
                                 outputs = [label_progress])

    # - Preview and export tab
    button_refresh.click(refresh_context,inputs=[text_refresh_id,text_context_length,radio_context_type], outputs = [dataframe_context,text_id])
    button_save_context.click(save_context, inputs=[dataframe_context, text_refresh_id, checkbox_if_save_context])
    button_derive_text.click(derive_text,
                             inputs = [radio_type, text_derive_start_id, text_derive_end_id,
                                       text_seperator_long,text_seperator_short,text_output_path])

    # - File conversion tab
    button_convert2json.click(convert_to_json,
                              inputs = [file_target_csv, text_text_column, text_name_column, text_id_column],
                              outputs = file_result_json)
    button_convert2csv.click(convert_to_csv,
                             inputs = file_target_json,
                             outputs = file_result_csv)

    # - File merging tab
    button_merge.click(merge_json, inputs=[text_merged_path,file_merging_json,text_merge_start_id,text_merge_end_id,radio_merge_type])
    button_output.click(output_json, inputs=[text_merged_path,text_output_start_id,text_output_end_id],outputs=file_output_json)

    # - API settings tab
    button_api_submit.click(submit_api,
                            inputs = [text_baidu_api_id,text_baidu_api_key,text_from_lang,text_to_lang,
                                      text_openai_api,text_prefix,text_postfix,text_target_id])

demo.queue()

if __name__=="__main__":
    demo.launch(show_error=True)
|
README.md
CHANGED
@@ -10,4 +10,125 @@ pinned: false
|
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
+
# EasyTranslator v1.0.6
|
14 |
+
基于gradio的汉化辅助工具
|
15 |
+
## v1.0.6更新内容
|
16 |
+
1. 更新文件合并功能,方便多人协作。在文件合并页中可依照指示将两个json文件合并,同步人工翻译进度,并支持导出小规模json文件以方便传输。
|
17 |
+
|
18 |
+
## v1.0.5更新内容
|
19 |
+
1. 支持键盘快捷键<br>
|
20 |
+
shift+w: ↑<br>
|
21 |
+
shift+x: ↓<br>
|
22 |
+
shift+s: save json<br>
|
23 |
+
shift+r: replace<br>
|
24 |
+
shift+g: gpt translate<br>
|
25 |
+
shift+b: baidu translate<br>
|
26 |
+
|
27 |
+
## v1.0.4更新内容
|
28 |
+
1. 追加摸鱼模式, 将必要组件集中在半个屏幕内。在`config.json`中`moyu_mode`设为1开启, 设为0关闭
|
29 |
+
2. 加入对GPT翻译的超时检测, 时间上限在`config.json`的`openai_api_settings`中的`time_limit`处设置, 单位为秒。(若请求超时, 会打印超时提示, 但不会报错)
|
30 |
+
3. GPT翻译现在将不返回重复结果
|
31 |
+
|
32 |
+
## v1.0.3更新内容
|
33 |
+
1. 支持预览页直接修改译文, 建议保存JSON后再使用此功能
|
34 |
+
2. 可选是否即时更新上次编辑id
|
35 |
+
|
36 |
+
`config.json`中设置`"if_save_id_immediately"`参数, 若为1则逻辑与之前一样, 在切换id时立刻保存进`config.json`;若为0则会显示保存编辑id按钮`SAVE last edited position`, 在点击后存入`config.json`。
|
37 |
+
|
38 |
+
## v1.0.2更新内容
|
39 |
+
1. 支持批量机翻
|
40 |
+
|
41 |
+
## v1.0.1更新内容
|
42 |
+
1. 优化文件读取逻辑
|
43 |
+
2. 增加错误提示、警告等。保存JSON成功时会提示更新的译文条数
|
44 |
+
3. 允许自定义传输到gpt的prompt、自定义百度翻译的原文及目标语言
|
45 |
+
4. 追加上下文预览功能, 并允许自定义预览条数和编号。指定id将会以双星号标记, 修改过的译文将会在前面加星号标记
|
46 |
+
5. 优化按钮手感
|
47 |
+
|
48 |
+
## 特性
|
49 |
+
1. 一键机翻接口, 提供复制到剪贴板按钮
|
50 |
+
2. 便捷的上下句切换, 直接跳转功能
|
51 |
+
3. 记忆上次编辑位置功能
|
52 |
+
4. 人名翻译记忆功能, 一次修改将会同步到全体。人名词典在程序启动时读取并在保存JSON文件时保存。开启程序时可以直接改`name_cn`, 关闭程序后可以修改人名词典。下次开启程序时人名词典中的内容将会覆盖JSON文件中的`name_cn`。
|
53 |
+
5. 文本翻译记忆功能, 机翻/修改后只要不关闭程序, 切换上下句、刷新网页都不会影响
|
54 |
+
6. 译文缓存。相对地原文不会缓存, 所以手滑改或删掉只要切换或者刷新即可恢复。因此想查看原文具体某个词的翻译也可以直接编辑原文再机翻, 不会影响原文本。
|
55 |
+
7. 一键替换功能, 用于专有名词错译的情况。会将机翻及手翻文本中的对象全部替换。替换词典可以在运行中直接更改, 不用重开程序。
|
56 |
+
8. 便利的api key管理及prompt修改等
|
57 |
+
9. 提供JSON文件与CSV文件互转
|
58 |
+
10. 上下文预览功能
|
59 |
+
<br><br>
|
60 |
+
|
61 |
+
## 使用
|
62 |
+
至少需要安装python3(作者使用的版本是3.10, 其它版本尚未测试)
|
63 |
+
***
|
64 |
+
### Install
|
65 |
+
```
|
66 |
+
git clone https://github.com/alienet1109/EasyTranslator.git
|
67 |
+
```
|
68 |
+
不想安git可以直接下载压缩包
|
69 |
+
***
|
70 |
+
### Preparation
|
71 |
+
#### 1. 安装依赖
|
72 |
+
```
|
73 |
+
pip install -r requirements.txt
|
74 |
+
```
|
75 |
+
#### 2. 文本准备
|
76 |
+
需要使用者自行准备原文本json文件, 或使用本程序将原文本csv文件转换为json文件 \
|
77 |
+
csv文件格式要求为:
|
78 |
+
* 至少包含人名列、文本列, 按顺序排列的表格
|
79 |
+
|
80 |
+
只有文本没有人名也可以使用, 在csv里新建空列'name'即可。\
|
81 |
+
若不指定id列名, 程序会自动生成id。 \
|
82 |
+
可以指定人名和文本的列名, 将会分别以'name'、'text'为键输入json文件;其它列将会以原列名为键输入, 以防数据丢失。\
|
83 |
+
生成json文件后, 下载, 然后输入其路径(不一定要与代码同一文件夹)即可使用。
|
84 |
+
|
85 |
+
json文件格式要求为:
|
86 |
+
* 由key为id, value为{'name':'原文人名','text':'原文文本'}的键值对组成, 按文本顺序正序排序的字典。
|
87 |
+
|
88 |
+
运行途中会频繁修改json文件, 所以最好做好备份。\
|
89 |
+
可以随时在页面中修改json文件路径, 修改前务必保存, 修改后请按Load按钮以同步更新否则不知道会有什么bug。\
|
90 |
+
上次编辑文本编号将会重置, 路径与编号将直接更新至config文件。
|
91 |
+
|
92 |
+
#### 3. 修改配置文件`config.json`
|
93 |
+
* 必须:
|
94 |
+
1. 设置文本文件`file_path`及人名词典`name_dict_path`的路径(推荐使用绝对路径)。之后结果会直接保存至对应路径。
|
95 |
+
|
96 |
+
* 可选:
|
97 |
+
1. 设置替换词典`replace_dict_path`路径, 如不使用此功能则不需要;
|
98 |
+
2. 可设置api key和分隔符等, 也可以直接在程序更改。程序中的修改会改变预设api key, 但不会改变预设的分隔符。
|
99 |
+
***
|
100 |
+
### Run
|
101 |
+
直接点开`EasyTranslator.py`或在文件夹下执行命令:
|
102 |
+
```
|
103 |
+
python EasyTranslator.py
|
104 |
+
```
|
105 |
+
然后在网页中打开程序给出的网址(eg: http://127.0.0.1:7860 )
|
106 |
+
<br><br>
|
107 |
+
|
108 |
+
## 演示
|
109 |
+
摸鱼模式 \
|
110 |
+
 \
|
111 |
+
批量翻译 \
|
112 |
+
 \
|
113 |
+
上下文预览\
|
114 |
+

|
115 |
+
|
116 |
+

|
117 |
+
|
118 |
+

|
119 |
+
|
120 |
+

|
121 |
+
|
122 |
+

|
123 |
+
|
124 |
+

|
125 |
+
|
126 |
+

|
127 |
+
|
128 |
+

|
129 |
+
|
130 |
+
## 计划追加功能
|
131 |
+
1. 可选主题
|
132 |
+
2. 追加翻译接口
|
133 |
+
3. 追加文本输出格式
|
134 |
+
4. 发生修改时直接存入小规模临时文件, 防止数据丢失
|
config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"moyu_mode": "0",
|
3 |
+
"if_save_id_immediately": "1",
|
4 |
+
"last_edited_id": "100001001",
|
5 |
+
"target_id": "100001005",
|
6 |
+
"file_path": "./example_text.json",
|
7 |
+
"context_half_length": "10",
|
8 |
+
"name_dict_path": "./example_name_dict.json",
|
9 |
+
"replace_dict_path": "./example_replace_dict.json",
|
10 |
+
"output_txt_path": "./output.txt",
|
11 |
+
"seperator_long": "===============================",
|
12 |
+
"seperator_short": "---------------------",
|
13 |
+
"csv_column_name": {
|
14 |
+
"id": "",
|
15 |
+
"text": "text",
|
16 |
+
"name": "name"
|
17 |
+
},
|
18 |
+
"baidu_api_settings": {
|
19 |
+
"api_id": "YOUR BAIDU API ID",
|
20 |
+
"api_key": "YOUR BAIDU API KEY",
|
21 |
+
"from_lang": "jp",
|
22 |
+
"to_lang": "zh"
|
23 |
+
},
|
24 |
+
"openai_api_settings": {
|
25 |
+
"openai_api_key": "YOUR OPENAI API KEY",
|
26 |
+
"prompt_prefix": "翻译为中文:",
|
27 |
+
"prompt_postfix": "",
|
28 |
+
"time_limit": "15"
|
29 |
+
}
|
30 |
+
}
|
example_name_dict.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
{原文人名} {译文人名}
|
2 |
+
キム 金
|
example_replace_dict.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{旧词} {新词}
|
example_text.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"100001001": {
|
3 |
+
"name": "高橋",
|
4 |
+
"text": "キムさん、こちらは山田さんです。山田さん、こちらはキムさんです。",
|
5 |
+
"name_CN": "高橋",
|
6 |
+
"gpt3": "",
|
7 |
+
"baidu": "",
|
8 |
+
"text_CN": ""
|
9 |
+
},
|
10 |
+
"100001002": {
|
11 |
+
"name": "山田",
|
12 |
+
"text": "山田です。はじめまして、どうぞよろしく。",
|
13 |
+
"name_CN": "山田"
|
14 |
+
},
|
15 |
+
"100001003": {
|
16 |
+
"name": "キム",
|
17 |
+
"text": "私はキムです。こちらこそ、どうぞよろしくお願いいたします。"
|
18 |
+
},
|
19 |
+
"100001004": {
|
20 |
+
"name": "山田",
|
21 |
+
"text": "キムさん、お仕事は。"
|
22 |
+
},
|
23 |
+
"100001005": {
|
24 |
+
"name": "キム",
|
25 |
+
"text": "学生です。"
|
26 |
+
}
|
27 |
+
}
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
openai>=1.0
|
themes.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
from typing import Iterable
|
3 |
+
import gradio as gr
|
4 |
+
from gradio.themes.base import Base
|
5 |
+
from gradio.themes.default import Default
|
6 |
+
from gradio.themes.utils import colors, fonts, sizes
|
7 |
+
import time
|
8 |
+
|
9 |
+
|
10 |
+
class Theme1(Base):
    """Gradio theme built on ``Base`` with this project's default look.

    Defaults: emerald primary hue, blue secondary hue, gray neutral hue,
    medium spacing and radius, large text, the "Quicksand" Google font for
    regular text and "IBM Plex Mono" for monospace text. After the base
    theme is initialised, the input background fill and the block title
    font weight are overridden.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.emerald,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = colors.gray,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "sans-serif",
        ),
        font_mono: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        # Forward every constructor option unchanged to the base theme.
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "spacing_size": spacing_size,
            "radius_size": radius_size,
            "text_size": text_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_options)

        # Project-specific overrides applied on top of the base theme.
        overrides = {
            "input_background_fill": "*neutral_100",
            "block_title_text_weight": "600",
            # Overrides that are currently disabled:
            # button_shadow_active="*neutral_400 0px 0px 2px 2px",
            # block_border_width="3px",
            # button_large_padding="32px",
            # button_secondary_background_fill_hover="*neutral_300",
        }
        super().set(**overrides)
|
54 |
+
|
utils.py
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import openai
|
2 |
+
import requests
|
3 |
+
import random
|
4 |
+
import json
|
5 |
+
from hashlib import md5
|
6 |
+
from os import path as osp
|
7 |
+
import csv
|
8 |
+
import threading
|
9 |
+
|
10 |
+
def load_config(filepath):
    """Read the JSON document stored at ``filepath``.

    Args:
        filepath: Path of a UTF-8 encoded JSON file.

    Returns:
        The Python object decoded from the file's JSON content.
    """
    with open(filepath, mode="r", encoding="utf-8") as config_file:
        return json.load(config_file)
|
14 |
+
|
15 |
+
def save_config(args, filepath):
    """Serialise ``args`` as JSON into ``filepath``, replacing its content.

    The output uses a one-space indent and keeps non-ASCII characters
    unescaped.

    Args:
        args: JSON-serialisable object to store.
        filepath: Destination path; the file is opened in write mode.

    Returns:
        None.
    """
    with open(filepath, mode="w", encoding="utf8") as out_file:
        json.dump(args, out_file, ensure_ascii=False, indent=1)
|
19 |
+
|
20 |
+
def smart_path(path):
    """Resolve ``path`` against the directory that contains this module.

    Args:
        path: A filesystem path.

    Returns:
        ``path`` unchanged when it is absolute; otherwise ``path`` joined
        onto the absolute directory of this source file.
    """
    if osp.isabs(path):
        return path
    module_dir = osp.dirname(osp.abspath(__file__))
    return osp.join(module_dir, path)
|
26 |
+
# Settings loaded once at import time from config.json, which smart_path resolves relative to this module's directory.
args = load_config(smart_path("./config.json"))
|
27 |
+
|
28 |
+
# Baidu translation API: request URL and headers used by get_baidu_completion
endpoint = "http://api.fanyi.baidu.com"
path = "/api/trans/vip/translate"
url = f"{endpoint}{path}"
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
}
|
33 |
+
# Helper used to build the request signature
def make_md5(s, encoding="utf-8"):
    """Return the hexadecimal MD5 digest of ``s`` encoded with ``encoding``.

    Args:
        s: Text to hash.
        encoding: Codec used to turn ``s`` into bytes before hashing.

    Returns:
        The digest as a lowercase hexadecimal string.
    """
    hasher = md5()
    hasher.update(s.encode(encoding))
    return hasher.hexdigest()
|
36 |
+
|
37 |
+
def get_baidu_completion(text, api_id, api_key, from_lang, to_lang):
    """Translate ``text`` through the Baidu translation endpoint.

    A random integer salt is drawn and the request is signed with the MD5
    of ``api_id + text + salt + api_key``. The request is POSTed to the
    module-level ``url`` with the module-level ``headers``.

    Args:
        text: Source text to translate.
        api_id: Baidu application id (string).
        api_key: Baidu secret key (string).
        from_lang: Source language code sent as ``from``.
        to_lang: Target language code sent as ``to``.

    Returns:
        The ``dst`` field of the first entry of ``trans_result`` in the
        JSON response.

    Raises:
        KeyError: If the JSON response has no ``trans_result`` entry
            (presumably the case when the service reports an error;
            confirm against the API documentation).
    """
    salt = random.randint(32768, 65536)
    signature = make_md5("".join((api_id, text, str(salt), api_key)))
    query = {
        "appid": api_id,
        "q": text,
        "from": from_lang,
        "to": to_lang,
        "salt": salt,
        "sign": signature,
    }
    response = requests.post(url, params=query, headers=headers)
    first_translation = response.json()["trans_result"][0]
    return first_translation["dst"]
|
44 |
+
|
45 |
+
# OpenAI setup: API key, request time limit (converted to float) and a shared client, all taken from the loaded config
_openai_settings = args["openai_api_settings"]
openai_api_key = _openai_settings["openai_api_key"]
time_limit = float(_openai_settings["time_limit"])
client = openai.OpenAI(api_key=openai_api_key)
|
49 |
+
class GPTThread(threading.Thread):
    """Thread that performs one chat-completion request.

    The reply text is stored in ``result``, which stays an empty string
    until ``run`` has finished successfully.
    """

    def __init__(self, model, messages, temperature):
        """Store the request parameters; the request is sent by ``run``."""
        super().__init__()
        self.model, self.messages, self.temperature = model, messages, temperature
        self.result = ""

    def terminate(self):
        """Record that the caller gave up on this request.

        Only sets the ``_running`` flag; ``run`` never reads it, so a
        request that is already in flight is not interrupted.
        """
        self._running = False

    def run(self):
        """Send the request through the module-level client and keep the first choice's text."""
        completion = client.chat.completions.create(
            temperature=self.temperature,
            messages=self.messages,
            model=self.model,
        )
        first_choice = completion.choices[0]
        self.result = first_choice.message.content
|
65 |
+
|
66 |
+
def get_gpt_completion(prompt, model="gpt-3.5-turbo", api_key=openai_api_key):
    """Request a chat completion for ``prompt`` with a time limit.

    The request runs in a ``GPTThread`` and this function waits at most
    ``time_limit`` seconds (the value read from the configuration) for it
    to finish.

    Args:
        prompt: Text sent as the single user message.
        model: Name of the chat model to query.
        api_key: Accepted for compatibility but not used here; the request
            goes through the module-level ``client``.

    Returns:
        A ``(text, ok)`` tuple: ``(reply, True)`` when the thread finished
        within the limit, or ``("TimeoutError", False)`` when it did not.
    """
    messages = [{"role": "user", "content": prompt}]
    # A fresh random temperature per call; presumably this keeps repeated requests from returning the same text.
    temperature = random.uniform(0, 1)
    thread = GPTThread(model, messages, temperature)
    thread.start()
    # Honour the configured limit; previously this waited a hard-coded 10 seconds and ignored the setting.
    thread.join(time_limit)
    if thread.is_alive():
        thread.terminate()
        print("请求超时")
        return "TimeoutError", False
    return thread.result, True
|
78 |
+
|
79 |
+
def left_pad_zero(number, digit):
    """Return ``str(number)`` left-padded with ``"0"`` to ``digit`` characters.

    Values whose text is already ``digit`` characters or longer are
    returned unpadded.
    """
    return str(number).rjust(digit, "0")
|
84 |
+
|
85 |
+
def generate_ids(num: int):
    """Build ``num`` zero-padded sequential ids starting from 0.

    Every id is padded to one character more than the number of digits in
    ``num``.

    Returns:
        A list of ``num`` id strings in ascending order.
    """
    width = len(str(num)) + 1
    return [left_pad_zero(index, width) for index in range(num)]
|
91 |
+
|
92 |
+
def convert_to_json(files, text_col, name_col, id_col):
    """Convert CSV files into JSON files written next to the inputs.

    Each CSV row becomes one entry of a dictionary keyed by id. The entry
    holds ``"name"`` (from ``name_col``), ``"text"`` (from ``text_col``) and
    every remaining column under its original header. When ``id_col`` is
    not one of the CSV headers, ids are generated with ``generate_ids``.

    Args:
        files: Iterable of objects whose ``name`` attribute is the path of
            a UTF-8 encoded CSV file.
        text_col: Header of the column holding the text.
        name_col: Header of the column holding the speaker name.
        id_col: Header of the id column; may be absent from the file.

    Returns:
        List with the path of the JSON file written for each input.

    Raises:
        KeyError: If ``name_col`` or ``text_col`` is not a header of a file
            that contains data rows.
    """
    out_files = []
    for file_target in files:
        csv_path = file_target.name
        # splitext copes with extensions of any length, unlike slicing off four characters
        new_path = osp.splitext(csv_path)[0] + ".json"
        dic = {}
        with open(csv_path, "r", encoding="utf-8") as f:
            # Physical line count (header included) only sizes the generated ids.
            # Reuse the one handle and rewind, instead of leaking a second open file.
            line_num = sum(1 for _ in f)
            f.seek(0)
            reader = csv.DictReader(f)
            # fieldnames is None for an empty file; treat that as "no columns"
            fieldnames = reader.fieldnames or []
            if id_col in fieldnames:
                reserved = (name_col, text_col, id_col)
                for row in reader:
                    entry = {"name": row[name_col], "text": row[text_col]}
                    for field in fieldnames:
                        if field not in reserved:
                            entry[field] = row[field]
                    dic[row[id_col]] = entry
            else:
                ids = generate_ids(line_num)
                reserved = (name_col, text_col)
                for i, row in enumerate(reader):
                    entry = {"name": row[name_col], "text": row[text_col]}
                    for field in fieldnames:
                        if field not in reserved:
                            entry[field] = row[field]
                    dic[ids[i]] = entry
        with open(new_path, "w", encoding="utf-8") as f2:
            json.dump(dic, f2, indent=1, ensure_ascii=False)
        out_files.append(new_path)
    return out_files
|
126 |
+
|
127 |
+
def convert_to_csv(files):
    """Convert JSON files into CSV files written next to the inputs.

    Each input must hold a dictionary mapping an id to a dictionary of
    fields. The CSV has an ``id`` column first, followed by every field
    name in the order it is first seen; fields missing from a record are
    written as empty strings.

    Args:
        files: Iterable of objects whose ``name`` attribute is the path of
            a UTF-8 encoded JSON file.

    Returns:
        List with the path of the CSV file written for each input.
    """
    out_files = []
    for file_target in files:
        json_path = file_target.name
        # splitext strips the real extension; slicing four characters off
        # "x.json" used to leave "x." and produce "x..csv"
        new_path = osp.splitext(json_path)[0] + ".csv"
        with open(json_path, "r", encoding="utf-8") as f:
            dic = json.load(f)

        # "id" is reserved for the record key, so start with it and never add
        # it twice even when a record carries its own "id" field
        field_names = ["id"]
        for value in dic.values():
            for field in value:
                if field not in field_names:
                    field_names.append(field)

        rows = []
        for key, value in dic.items():
            row = dict.fromkeys(field_names, "")
            row.update(value)
            row["id"] = key
            rows.append(row)

        with open(new_path, "w", encoding="utf-8", newline="") as f2:
            writer = csv.DictWriter(f2, fieldnames=field_names)
            writer.writeheader()
            writer.writerows(rows)
        out_files.append(new_path)
    return out_files
|
153 |
+
|