Spaces:

alienet
/

EasyTranslator

Sleeping

App Files Files Community

alienet committed on Oct 12, 2024

Commit

aa28c7f

1 Parent(s): 8053048

first

Browse files

Files changed (9) hide show

EasyTranslator.py +607 -0
README.md +122 -1
config.json +30 -0
example_name_dict.txt +2 -0
example_replace_dict.txt +1 -0
example_text.json +27 -0
requirements.txt +1 -0
themes.py +54 -0
utils.py +153 -0

EasyTranslator.py ADDED Viewed

	@@ -0,0 +1,607 @@

+import gradio as gr
+from os import path as osp
+import json
+from utils import *
+from themes import *
+# Initialization
+# id指代台词的编号，为一个字符串
+# idx指代顺序排列的序号，0,1,2,...
+config_path = osp.join(osp.dirname(osp.abspath(__file__)),"./config.json")
+args = load_config(config_path)
+if_save_id_immediately = True if int(args["if_save_id_immediately"]) else False
+moyu_mode = True if int(args["moyu_mode"]) else False
+path = args["file_path"]
+abs_path = smart_path(path)
+replace_dict_path = smart_path(args["replace_dict_path"])
+name_dict_path = smart_path(args["name_dict_path"])
+altered_text_finals= set()
+if osp.exists(abs_path):
+    with open(abs_path, "r", encoding ="utf8") as json_file:
+        dic = json.load(json_file)
+    id_lis = list(dic.keys())
+    idx_dic = dict()
+    for idx,id_ in enumerate(id_lis):
+        idx_dic[id_] = idx
+    id_idx = 0
+    if args["last_edited_id"] in id_lis:
+        id_idx = idx_dic[args["last_edited_id"]]
+# Dict for replacement
+replace_dic = {}
+if osp.exists(replace_dict_path):
+    with open(replace_dict_path, "r", encoding="utf-8") as f:
+        for line in f:
+            if not line:continue
+            item = line.split(" ")
+            item[1] = item[1].replace("\n","")
+            replace_dic[item[0]]=item[1]
+        f.close()
+# Dict for name
+name_dic = {}
+if osp.exists(name_dict_path):
+    with open(name_dict_path, "r", encoding="utf-8") as f:
+            for line in f:
+                if not line:continue
+                item = line.split(" ")
+                item[1] = item[1].replace("\n","")
+                name_dic[item[0]]=item[1]
+# Translate
+def gpt_translate(text,text_id):
+    text = text.replace("\n"," ")
+    prompt = args["openai_api_settings"]["prompt_prefix"]+text+args["openai_api_settings"]["prompt_postfix"]
+    translation, if_succ = get_gpt_completion(prompt, api_key = args["openai_api_settings"]["openai_api_key"])
+    if dic[text_id]["text"].replace("\n"," ") == text and if_succ:
+        dic[text_id]["gpt3"] = translation
+    return translation
+def baidu_translate(text,text_id):
+    text = text.replace("\n"," ")
+    translation = get_baidu_completion(text,
+                                        api_id = args["baidu_api_settings"]["api_id"],
+                                        api_key = args["baidu_api_settings"]["api_key"],
+                                        from_lang=args["baidu_api_settings"]["from_lang"],
+                                        to_lang=args["baidu_api_settings"]["to_lang"],)
+    if dic[text_id]["text"].replace("\n"," ") == text:
+        dic[text_id]["baidu"] = translation
+    return translation
+def batch_translate(radio, check, text_start_id,text_end_id,progress=gr.Progress()):
+    progress(0, desc="Starting...")
+    if text_start_id not in id_lis or text_end_id not in id_lis or idx_dic[text_start_id] > idx_dic[text_end_id]:
+        gr.Warning("找不到指定序号, 或id前后顺序错误")
+        return
+    start = idx_dic[text_start_id]
+    end = idx_dic[text_end_id] + 1
+    lis = id_lis[start:end]
+    if radio == "Gpt3":
+        for key in progress.tqdm(lis):
+            gpt_translate(dic[key]['text'],key)
+            time.sleep(0.1)
+    if radio == 'Baidu':
+        for key in progress.tqdm(lis):
+            baidu_translate(dic[key]['text'],key)
+            time.sleep(0.1)
+    if check:
+        save_json(show_info=False)
+    gr.Info(f"批量机翻成功, 共完成{end-start}句翻译")
+    return f"已完成{end-start}句翻译"
+# Other actions
+def last_text():
+    global id_idx
+    if id_idx > 0:
+        id_idx -= 1
+    return id_lis[id_idx]
+def next_text():
+    global id_idx
+    if id_idx < len(id_lis)-1:
+        id_idx += 1
+    return id_lis[id_idx]
+def replace(text_gpt,text_baidu,text_final,text_id, check_file = True):
+    if not text_id:
+        text_id = id_lis[id_idx]
+    if check_file:
+        if osp.exists(replace_dict_path):
+            with open(replace_dict_path, "r", encoding="utf-8") as f:
+                for line in f:
+                    item = line.split(" ")
+                    item[1] = item[1].replace("\n","")
+                    replace_dic[item[0]]=item[1]
+                f.close()
+    for key,value in replace_dic.items():
+        text_gpt = text_gpt.replace(key, value)
+        text_baidu = text_baidu.replace(key, value)
+        text_final = text_final.replace(key, value)
+    dic[text_id]["gpt3"] = text_gpt
+    dic[text_id]["baidu"] = text_baidu
+    dic[text_id]["text_CN"] = text_final
+    return text_gpt,text_baidu,text_final
+def change_id(text_id):
+    if not text_id or text_id not in idx_dic: return args["file_path"],"","","","","",""
+    global id_idx
+    id_idx = idx_dic[text_id]
+    if "gpt3" not in dic[text_id]:
+        dic[text_id]["gpt3"] = ""
+    if "baidu" not in dic[text_id]:
+        dic[text_id]["baidu"] = ""
+    if "text_CN" not in dic[text_id]:
+        dic[text_id]["text_CN"] = ""
+    if dic[text_id]["name"] not in name_dic:
+        name_dic[dic[text_id]["name"]] = dic[text_id]["name"]
+    dic[text_id]["name_CN"] = name_dic[dic[text_id]["name"]]
+    replace(dic[text_id]["gpt3"],dic[text_id]["baidu"],dic[text_id]["text_CN"],text_id,False)
+    if if_save_id_immediately:
+        args["last_edited_id"] = text_id
+        save_config(args,config_path)
+    return args["file_path"],dic[text_id]["text"],dic[text_id]["name"],name_dic[dic[text_id]["name"]],\
+        dic[text_id]["gpt3"],dic[text_id]["baidu"],dic[text_id]["text_CN"]
+def change_final(text,text_id):
+    if not text_id or not text_id in idx_dic: return
+    if text != dic[text_id]["text_CN"]:
+        dic[text_id]["text_CN"] = text
+        altered_text_finals.add(text_id)
+    return
+def change_name(name,name_cn,text_id):
+    if not text_id or not text_id in idx_dic: return
+    name_dic[name] = name_cn
+    dic[text_id]["name_CN"] = name_cn
+    return
+def save_json(show_info = True):
+    global altered_text_finals
+    with open(abs_path, "w", encoding ="utf8") as json_file:
+        json.dump(dic,json_file,indent = 1,ensure_ascii = False)
+    if osp.exists(name_dict_path):
+        with open(name_dict_path,"w",encoding = "utf-8") as f:
+            for key,value in name_dic.items():
+                f.write(f"{key} {value}\n")
+    if show_info:
+        gr.Info(f"JSON保存成功, 共更新{len(altered_text_finals)}句译文")
+    altered_text_finals = set()
+def save_last_position(text_id):
+    args["last_edited_id"] = text_id
+    save_config(args,config_path)
+    return
+def load_last_position(text_path):
+    global id_idx,id_lis,idx_dic,path,dic
+    if not osp.exists(smart_path(text_path)):
+        raise gr.Error("文件不存在")
+    if path != text_path:
+        path = text_path
+        with open(smart_path(text_path), "r", encoding ="utf8") as json_file:
+            dic = json.load(json_file)
+        id_lis = list(dic.keys())
+        idx_dic = dict()
+        for idx,id_ in enumerate(id_lis):
+            idx_dic[id_] = idx
+        id_idx = 0
+        args["file_path"] = path
+        save_config(args,config_path)
+    return args["last_edited_id"]
+def submit_api(baidu_api_id, baidu_api_key, from_lang, to_lang, openai_api_key,prefix,postfix,target_id):
+    global args
+    if baidu_api_id != "":
+        args["baidu_api_settings"]["api_id"] = baidu_api_id
+    if baidu_api_key != "":
+        args["baidu_api_settings"]["api_key"] = baidu_api_key
+    if from_lang != "":
+        args["baidu_api_settings"]["from_lang"] = from_lang
+    if to_lang != "":
+        args["baidu_api_settings"]["to_lang"] = to_lang
+    if openai_api_key != "":
+        args["openai_api_settings"]["openai_api_key"] = openai_api_key
+    args["openai_api_settings"]["prompt_prefix"] = prefix
+    args["openai_api_settings"]["prompt_postfix"] = postfix
+    args["target_id"] = target_id
+    save_config(args,config_path)
+    return
+def refresh_context(refresh_id,length,context_type):
+    if not refresh_id or not refresh_id in idx_dic: return [],id_lis[id_idx]
+    length = int(length)
+    idx = idx_dic[refresh_id]
+    if context_type == "上下文":
+        ids = id_lis[max(idx-length, 0):idx+length+1]
+    elif context_type == "上文":
+        ids = id_lis[max(idx-length, 0):idx+1]
+    elif context_type == "下文":
+        ids = id_lis[idx:idx+length+1]
+    data = []
+    for i in ids:
+        if dic[i]["name"] not in name_dic:
+            name_dic[dic[i]["name"]] = dic[i]["name"]
+        dic[i]["name_CN"] = name_dic[dic[i]["name"]]
+        if 'text_CN' not in dic[i]:
+            dic[i]['text_CN'] = ""
+        row = [i, dic[i]['name'],dic[i]['name_CN'], dic[i]['text'],dic[i]['text_CN']]
+        if i == id_lis[idx]: row[0] = f"**{i}**"
+        if i in altered_text_finals:
+            row[4] = f"*{row[4]}"
+        data.append(row)
+    return data,id_lis[id_idx]
+def save_context(data, refresh_id, if_save = False):
+    altered = 0
+    for i in range(len(data)):
+        text_id = data['id'][i]
+        text_cn = data['text_CN'][i]
+        text_id = text_id.replace("*","")
+        if text_id in altered_text_finals and text_cn and text_cn[0] == "*":
+            text_cn = text_cn[1:]
+        if dic[text_id]['text_CN'] != text_cn:
+            altered += 1
+            altered_text_finals.add(text_id)
+            dic[text_id]['text_CN'] = text_cn
+    gr.Info(f"已修改{altered}条译文")
+    if if_save:
+        save_json()
+    return
+# Derive text
+def derive_text(radio_type, text_start_id, text_end_id,text_seperator_long,text_seperator_short, output_txt_path):
+    output_txt_path = smart_path(output_txt_path)
+    if output_txt_path[-4:] != ".txt":
+        gr.Warning("输出路径错误")
+        return
+    if text_start_id not in id_lis or text_end_id not in id_lis or idx_dic[text_start_id] > idx_dic[text_end_id]:
+        gr.Warning("找不到指定序号, 或id前后顺序错误")
+        return
+    start = idx_dic[text_start_id]
+    end = idx_dic[text_end_id] + 1
+    lis = id_lis[start:end]
+    if radio_type == "双语|人名文本":
+        with open(output_txt_path,"w",encoding="utf-8") as f:
+            for key in lis:
+                # if key[-3:] == "001":
+                #     f.write("【"+key[-4]+"】\n")
+                f.write(text_seperator_long+"\n")
+                f.write(dic[key]["name"]+"\n")
+                f.write("\n")
+                f.write(dic[key]["text"]+"\n")
+                f.write("\n")
+                f.write(text_seperator_short+"\n")
+                f.write(dic[key]["name_CN"]+"\n\n")
+                f.write(dic[key]["text_CN"]+"\n")
+                f.write("\n")
+        return
+    if radio_type == "中文|人名文本":
+        with open(output_txt_path,"w",encoding="utf-8") as f:
+            for key in lis:
+                # if key[-3:] == "001":
+                #     f.write("【"+key[-4]+"】\n")
+                f.write(text_seperator_long+"\n")
+                f.write(dic[key]["name_CN"]+"\n\n")
+                f.write(dic[key]["text_CN"]+"\n")
+                f.write("\n")
+        return
+    if radio_type == "中文|单次人名文本":
+        with open(output_txt_path,"w",encoding="utf-8") as f:
+            name_lis = []
+            for key in lis:
+                name = dic[key]["name_CN"]
+                if name not in name_lis:
+                    name_lis.append(name)
+                    f.write(name + ": "+ dic[key]["text_CN"]+"\n")
+                else:
+                    f.write(dic[key]["text_CN"]+"\n")
+                f.write("\n")
+    if radio_type == "中文|纯文本":
+        with open(output_txt_path,"w",encoding="utf-8") as f:
+            for key in lis:
+                f.write(dic[key]["text_CN"]+"\n")
+                f.write("\n")
+    gr.Info(f"Txt导出成功, 共导出{len(lis)}条记录")
+def get_remaining_text_num():
+    if args["target_id"] in id_lis:
+        target_idx= idx_dic[args["target_id"]]
+        rem = target_idx - id_idx
+        label = f"目标剩余{rem}条"
+    else:
+        label = "目标剩余???条"
+    return label
+def merge_json(merged_path,file_merging_json,text_start_id,text_end_id,type):
+    merged_path = smart_path(merged_path)
+    if not osp.exists(merged_path):
+        gr.Warning("路径不存在")
+        return
+    with open(merged_path, "r", encoding ="utf8") as json_file:
+        dic_merge = json.load(json_file)
+    id_lis_merge = list(dic_merge.keys())
+    idx_dic_merge = dict()
+    for idx,id_ in enumerate(id_lis_merge):
+        idx_dic_merge[id_] = idx
+    if text_start_id not in id_lis_merge or text_end_id not in id_lis_merge or idx_dic_merge[text_start_id] > idx_dic_merge[text_end_id]:
+        gr.Warning("找不到指定序号, 或id前后顺序错误")
+        return
+    path = file_merging_json.name
+    with open(path, "r", encoding ="utf8") as json_file:
+        dic_new = json.load(json_file)
+    for idx in range(idx_dic_merge[text_start_id],idx_dic_merge[text_end_id] + 1):
+        if type == "仅人工翻译":
+            dic_merge[id_lis_merge[idx]]['text_CN'] = dic_new[id_lis_merge[idx]]['text_CN']
+        else:
+            dic_merge[id_lis_merge[idx]] = dic_new[id_lis_merge[idx]]
+    with open(merged_path, "w", encoding ="utf8") as json_file:
+        json.dump(dic_merge,json_file,indent = 1,ensure_ascii = False)
+    gr.Info(f"合并成功，共更新{idx_dic_merge[text_end_id] - idx_dic_merge[text_start_id] + 1}条译文")
+    return
+def output_json(merged_path,text_start_id,text_end_id):
+    merged_path = smart_path(merged_path)
+    if not osp.exists(merged_path):
+        gr.Warning("路径不存在")
+        return
+    with open(merged_path, "r", encoding ="utf8") as json_file:
+        dic_merge = json.load(json_file)
+    id_lis_merge = list(dic_merge.keys())
+    idx_dic_merge = dict()
+    for idx,id_ in enumerate(id_lis_merge):
+        idx_dic_merge[id_] = idx
+    if text_start_id not in id_lis_merge or text_end_id not in id_lis_merge or idx_dic_merge[text_start_id] > idx_dic_merge[text_end_id]:
+        gr.Warning("找不到指定序号, 或id前后顺序错误")
+        return
+    dic_new = {}
+    for idx in range(idx_dic_merge[text_start_id],idx_dic_merge[text_end_id] + 1):
+        dic_new[id_lis_merge[idx]] = dic_merge[id_lis_merge[idx]]
+    name = "small_" + osp.basename(path)
+    new_path = osp.join(osp.dirname(merged_path), name)
+    with open(new_path, "w", encoding ="utf8") as json_file:
+        json.dump(dic_new,json_file,indent = 1,ensure_ascii = False)
+    return new_path
+shortcut_js = """
+<script>
+function shortcuts(e) {
+    if (e.key.toLowerCase() == "s" && e.shiftKey) {
+        document.getElementById("button_save").click();
+    }
+    if (e.key.toLowerCase() == "w" && e.shiftKey) {
+        document.getElementById("button_up").click();
+    }
+    if (e.key.toLowerCase() == "x" && e.shiftKey) {
+        document.getElementById("button_down").click();
+    }
+    if (e.key.toLowerCase() == "r" && e.shiftKey) {
+        document.getElementById("button_replace").click();
+    }
+    if (e.key.toLowerCase() == "g" && e.shiftKey) {
+        document.getElementById("button_translate_gpt").click();
+    }
+    if (e.key.toLowerCase() == "b" && e.shiftKey) {
+        document.getElementById("button_translate_baidu").click();
+    }
+}
+document.addEventListener('keyup', shortcuts, false);
+</script>
+"""
+with gr.Blocks(theme=Theme1(),head=shortcut_js) as demo:
+    gr.Markdown("# <center>EasyTranslator v1.0.6</center> ",visible=True)
+    # Text editing tab
+    with gr.Tab("文本编辑"):
+        gr.Markdown("## 文本编辑及保存区")
+        with gr.Row():
+            text_file_path = gr.Textbox(label = "File Path", value = args["file_path"])
+            text_id = gr.Textbox(label = "Text id",show_copy_button=True)
+            button_load_pos = gr.Button("LOAD last edited position")
+            # The manual save button only exists when the id is not saved on every change
+            if not if_save_id_immediately:
+                button_save_pos = gr.Button("SAVE last edited position")
+        with gr.Row():
+            if not moyu_mode:
+                # Full-screen mode: buttons get elem_ids so the keyboard shortcuts can find them
+                with gr.Column():
+                    text_name = gr.Textbox(label = "Name")
+                    text_text = gr.Textbox(label = "Text", lines=10,show_copy_button=True)
+                    button_save = gr.Button("SAVE FILE",scale= 2,elem_id = "button_save")
+                with gr.Column():
+                    text_name_cn = gr.Textbox(label = "Name_CN")
+                    with gr.Row():
+                        text_gpt = gr.Textbox(label = "GPT", lines=3,show_copy_button=True,interactive = True)
+                        button_translate_gpt = gr.Button("Translate(GPT)",elem_id = "button_translate_gpt")
+                    with gr.Row():
+                        text_baidu = gr.Textbox(label = "Baidu", lines=3,show_copy_button=True,interactive = True)
+                        button_translate_baidu = gr.Button("Translate(Baidu)",elem_id = "button_translate_baidu")
+                    text_final = gr.Textbox(label = "Text_CN", lines=3,show_copy_button=True,interactive = True)
+                    with gr.Row():
+                        button_up = gr.Button("↑",elem_id = "button_up")
+                        button_down = gr.Button("↓",elem_id = "button_down")
+                        button_replace = gr.Button("Replace",elem_id = "button_replace")
+            else:
+                # Moyu mode: same components in a compact layout; no elem_ids are set here
+                with gr.Column():
+                    button_save = gr.Button("SAVE FILE",scale= 2)
+                    text_name = gr.Textbox(label = "Name")
+                    text_name_cn = gr.Textbox(label = "Name_CN")
+                with gr.Column():
+                    with gr.Row():
+                        text_gpt = gr.Textbox(label = "GPT", lines=3,show_copy_button=True,interactive = True)
+                        button_translate_gpt = gr.Button("Translate(GPT)")
+                    with gr.Row():
+                        text_baidu = gr.Textbox(label = "Baidu", lines=3,show_copy_button=True,interactive = True)
+                        button_translate_baidu = gr.Button("Translate(Baidu)")
+                    text_text = gr.Textbox(label = "Text", lines=3,show_copy_button=True)
+                    text_final = gr.Textbox(label = "Text_CN", lines=3,show_copy_button=True,interactive = True)
+                    with gr.Row():
+                        button_up = gr.Button("↑")
+                        button_down = gr.Button("↓")
+                        button_replace = gr.Button("Replace")
+        label_remaining_text = gr.Label(label="进度",value = "目标剩余???条")
+        # Batch machine-translation area
+        gr.Markdown("## 批量机翻区")
+        with gr.Row():
+            text_translate_start_id = gr.Textbox(label = "起始句id")
+            text_translate_end_id = gr.Textbox(label = "结束句id")
+        with gr.Row():
+            radio_translator = gr.Radio(choices = ["Baidu","Gpt3"],label = "接口")
+            label_progress = gr.Label(label = "进度条",value="")
+        checkbox_if_save_translation = gr.Checkbox(value= False, label = "翻译完成后直接保存JSON")
+        button_batch_translate = gr.Button("批量翻译")
+    # Preview and export tab; kept in a variable so a select handler can be attached to it
+    tab_context = gr.Tab("文本预览及导出")
+    with tab_context:
+        # Context preview area
+        gr.Markdown("## 上下文预览区")
+        with gr.Row():
+            with gr.Column():
+                with gr.Row():
+                    text_refresh_id = gr.Textbox(label = "编号", value = args["last_edited_id"])
+                    text_context_length = gr.Textbox(label = "上下文长度", value = args["context_half_length"])
+                radio_context_type = gr.Radio(choices = ["上下文","上文", "下文"], label = "预览模式",value="下文")
+            with gr.Column():
+                with gr.Row():
+                    button_refresh = gr.Button("Refresh")
+                    button_save_context = gr.Button("Save Changes")
+                checkbox_if_save_context = gr.Checkbox(value= False, label = "修改直接保存JSON")
+        # Editable table; save_context reads its 'id' and 'text_CN' columns
+        dataframe_context = gr.DataFrame(headers=['id','name','name_CN','text','text_CN'],
+                                         interactive=True)
+        # Text export area
+        gr.Markdown("## 文档导出区")
+        radio_type = gr.Radio(choices = ["中文|纯文本","中文|单次人名文本", "中文|人名文本", "双语|人名文本"],label = "导出类型")
+        with gr.Row():
+            text_derive_start_id = gr.Textbox(label = "起始句id")
+            text_derive_end_id = gr.Textbox(label = "结束句id")
+        with gr.Row():
+            text_seperator_long = gr.Textbox(label = "句间分隔符(长)", value = args["seperator_long"])
+            text_seperator_short = gr.Textbox(label = "双语间分隔符(短)", value = args["seperator_short"])
+        text_output_path = gr.Textbox(label = "输出文件路径", value = args["output_txt_path"])
+        button_derive_text = gr.Button("导出文本")
+    # 文件转换页
+    with gr.Tab("文件转换"):
+        gr.Markdown("## CSV to JSON(支持批量上传)")
+        gr.Markdown("准备好台词csv文件（至少包含正序排列的台词）并将台词列命名为text，如自带角色名则将此列命名为name，如自带id则将此列命名为id。\
+            在此处上传csv文件，保存生成的json文件，之后在主界面输入json文件路径即可使用。")
+        with gr.Row():
+            with gr.Column():
+                file_target_csv = gr.File(file_types=["csv"],file_count = "multiple", label="Input CSV")
+                with gr.Row():
+                    text_text_column = gr.Textbox(label="text列名",value = args["csv_column_name"]["text"])
+                    text_name_column = gr.Textbox(label="name列名",value = args["csv_column_name"]["name"])
+                    text_id_column = gr.Textbox(label="id列名(optional)",value = args["csv_column_name"]["id"],placeholder = "若不指定或找不到指定列，程序会自动编号")
+                button_convert2json =  gr.Button("Convert")
+            file_result_json = gr.File(file_types=["json"],label="Output JSON",interactive=False)
+        gr.Markdown("## JSON to CSV(支持批量上传)")
+        with gr.Row():
+            with gr.Column():
+                file_target_json = gr.File(file_types=["json"],file_count = "multiple",label="Input JSON")
+                button_convert2csv =  gr.Button("Convert")
+            file_result_csv = gr.File(file_types=["jcsv"],label="Output CSV",interactive=False)
+    # File merging tab
+    with gr.Tab("文件合并"):
+        gr.Markdown("## 合并JSON文件")
+        gr.Markdown("将两个json文件中的译文合并，方便多人协作。使用方法为上传部分翻译后的json文件，指定起止id。\
+            程序会用【上传文件】中，从起始句id到结束句id的全部内容，覆盖【指定地址】中的json文件从起始句id到结束句id的全部内容。\
+                若起止id顺序颠倒或不存在，按钮不会作用。请仔细检查并做好备份！！")
+        with gr.Column():
+            # Target file on disk; it receives the merged content and is also the source for the export below
+            text_merged_path = gr.Textbox(label = "File Path", value = args["file_path"])
+            file_merging_json = gr.File(file_types=["json"],file_count = "single", label="File to be merged")
+            with gr.Row():
+                text_merge_start_id = gr.Textbox(label="起始句id",value = "")
+                text_merge_end_id  = gr.Textbox(label="结束句id",value = "")
+                radio_merge_type = gr.Radio(choices = ["仅人工翻译","全部替换"], label = "合并模式",value="仅人工翻译")
+                button_merge =  gr.Button("Merge")
+            # button_output_json =  gr.Button("Merge")
+        # Export of a small JSON file covering an id range
+        gr.Markdown("## 导出JSON文件")
+        gr.Markdown("支持导出起止id范围的小型json文件，以减少协作时的传输负担。使用上面File Path的指定地址。")
+        with gr.Row():
+            text_output_start_id = gr.Textbox(label="起始句id",value = "")
+            text_output_end_id  = gr.Textbox(label="结束句id",value = "")
+            button_output =  gr.Button("Output")
+        file_output_json = gr.File(file_types=["json"],label="Output JSON",interactive=False)
+    # API settings tab; every field is pre-filled from the loaded config
+    with gr.Tab("API Settings"):
+        gr.Markdown("## 百度 API")
+        text_baidu_api_id = gr.Textbox(label="Baidu API Id",value = args["baidu_api_settings"]["api_id"])
+        text_baidu_api_key = gr.Textbox(label="Baidu API Key", value = args["baidu_api_settings"]["api_key"])
+        with gr.Row():
+            text_from_lang = gr.Textbox(label="From Lang",value = args["baidu_api_settings"]["from_lang"])
+            text_to_lang = gr.Textbox(label="To Lang",value = args["baidu_api_settings"]["to_lang"])
+        gr.Markdown("## OPENAI API")
+        text_openai_api = gr.Textbox(label="OPENAI API Key",value = args["openai_api_settings"]["openai_api_key"])
+        with gr.Row():
+            text_prefix = gr.Textbox(label="Prompt Prefix",value = args["openai_api_settings"]["prompt_prefix"])
+            text_postfix = gr.Textbox(label="Prompt Postfix",value = args["openai_api_settings"]["prompt_postfix"])
+        # Target id used by the remaining-lines progress label
+        gr.Markdown("## 目标id")
+        text_target_id = gr.Textbox(label="Target Id",value = args["target_id"])
+        button_api_submit = gr.Button("Submit")
+    # Tab behaviour: rebuild the preview table whenever the preview tab is selected
+    tab_context.select(refresh_context, inputs=[text_id,text_context_length,radio_context_type],outputs=[dataframe_context,text_refresh_id])
+    # Textbox behaviour
+    text_id.change(change_id, inputs = [text_id],
+                outputs = [text_file_path,text_text,text_name,text_name_cn,text_gpt,text_baidu,text_final])
+    text_id.change(get_remaining_text_num,inputs = None, outputs= [label_remaining_text])
+    text_final.change(change_final,inputs = [text_final,text_id])
+    text_name_cn.change(change_name,inputs = [text_name,text_name_cn,text_id])
+    # Button behaviour
+    # - Text editing tab
+    button_load_pos.click(load_last_position,inputs=text_file_path, outputs = text_id)
+    # button_save_pos is only created when immediate saving is disabled
+    if not if_save_id_immediately:
+        button_save_pos.click(save_last_position, inputs = [text_id])
+    button_up.click(last_text, outputs = text_id)
+    button_down.click(next_text, outputs = text_id)
+    button_translate_gpt.click(gpt_translate,
+                            inputs=[text_text,text_id], outputs=text_gpt)
+    button_translate_baidu.click(baidu_translate,
+                                inputs=[text_text,text_id], outputs=text_baidu)
+    button_replace.click(replace,
+                        inputs = [text_gpt,text_baidu,text_final,text_id],
+                        outputs=[text_gpt,text_baidu,text_final])
+    button_save.click(save_json)
+    button_batch_translate.click(batch_translate, inputs = [radio_translator,checkbox_if_save_translation,text_translate_start_id,text_translate_end_id],
+                                 outputs = [label_progress])
+    # - Preview and export tab
+    # button_refresh.click(save_context, inputs=[dataframe_context, text_refresh_id, checkbox_if_save_context])
+    button_refresh.click(refresh_context,inputs=[text_refresh_id,text_context_length,radio_context_type], outputs = [dataframe_context,text_id])
+    button_save_context.click(save_context, inputs=[dataframe_context, text_refresh_id, checkbox_if_save_context])
+    button_derive_text.click(derive_text,
+                            inputs = [radio_type, text_derive_start_id, text_derive_end_id,
+                                    text_seperator_long,text_seperator_short,text_output_path])
+    # - File conversion tab
+    button_convert2json.click(convert_to_json,
+                        inputs = [file_target_csv, text_text_column, text_name_column, text_id_column],
+                        outputs = file_result_json)
+    button_convert2csv.click(convert_to_csv,
+                        inputs = file_target_json,
+                        outputs = file_result_csv)
+    # - File merging tab
+    button_merge.click(merge_json, inputs=[text_merged_path,file_merging_json,text_merge_start_id,text_merge_end_id,radio_merge_type])
+    button_output.click(output_json, inputs=[text_merged_path,text_output_start_id,text_output_end_id],outputs=file_output_json)
+    # - API settings tab
+    button_api_submit.click(submit_api,
+                            inputs = [text_baidu_api_id,text_baidu_api_key,text_from_lang,text_to_lang,
+                                      text_openai_api,text_prefix,text_postfix,text_target_id])
+# Enable the request queue on import; launch the server only when run as a script
+demo.queue()
+if __name__=="__main__":
+    demo.launch(show_error=True)

README.md CHANGED Viewed

@@ -10,4 +10,125 @@ pinned: false
 license: apache-2.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 license: apache-2.0
 ---
+# EasyTranslator v1.0.6
+基于gradio的汉化辅助工具
+## v1.0.6更新内容
+1. 更新文件合并功能，方便多人协作。在文件合并页中可将依照指示将两个json文件合并，同步人工翻译进度。并支持导出小规模json文件方便传输。
+## v1.0.5更新内容
+1. 支持键盘快捷键<br>
+    shift+w: ↑<br>
+    shift+x: ↓<br>
+    shift+s: save json<br>
+    shift+r: replace<br>
+    shift+g: gpt translate<br>
+    shift+b: baidu translate<br>
+## v1.0.4更新内容
+1. 追加摸鱼模式, 将必要组件集中在半个屏幕内。在`config.json`中`moyu_mode`设为1开启, 设为0关闭
+2. 加入对GPT翻译的超时检测, 时间上限在`config.json`的`openai_api_settings`中的`time_limit`处设置, 单位为秒。若请求超时, 会打印超时提示, 但不会报错）
+3. GPT翻译现在将不返回重复结果
+## v1.0.3更新内容
+1. 支持预览页直接修改译文, 建议保存JSON后再使用此功能
+2. 可选是否即时更新上次编辑id
+    `config.json`中设置`"if_save_id_immediately"`参数, 若为1则逻辑与之前一样, 在切换id时立刻保存进`config.json`；若为0则会显示保存编辑id按钮`SAVE last edited position`, 在点击后存入`config.json`。
+## v1.0.2更新内容
+1. 支持批量机翻
+## v1.0.1更新内容
+1. 优化文件读取逻辑
+2. 增加错误提示、警告等。保存JSON成功时会提示更新的译文条数
+3. 允许自定义传输到gpt的prompt、自定义百度翻译的原文及目标语言
+4. 追加上下文预览功能, 并允许自定义预览条数和编号。指定id将会以双星号标记, 修改过的译文将会在前面加星号标记
+5. 优化按钮手感
+## 特性
+1. 一键机翻接口, 提供复制到剪贴板按钮
+2. 便捷的上下句切换, 直接跳转功能
+3. 记忆上次编辑位置功能
+4. 人名翻译记忆功能, 一次修改将会同步到全体。人名词典在程序启动时读取并在保存JSON文件时保存。开启程序时可以直接改`name_cn`, 关闭程序后可以修改人名词典。下次开启程序时人名词典中的内容将会覆盖JSON文件中的`name_cn`。
+5. 文本翻译记忆功能, 机翻/修改后只要不关闭程序, 切换上下句, 刷新 网页都不会影响
+6. 译文缓存。相对地原文不会缓存, 所以手滑改或删掉只要切换或者刷新即可恢复。因此想查看原文具体某个词的翻译也可以直接编辑原文再机翻, 不会影响原文本。
+7. 一键替换功能, 用于专有名词错译的情况。会将机翻及手翻文本中的对象全部替换。替换词典可以在运行中直接更改, 不用重开程序。
+8. 便利的api key管理及prompt修改等
+9. 提供JSON文件与CSV文件互转
+10. 上下文预览功能
+<br><br>
+## 使用
+至少需要安装python3(作者使用的版本是3.10, 其它版本尚未测试)
+***
+### Install
+```
+git clone https://github.com/alienet1109/EasyTranslator.git
+```
+不想安git可以直接下载压缩包
+***
+### Preparation
+#### 1. 安装依赖
+```
+pip install -r requirements.txt
+```
+#### 2. 文本准备
+需要使用者自行准备原文本json文件, 或使用本程序将原文本csv文件转换为json文件 \
+csv文件格式要求为：
+* 至少包含人名列、文本列, 按顺序排列的表格
+只有文本没有人名也可以使用, 在csv里新建空列'name'即可。\
+若不指定id列名, 程序会自动生成id。 \
+可以指定人名和文本的列名, 将会分别以'name'、'text'为键输入json文件；其它列将会以原列名为键输入, 以防数据丢失。\
+生成json文件后, 下载, 然后输入其路径（不一定要与代码同一文件夹）即可使用。
+json文件格式要求为：
+* 由key为id, value为{'name':'原文人名','text':'原文文本'}的键值对组成, 按文本顺序正序排序的字典。
+运行途中会频繁修改json文件, 所以最好做好备份。\
+可以随时在页面中修改json文件路径, 修改前务必保存, 修改后请按Load按钮以同步更新否则不知道会有什么bug。\
+上次编辑文本编号将会重置, 路径与编号将直接更新至config文件。
+#### 3. 修改配置文件`config.json`
+* 必须：
+    1. 设置文本文件`file_path`及人名词典`name_dict_path`的路径(推荐使用绝对路径)。之后结果会直接保存至对应路径。
+* 可选：
+    1. 设置替换词典`replace_dict_path`路径, 如不使用此功能则不需要；
+    2. 可设置api key和分隔符等, 也可以直接在程序更改。程序中的修改会改变预设api key, 但不会改变预设的分隔符。
+***
+### Run
+直接点开`EasyTranslator.py`或在文件夹下执行命令:
+```
+python EasyTranslator.py
+```
+然后在网页中打开程序给出的网址（eg: http://127.0.0.1:7860 ）
+<br><br>
+## 演示
+摸鱼模式 \
+![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/moyu_mode.png) \
+批量翻译 \
+![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/batch_translate.gif) \
+上下文预览\
+![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/context_preview.gif)
+![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/id%20search.gif)
+![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/name.gif)
+![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/last%26next%20text.gif)
+![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/replace.gif)
+![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/api%20key%20setting.gif)
+![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/derive%20text.gif)
+![image](https://github.com/alienet1109/EasyTranslator/blob/master/assets/part_translate.gif)
+## 计划追加功能
+1. 可选主题
+2. 追加翻译接口
+3. 追加文本输出格式
+4. 发生修改时直接存入小规模临时文件, 防止数据丢失

config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+ "moyu_mode": "0",
+ "if_save_id_immediately": "1",
+ "last_edited_id": "100001001",
+ "target_id": "100001005",
+ "file_path": "./example_text.json",
+ "context_half_length": "10",
+ "name_dict_path": "./example_name_dict.json",
+ "replace_dict_path": "./example_replace_dict.json",
+ "output_txt_path": "./output.txt",
+ "seperator_long": "===============================",
+ "seperator_short": "---------------------",
+ "csv_column_name": {
+  "id": "",
+  "text": "text",
+  "name": "name"
+ },
+ "baidu_api_settings": {
+  "api_id": "YOUR BAIDU API ID",
+  "api_key": "YOUR BAIDU API KEY",
+  "from_lang": "jp",
+  "to_lang": "zh"
+ },
+ "openai_api_settings": {
+  "openai_api_key": "YOUR OPENAI API KEY",
+  "prompt_prefix": "翻译为中文:",
+  "prompt_postfix": "",
+  "time_limit": "15"
+ }
+}

example_name_dict.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {原文人名} {译文人名}
2	+ キム金

example_replace_dict.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ {旧词} {新词}

example_text.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+ "100001001": {
+  "name": "高橋",
+  "text": "キムさん、こちらは山田さんです。山田さん、こちらはキムさんです。",
+  "name_CN": "高橋",
+  "gpt3": "",
+  "baidu": "",
+  "text_CN": ""
+ },
+ "100001002": {
+  "name": "山田",
+  "text": "山田です。はじめまして、どうぞよろしく。",
+  "name_CN": "山田"
+ },
+ "100001003": {
+  "name": "キム",
+  "text": "私はキムです。こちらこそ、どうぞよろしくお願いいたします。"
+ },
+ "100001004": {
+  "name": "山田",
+  "text": "キムさん、お仕事は。"
+ },
+ "100001005": {
+  "name": "キム",
+  "text": "学生です。"
+ }
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ openai>=1.0

themes.py ADDED Viewed

	@@ -0,0 +1,54 @@

+from __future__ import annotations
+from typing import Iterable
+import gradio as gr
+from gradio.themes.base import Base
+from gradio.themes.default import Default
+from gradio.themes.utils import colors, fonts, sizes
+import time
+class Theme1(Base):
+    """Gradio theme built on ``Base`` with this project's default look.
+
+    Defaults: emerald primary hue, blue secondary hue, gray neutral hue,
+    medium spacing and radius, large text, the "Quicksand" Google font for
+    regular text and "IBM Plex Mono" for monospace text. Every value can be
+    overridden through the keyword-only constructor arguments.
+    """
+    def __init__(
+        self,
+        *,
+        primary_hue: colors.Color | str = colors.emerald,
+        secondary_hue: colors.Color | str = colors.blue,
+        neutral_hue: colors.Color | str = colors.gray,
+        spacing_size: sizes.Size | str = sizes.spacing_md,
+        radius_size: sizes.Size | str = sizes.radius_md,
+        text_size: sizes.Size | str = sizes.text_lg,
+        font: fonts.Font
+        | str
+        | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("Quicksand"),
+            "ui-sans-serif",
+            "sans-serif",
+        ),
+        font_mono: fonts.Font
+        | str
+        | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("IBM Plex Mono"),
+            "ui-monospace",
+            "monospace",
+        ),
+    ):
+        # Forward every option unchanged to the base theme constructor.
+        super().__init__(
+            primary_hue=primary_hue,
+            secondary_hue=secondary_hue,
+            neutral_hue=neutral_hue,
+            spacing_size=spacing_size,
+            radius_size=radius_size,
+            text_size=text_size,
+            font=font,
+            font_mono=font_mono,
+        )
+        # Project-specific overrides applied on top of the base theme; the
+        # commented-out entries below are currently inactive.
+        super().set(
+            input_background_fill="*neutral_100",
+            block_title_text_weight="600",
+            # button_shadow_active="*neutral_400 0px 0px 2px 2px",
+            # block_border_width="3px",
+            # button_large_padding="32px",
+            # button_secondary_background_fill_hover="*neutral_300",
+        )

utils.py ADDED Viewed

	@@ -0,0 +1,153 @@

+import openai
+import requests
+import random
+import json
+from hashlib import md5
+from os import path as osp
+import csv
+import threading
+def load_config(filepath):
+    with open(filepath, "r", encoding="utf-8") as file:
+        args = json.load(file)
+    return args
+def save_config(args,filepath):
+    with open(filepath, "w", encoding ="utf8") as json_file:
+        json.dump(args,json_file,indent = 1,ensure_ascii = False)
+    return
+def smart_path(path):
+    file_dir = osp.dirname(osp.abspath(__file__))
+    if osp.isabs(path):
+        return path
+    else:
+        return osp.join(file_dir,path)
+# The configuration is read once, at import time, from config.json located
+# in the same directory as this module.
+args = load_config(smart_path("./config.json"))
+# Baidu preparation
+# NOTE(review): the endpoint uses plain HTTP — confirm whether an HTTPS endpoint should be used.
+endpoint = "http://api.fanyi.baidu.com"
+path = "/api/trans/vip/translate"
+# Full request URL and the form-encoded header sent with every Baidu request.
+url = endpoint + path
+headers = {"Content-Type": "application/x-www-form-urlencoded"}
+# Generate salt and sign
+def make_md5(s, encoding="utf-8"):
+    return md5(s.encode(encoding)).hexdigest()
+def get_baidu_completion(text,api_id,api_key,from_lang,to_lang):
+    salt = random.randint(32768, 65536)
+    sign = make_md5(api_id + text + str(salt) + api_key)
+    payload = {"appid": api_id, "q": text, "from": from_lang, "to": to_lang, "salt": salt, "sign": sign}
+    r = requests.post(url, params=payload, headers=headers)
+    result = r.json()
+    return result["trans_result"][0]["dst"]
+# OPENAI preparation
+# API key and request time limit come from the "openai_api_settings" section of the config.
+openai_api_key = args["openai_api_settings"]["openai_api_key"]
+# Stored as a string in the config, converted to float here.
+time_limit = float(args["openai_api_settings"]["time_limit"])
+# A single client object is created at import time with the configured key.
+client = openai.OpenAI(api_key = openai_api_key)
+class GPTThread(threading.Thread):
+    def __init__(self, model, messages, temperature):
+        super().__init__()
+        self.model = model
+        self.messages = messages
+        self.temperature = temperature
+        self.result = ""
+    def terminate(self):
+        self._running = False
+    def run(self):
+        response = client.chat.completions.create(
+        model=self.model,
+        messages=self.messages,
+        temperature=self.temperature,
+    )
+        self.result = response.choices[0].message.content
+def get_gpt_completion(prompt, model="gpt-3.5-turbo",api_key = openai_api_key):
+    messages = [{"role": "user", "content": prompt}]
+    temperature = random.uniform(0,1)
+    thread = GPTThread(model, messages,temperature)
+    thread.start()
+    thread.join(10)
+    if thread.is_alive():
+        thread.terminate()
+        print("请求超时")
+        return "TimeoutError", False
+    else:
+        return thread.result, True
+def left_pad_zero(number, digit):
+    number_str = str(number)
+    padding_count = digit - len(number_str)
+    padded_number_str = "0" * padding_count + number_str
+    return padded_number_str
+def generate_ids(num: int):
+    length = len(str(num))+1
+    ids = []
+    for i in range(num):
+        ids.append(left_pad_zero(i,length))
+    return ids
+def convert_to_json(files, text_col, name_col, id_col):
+    out_files = []
+    for file_target in files:
+        dic = {}
+        path = file_target.name
+        dir = osp.dirname(path)
+        base_name = osp.basename(path)
+        new_name = base_name[:-4]+".json"
+        new_path = osp.join(dir,new_name)
+        with open(path,"r",encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            line_num = sum(1 for _ in open(path,"r",encoding="utf-8"))
+            fieldnames = reader.fieldnames
+            if id_col not in fieldnames:
+                ids = generate_ids(line_num)
+                i = 0
+                for row in reader:
+                    dic[ids[i]]={"name":row[name_col],"text":row[text_col]}
+                    for field in fieldnames:
+                        if field not in (name_col,text_col):
+                            dic[ids[i]][field] = row[field]
+                    i += 1
+            else:
+                for row in reader:
+                    dic[row[id_col]]={"name":row[name_col],"text":row[text_col]}
+                    for field in fieldnames:
+                        if field not in (name_col,text_col,id_col):
+                            dic[row[id_col]][field] = row[field]
+            f.close()
+        with open(new_path, "w", encoding= "utf-8") as f2:
+            json.dump(dic,f2,indent=1,ensure_ascii=False)
+        out_files.append(new_path)
+    return out_files
+def convert_to_csv(files):
+    out_files = []
+    for file_target in files:
+        path = file_target.name
+        dir = osp.dirname(path)
+        base_name = osp.basename(path)
+        new_name = base_name[:-4]+".csv"
+        new_path = osp.join(dir,new_name)
+        with open(path, "r", encoding= "utf-8") as f:
+            dic = json.load(f)
+        field_names = []
+        for value in dic.values():
+            for field in value.keys():
+                if field not in field_names: field_names.append(field)
+        for key in dic.keys():
+            dic[key]["id"] = key
+            for field in field_names:
+                if field not in dic[key]:
+                    dic[key][field] = ""
+        field_names.insert(0,"id")
+        with open(new_path, "w", encoding= "utf-8",newline="") as f2:
+            writer = csv.DictWriter(f2,fieldnames=field_names)
+            writer.writeheader()
+            writer.writerows(list(dic.values()))
+        out_files.append(new_path)
+    return out_files