Spaces:
Runtime error
Runtime error
| from transformers import MT5ForConditionalGeneration, AutoTokenizer, Text2TextGenerationPipeline, AutoModelForSeq2SeqLM | |
| import gradio as gr | |
| import re | |
# Translation task setup: the model and its tokenizer come from the same
# checkpoint and are wrapped in a text2text generation pipeline.
_TRANS_CHECKPOINT = "K024/mt5-zh-ja-en-trimmed"
trans_mdl = MT5ForConditionalGeneration.from_pretrained(_TRANS_CHECKPOINT)
trans_tokenizer = AutoTokenizer.from_pretrained(_TRANS_CHECKPOINT)
trans_pipe = Text2TextGenerationPipeline(
    model=trans_mdl,
    tokenizer=trans_tokenizer,
)

# Summarization task setup: the model and tokenizer are used directly
# (no pipeline wrapper) by sum_job.
_SUM_CHECKPOINT = "csebuetnlp/mT5_multilingual_XLSum"
sum_mdl = AutoModelForSeq2SeqLM.from_pretrained(_SUM_CHECKPOINT)
sum_tokenizer = AutoTokenizer.from_pretrained(_SUM_CHECKPOINT)
# Maps each task label offered in the UI to the task prefix that is prepended
# to the source text before it is handed to the translation pipeline.
_TRANSLATION_PREFIXES = {
    "中译日": "zh2ja:",
    "中译英": "zh2en:",
    "日译中": "ja2zh:",
    "英译中": "en2zh:",
    "日译英": "ja2en:",
    "英译日": "en2ja:",
}


def translation_job(job, text):
    """Translate ``text`` according to the selected translation task.

    Args:
        job: Task label; must be one of the keys of ``_TRANSLATION_PREFIXES``.
        text: Source text to translate.

    Returns:
        The ``generated_text`` field of the first pipeline result, or an
        empty string when ``text`` is empty or contains only whitespace.

    Raises:
        ValueError: If ``job`` is not a known task label (this includes
            ``None``, e.g. when nothing has been selected yet).
    """
    prefix = _TRANSLATION_PREFIXES.get(job)
    if prefix is None:
        expected = ", ".join(_TRANSLATION_PREFIXES)
        raise ValueError(
            f"Unknown translation task {job!r}; expected one of: {expected}"
        )

    # Nothing to translate: skip the model call instead of sending a bare
    # task prefix to the pipeline.
    if not text or not text.strip():
        return ""

    prompt = prefix + text
    # Echo the exact prompt sent to the model (shows up in the process log).
    print(prompt)
    response = trans_pipe(prompt, max_length=100, num_beams=4)
    return response[0]["generated_text"]
# One or more consecutive whitespace characters (spaces, tabs, newlines, ...).
_WHITESPACE_RUN = re.compile(r"\s+")


def sum_job(text):
    """Summarize ``text`` with the summarization model.

    The text is stripped and every run of whitespace (including newlines) is
    collapsed to a single space before tokenization.

    Args:
        text: Article text to summarize.

    Returns:
        The decoded summary string, or an empty string when ``text`` is
        empty or contains only whitespace.
    """
    if not text:
        return ""
    cleaned = _WHITESPACE_RUN.sub(" ", text.strip())
    if not cleaned:
        # Nothing but whitespace: do not run the model on padding alone.
        return ""

    encoded = sum_tokenizer(
        [cleaned],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )
    # The input is padded to max_length, so hand the tokenizer's attention
    # mask to generate() explicitly rather than leaving it to be inferred.
    output_ids = sum_mdl.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]  # single input sequence -> take the first generated sequence
    return sum_tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
# Labels offered by the translation-task dropdown.
_translation_tasks = ["中译日", "中译英", "日译中", "英译中", "日译英", "英译日"]

with gr.Blocks() as app:
    # Tab 1: Chinese / English / Japanese translation.
    with gr.Tab("中英日三语翻译"):
        job_name = gr.Dropdown(
            choices=_translation_tasks,
            label="翻译任务选择",
            info="单选",
        )
        source_text = gr.Textbox(
            lines=1,
            label="翻译文本",
            placeholder="请输入要翻译的文本",
        )
        trans_result = gr.Textbox(lines=1, label="翻译结果")
        trans_btn = gr.Button("翻译")

    # Tab 2: multilingual automatic summarization.
    with gr.Tab("多语言自动摘要"):
        article_text = gr.Textbox(
            lines=8,
            label="待总结文本",
            placeholder="请输入要进行摘要的文本",
        )
        sum_result = gr.Textbox(lines=2, label="摘要结果")
        sum_btn = gr.Button("摘要")

    # Wire each button to its handler; registered inside the Blocks context.
    trans_btn.click(
        fn=translation_job,
        inputs=[job_name, source_text],
        outputs=trans_result,
    )
    sum_btn.click(fn=sum_job, inputs=article_text, outputs=sum_result)

app.launch()