|
import re |
|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, T5Tokenizer, MT5ForConditionalGeneration |
|
|
|
# Checkpoint identifier shared by the tokenizer and the model below.
MODEL_NAME = "engmatic-earth/mt5-zh-ja-en-trimmed-fine-tuned-v1"

# Both objects are created once at import time and read by `output`.
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
|
|
|
def output(input_text):
    """Translate English text into Japanese, one sentence per output line.

    The input is lower-cased, exclamation marks (ASCII ``!`` and the
    full-width form U+FF01) are turned into ``.``, and the text is split on
    ``.``.  Every non-empty sentence is prefixed with the ``en2ja: `` task
    tag, passed through the module-level ``tokenizer`` and ``model``,
    terminated with an ideographic full stop (U+3002) and stripped of spaces.

    Args:
        input_text: English text to translate.  ``None`` or an empty string
            is accepted and yields an empty result.

    Returns:
        The translated sentences joined with newlines, or ``""`` when the
        input contains no sentences.
    """
    if not input_text:
        return ""

    # The non-ASCII punctuation is written as escapes so the literals cannot
    # be corrupted again if this file is re-saved with the wrong encoding.
    normalized = re.sub("[!\uff01]", ".", input_text.lower())
    stripped_parts = (part.strip() for part in normalized.split("."))
    sentences = [sentence for sentence in stripped_parts if sentence]

    translated_lines = []
    for sentence in sentences:
        # One sentence per call, so the generated batch has a single row.
        encoded = tokenizer(["en2ja: " + sentence], return_tensors="pt")
        generated = model.generate(**encoded, max_length=1000)
        decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
        # Spaces are removed from the decoded text before it is returned.
        translated_lines.append((decoded + "\u3002").replace(" ", ""))

    return "\n".join(translated_lines)
|
|
|
# Newer Gradio releases expose Textbox at the package top level and no longer
# ship the legacy ``gr.inputs`` namespace; fall back to it only when the
# top-level class is missing (old releases).
_textbox_cls = gr.Textbox if hasattr(gr, "Textbox") else gr.inputs.Textbox

# Single text box in, plain text out; `output` performs the translation.
interface = gr.Interface(
    fn=output,
    inputs=_textbox_cls(
        lines=3,
        placeholder="Write what you want to say in Japanese.",
    ),
    outputs="text",
)

interface.launch()