|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
# Vietnamese -> English resources, loaded once when the module is imported:
# the checkpoint id, a tokenizer configured with Vietnamese as the source
# language, and the seq2seq model itself.
md_name1 = "vinai/vinai-translate-vi2en-v2"
tokenizer_vi2en = AutoTokenizer.from_pretrained(
    md_name1,
    src_lang="vi_VN",
)
model_vi2en = AutoModelForSeq2SeqLM.from_pretrained(md_name1)
|
|
|
def translate_vi2en(vi_text: str) -> str:
    """Translate Vietnamese text into English.

    Uses the module-level ``tokenizer_vi2en`` / ``model_vi2en`` pair.

    Args:
        vi_text: Vietnamese source text.

    Returns:
        The decoded English text. An empty string is returned when
        ``vi_text`` is empty or contains only whitespace, without invoking
        the model.
    """
    # Nothing to translate (e.g. the button was clicked on an empty textbox):
    # skip tokenization and generation entirely.
    if not vi_text or not vi_text.strip():
        return ""

    input_ids = tokenizer_vi2en(vi_text, return_tensors="pt").input_ids
    output_ids = model_vi2en.generate(
        input_ids,
        # Start decoding from the English language-code token.
        decoder_start_token_id=tokenizer_vi2en.lang_code_to_id["en_XX"],
        num_return_sequences=1,
        # Sampling is enabled on top of beam search, so repeated calls with
        # the same input may return different translations.
        do_sample=True,
        top_k=100,
        top_p=0.8,
        num_beams=5,
        early_stopping=True,
    )
    decoded = tokenizer_vi2en.batch_decode(output_ids, skip_special_tokens=True)
    # batch_decode yields one string per returned sequence; merge them into
    # the single string the UI textbox expects.
    return " ".join(decoded)
|
|
|
# English -> Vietnamese resources, loaded once when the module is imported:
# the checkpoint id, a tokenizer configured with English as the source
# language, and the seq2seq model itself.
md_name2 = "vinai/vinai-translate-en2vi-v2"
tokenizer_en2vi = AutoTokenizer.from_pretrained(
    md_name2,
    src_lang="en_XX",
)
model_en2vi = AutoModelForSeq2SeqLM.from_pretrained(md_name2)
|
|
|
def translate_en2vi(en_text: str) -> str:
    """Translate English text into Vietnamese.

    Uses the module-level ``tokenizer_en2vi`` / ``model_en2vi`` pair.

    Args:
        en_text: English source text.

    Returns:
        The decoded Vietnamese text. An empty string is returned when
        ``en_text`` is empty or contains only whitespace, without invoking
        the model.
    """
    # Nothing to translate (e.g. the button was clicked on an empty textbox):
    # skip tokenization and generation entirely.
    if not en_text or not en_text.strip():
        return ""

    input_ids = tokenizer_en2vi(en_text, return_tensors="pt").input_ids
    output_ids = model_en2vi.generate(
        input_ids,
        # Start decoding from the Vietnamese language-code token.
        decoder_start_token_id=tokenizer_en2vi.lang_code_to_id["vi_VN"],
        num_return_sequences=1,
        # Sampling is enabled on top of beam search, so repeated calls with
        # the same input may return different translations.
        do_sample=True,
        top_k=100,
        top_p=0.8,
        num_beams=5,
        early_stopping=True,
    )
    decoded = tokenizer_en2vi.batch_decode(output_ids, skip_special_tokens=True)
    # batch_decode yields one string per returned sequence; merge them into
    # the single string the UI textbox expects.
    return " ".join(decoded)
|
|
|
# Sample Vietnamese sentences shown as clickable examples for the
# Vietnamese -> English tab.
vi_example_text = [
    "Xin chào, chúng tôi là nhóm 01, bao gồm 3 thành viên: Minh Trí, Kim Thanh và Hồng Ngọc",
    "Chúng ta đang từng bước học cách trở nên tốt đẹp hơn!",
    "Bạn có phải là người chăm chỉ?",
    "Luận văn thạc sĩ Khoa học Máy tính",
    "Hãy sống như những đoá hoa toả ngát hương thơm",
]
|
|
|
# Sample English sentences shown as clickable examples for the
# English -> Vietnamese tab.
en_example_text = [
    "Life is countless days of trying.",
    "Always remember, what doesn't kill you makes you stronger",
    "What's up man?",
    "How could you...?",
    "Could you do me a favor?",
]
|
|
|
|
|
# Gradio UI: a banner row followed by two tabs, one per translation direction.
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# its indentation — confirm the placement of the buttons and example lists
# against the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Charmed's One MT") as demo:
    # Banner text describing the application and its authors.
    with gr.Row():
        test = gr.Text(
            label="MACHINE TRANSLATION",
            value="The Application of English-Vietnamese automatic translation was created by The Power of Three: Doan Minh Tri, Che Thi Kim Thanh and Nguyen Thi Hong Ngoc",
        )

    with gr.Tabs():
        # --- Tab 1: Vietnamese input, English output -----------------------
        with gr.TabItem("VIETNAMESE TO ENGLISH"):
            with gr.Row():
                with gr.Column():
                    vietnamese = gr.Textbox(label="Vietnamese Text")
                    gr.ClearButton(vietnamese)
                with gr.Column():
                    english = gr.Textbox(label="English Text")
                    translate_to_english = gr.Button(value="Translate To English")
                    translate_to_english.click(
                        lambda text: translate_vi2en(text),
                        inputs=vietnamese,
                        outputs=english,
                    )
            gr.Examples(examples=vi_example_text, inputs=[vietnamese])

        # --- Tab 2: English input, Vietnamese output -----------------------
        # The names `english` / `vietnamese` are rebound here to this tab's
        # own textboxes; the first tab's handler was already wired to its
        # own components above.
        with gr.TabItem("ENGLISH TO VIETNAMESE"):
            with gr.Row():
                with gr.Column():
                    english = gr.Textbox(label="English Text")
                    gr.ClearButton(english)
                with gr.Column():
                    vietnamese = gr.Textbox(label="Vietnamese Text")
                    translate_to_vietnamese = gr.Button(value="Translate To Vietnamese")
                    translate_to_vietnamese.click(
                        lambda text: translate_en2vi(text),
                        inputs=english,
                        outputs=vietnamese,
                    )
            gr.Examples(examples=en_example_text, inputs=[english])
|
|
|
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link.
    demo.launch(share=True)