|
import os |
|
import spaces |
|
import gradio as gr |
|
import torch |
|
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer |
|
import sentencepiece as spm |
|
import ctranslate2 |
|
from nltk import sent_tokenize |
|
|
|
|
|
# Paths to the CTranslate2 model directory and to the SentencePiece model
# file that lives inside that same directory.
sp_model_path = "mbart25enmy_ct2/sentence.bpe.model"
ct_model_path = "mbart25enmy_ct2/"

# Both objects are created once at import time and shared by every call.
sp_model = spm.SentencePieceProcessor(sp_model_path)
translator = ctranslate2.Translator(ct_model_path)
|
|
|
|
|
|
|
|
|
def translate(source, translator, sp_model):
    """Translate English text to Myanmar with a CTranslate2 model.

    The text is split into SentencePiece pieces, prefixed with the
    ``[en_XX]`` source-language tag, and translated with ``[my_MM]``
    supplied as the target prefix.

    Args:
        source (str): Source text to translate.
        translator (object): Object of Translator, with the CTranslate2
            model (must provide ``translate_batch``).
        sp_model (object): Object of SentencePieceProcessor, with the
            SentencePiece source model (must provide ``encode`` and
            ``decode``).

    Returns:
        str: Translation of the source text, or an empty string when
        ``source`` is empty, ``None`` or only whitespace.
    """
    # Nothing to translate (e.g. the button was clicked on an empty textbox):
    # return early instead of sending a bare language tag to the model.
    if not source or not source.strip():
        return ""

    pieces = ["[en_XX]"] + sp_model.encode(source, out_type=str)
    target_prefix = ["[my_MM]"]

    results = translator.translate_batch(
        [pieces],
        target_prefix=[target_prefix],
    )

    # Only one sentence was submitted, so take the first result and its first
    # hypothesis. The first output token is skipped before detokenizing
    # (CTranslate2 includes the target prefix in the returned hypothesis).
    best_hypothesis = results[0].hypotheses[0]
    return sp_model.decode(best_hypothesis[1:])
|
|
|
def call_model(source):
    """Translate ``source`` with the module-level translator and SentencePiece model.

    Args:
        source (str): Text to translate.

    Returns:
        Whatever ``translate`` returns for ``source``.
    """
    return translate(source, translator, sp_model)
|
|
|
# Two-column layout: English input and the Translate button on the left,
# Myanmar output on the right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            english = gr.Textbox(label="English text")
            translate_btn = gr.Button(value="Translate")
        with gr.Column():
            myanmar = gr.Textbox(label="Myanmar Text")

    # Clicking the button sends the English textbox content to call_model and
    # writes the result into the Myanmar textbox.
    translate_btn.click(
        call_model,
        inputs=english,
        outputs=myanmar,
        api_name="English-to-Myanmar",
    )

    # Sample sentences that fill the English textbox.
    examples = gr.Examples(
        examples=[
            "I went to the supermarket yesterday.",
            "Helen is a good swimmer.",
        ],
        inputs=[english],
    )

demo.launch()