# mT5-new / app.py
# NgalNgal's picture
# Update app.py
# 898542d verified
# raw
# history blame
# No virus
# 2.07 kB
import os
import spaces
import gradio as gr #gr.load("models/NgalNgal/mT5-new").launch()
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import sentencepiece as spm
import ctranslate2
from nltk import sent_tokenize
# Locations of the CTranslate2 model directory and the SentencePiece model
# file stored inside it (relative to the working directory).
ct_model_path = "mbart25enmy_ct2/"
sp_model_path = "mbart25enmy_ct2/sentence.bpe.model"

# Both objects are created once at import time and shared by every request.
translator = ctranslate2.Translator(model_path=ct_model_path)
sp_model = spm.SentencePieceProcessor(model_file=sp_model_path)
def translate(source: str, translator, sp_model) -> str:
    """Translate English text to Myanmar with a CTranslate2 model.

    The text is split into SentencePiece pieces, prefixed with the
    ``[en_XX]`` source tag, and translated with ``[my_MM]`` forced as the
    first target token. Only the first hypothesis of the result is used.

    Args:
        source: English text to translate.
        translator: Object exposing ``translate_batch`` (a
            ``ctranslate2.Translator`` in this app).
        sp_model: Object exposing ``encode`` and ``decode`` (a
            ``sentencepiece.SentencePieceProcessor`` in this app).

    Returns:
        The detokenized translation, or an empty string when ``source`` is
        empty or contains only whitespace.
    """
    # Nothing to translate: skip the model call, which would otherwise be
    # asked to generate from the language tag alone.
    if not source or not source.strip():
        return ""

    source_tokens = ["[en_XX]"] + sp_model.encode(source, out_type=str)
    target_prefix = ["[my_MM]"]
    results = translator.translate_batch(
        [source_tokens], target_prefix=[target_prefix]
    )
    best_hypothesis = results[0].hypotheses[0]
    # The hypothesis begins with the forced target prefix; drop it before
    # turning the pieces back into text.
    return sp_model.decode(best_hypothesis[len(target_prefix):])
def call_model(source):
    """Translate ``source`` with the module-level translator and SentencePiece model.

    Registered as the click handler of the Translate button.
    """
    return translate(source, translator, sp_model)
# Two-column layout: English input and the button in the first column,
# Myanmar output in the second.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            english = gr.Textbox(label="English text")
            translate_btn = gr.Button(value="Translate")
        with gr.Column():
            myanmar = gr.Textbox(label="Myanmar Text")

    # Event listeners and examples are registered inside the Blocks context.
    translate_btn.click(
        fn=call_model,
        inputs=english,
        outputs=myanmar,
        api_name="English-to-Myanmar",
    )
    examples = gr.Examples(
        examples=[
            "I went to the supermarket yesterday.",
            "Helen is a good swimmer.",
        ],
        inputs=[english],
    )

demo.launch()