# ante-toki / app.py
# lebakasable's picture
# fix
# b41125b
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
# Use the first CUDA GPU when one is available; otherwise stay on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Fine-tuned translation checkpoint. It is placed on `device` so that tensors
# sent to `device` can be fed to it without a device-mismatch error.
model = AutoModelForSeq2SeqLM.from_pretrained(
    "Jayyydyyy/m2m100_418m_tokipona"
).to(device)

# The tokenizer is loaded from the base M2M100 checkpoint.
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")

# UI display name -> tokenizer language code.
# NOTE(review): "tl" is presumably the language code the fine-tuned checkpoint
# reuses for Toki Pona -- confirm against the model card.
LANG_CODES = {"English": "en", "Toki Pona": "tl"}
def translate(text, src_lang, tgt_lang, candidates: int):
    """Translate ``text`` between the languages listed in ``LANG_CODES``.

    Args:
        text: Text to translate.
        src_lang: Display name of the source language (a key of
            ``LANG_CODES``).
        tgt_lang: Display name of the target language (a key of
            ``LANG_CODES``).
        candidates: Number of translations to return; also used as the beam
            width. Coerced to an integer of at least 1.

    Returns:
        The candidate translations joined by newlines, or an empty string
        when ``text`` is empty or only whitespace.

    Raises:
        ValueError: If ``src_lang`` or ``tgt_lang`` is not a key of
            ``LANG_CODES``.
    """
    # Nothing to translate: skip tokenization and generation entirely.
    if not text or not text.strip():
        return ""

    src = LANG_CODES.get(src_lang)
    tgt = LANG_CODES.get(tgt_lang)
    if src is None or tgt is None:
        # Fail with a readable message instead of a KeyError on None below.
        raise ValueError(
            f"Unsupported language pair: {src_lang!r} -> {tgt_lang!r}; "
            f"expected one of {sorted(LANG_CODES)}"
        )

    # UI sliders may hand over a non-int number; beam search needs an int >= 1.
    num_candidates = max(1, int(candidates))

    tokenizer.src_lang = src
    tokenizer.tgt_lang = tgt

    # Put the inputs wherever the model actually lives so the two can never
    # end up on different devices.
    ins = tokenizer(text, return_tensors="pt").to(model.device)

    # Only the generated sequences are used, so per-step scores and hidden
    # states are not requested.
    outs = model.generate(
        **ins,
        return_dict_in_generate=True,
        # Exponent applied to sequence length when ranking beams; 0.0 turns
        # that length normalization off.
        length_penalty=0.0,
        num_return_sequences=num_candidates,
        num_beams=num_candidates,
        # Force the first generated token to be the target-language token.
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt],
    )
    output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)
    return "\n".join(output)
# Build the translator UI: intro text, the input controls, and a button that
# runs translate() and writes the result into an output textbox.
with gr.Blocks(theme="gstaff/xkcd") as app:
    # Intro text. The emoji is spelled as an escape (U+1F4AC, speech balloon)
    # so a mis-decoded copy of this file cannot turn it into mojibake again.
    markdown = (
        "\n"
        "# ante toki\n"
        "## toki a! \U0001F4AC\n"
        "This is an English to Toki Pona / Toki Pona to English translator.\n"
        "Input your text to translate and desired number of generated sentences !\n"
    )

    # NOTE(review): the nesting below (intro and controls side by side in one
    # row) was reconstructed from a copy of this file that had lost its
    # indentation -- confirm it matches the intended layout.
    with gr.Row():
        gr.Markdown(markdown)

        with gr.Column():
            input_text = gr.components.Textbox(
                label="Input Text",
                value="Dogs are cute, but I prefer cats.",
            )
            source_lang = gr.components.Dropdown(
                label="Source Language",
                value="English",
                choices=list(LANG_CODES.keys()),
            )
            target_lang = gr.components.Dropdown(
                label="Target Language",
                value="Toki Pona",
                choices=list(LANG_CODES.keys()),
            )
            # The chosen value is passed to translate() as `candidates`.
            return_seqs = gr.Slider(
                label="Number of generated sentences",
                value=3,
                minimum=1,
                maximum=12,
                step=1,
            )

            # Order must match translate(text, src_lang, tgt_lang, candidates).
            inputs = [input_text, source_lang, target_lang, return_seqs]
            outputs = gr.Textbox()

            translate_btn = gr.Button("o ante toki!")
            translate_btn.click(translate, inputs=inputs, outputs=outputs)

app.launch()