"""Gradio demo: English <-> Toki Pona translation with a fine-tuned M2M100 model."""

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona")
# translate() moves the tokenized inputs to `device`; the model has to live on
# the same device, otherwise generate() fails with a device mismatch on CUDA hosts.
model.to(device)
tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")

# UI label -> language code understood by the tokenizer.
# NOTE(review): Toki Pona is mapped to "tl"; presumably the fine-tuned
# checkpoint repurposes that language token -- confirm against the model card.
LANG_CODES = {"English": "en", "Toki Pona": "tl"}


def translate(text: str, src_lang: str, tgt_lang: str, candidates: int) -> str:
    """Translate *text* and return the candidate translations, one per line.

    Args:
        text: The text to translate.
        src_lang: Source language label; must be a key of ``LANG_CODES``.
        tgt_lang: Target language label; must be a key of ``LANG_CODES``.
        candidates: Number of translations to return. It is also used as the
            beam width, because beam search cannot return more sequences than
            it has beams.

    Returns:
        The decoded candidate translations joined with newlines, or an empty
        string when *text* is empty or whitespace-only.

    Raises:
        gr.Error: If either language label is not a key of ``LANG_CODES``.
    """
    src = LANG_CODES.get(src_lang)
    tgt = LANG_CODES.get(tgt_lang)
    if src is None or tgt is None:
        raise gr.Error(f"Unsupported language pair: {src_lang!r} -> {tgt_lang!r}")

    # Nothing to translate; skip the model call entirely.
    if not text or not text.strip():
        return ""

    # The slider may deliver a float; generate() needs a positive int.
    candidates = max(1, int(candidates))

    tokenizer.src_lang = src
    tokenizer.tgt_lang = tgt
    ins = tokenizer(text, return_tensors="pt").to(device)

    # Only `sequences` is read from the result, so scores and hidden states
    # are not requested.
    gen_args = {
        "return_dict_in_generate": True,
        "length_penalty": 0.0,
        "num_return_sequences": candidates,
        "num_beams": candidates,
        # Force the decoder to start with the target-language token.
        "forced_bos_token_id": tokenizer.get_lang_id(tgt),
    }

    outs = model.generate(**ins, **gen_args)
    output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)
    return "\n".join(output)


# Intro text shown next to the controls. Kept flush-left so the headings and
# paragraphs render as separate Markdown elements.
markdown = """
# ante toki

## toki a! 💬

This is an English to Toki Pona / Toki Pona to English translator.

Input your text to translate and desired number of generated sentences !
"""

# NOTE(review): the nesting of the layout below was reconstructed from a
# whitespace-mangled source -- confirm it matches the intended arrangement.
with gr.Blocks(theme="gstaff/xkcd") as app:
    with gr.Row():
        gr.Markdown(markdown)
        with gr.Column():
            input_text = gr.components.Textbox(
                label="Input Text",
                value="Dogs are cute, but I prefer cats.",
            )
            source_lang = gr.components.Dropdown(
                label="Source Language",
                value="English",
                choices=list(LANG_CODES.keys()),
            )
            target_lang = gr.components.Dropdown(
                label="Target Language",
                value="Toki Pona",
                choices=list(LANG_CODES.keys()),
            )
            return_seqs = gr.Slider(
                label="Number of generated sentences",
                value=3,
                minimum=1,
                maximum=12,
                step=1,
            )

            inputs = [input_text, source_lang, target_lang, return_seqs]
            outputs = gr.Textbox()

            translate_btn = gr.Button("o ante toki!")
            translate_btn.click(translate, inputs=inputs, outputs=outputs)

app.launch()