EricaCorral's picture
Upload app.py
2570705
raw
history blame
No virus
1.44 kB
from pypinyin import pinyin
from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
from LAC import LAC
import gradio as gr
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
lac = LAC(mode="seg")
def make_request(chinese_text):
tokenizer.src_lang="zh"
encoded_zh = tokenizer(chinese_text, return_tensors="pt")
generated_tokens = model.generate(**encoded_zh, forced_bos_token_id=tokenizer.get_lang_id("en"))
return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
def generatepinyin(input):
pinyin_list = pinyin(input)
pinyin_string = ""
for piece in pinyin_list:
pinyin_string = pinyin_string+" "+piece[0]
return pinyin_string
def generate_response(string_to_translate):
response = []
response.append([string_to_translate,make_request(string_to_translate),generatepinyin(string_to_translate)])
segmented_string_list = lac.run(string_to_translate)
for piece in segmented_string_list:
response.append([piece,make_request(piece),generatepinyin(piece)])
return response
iface = gr.Interface(
fn=generate_response,
title="English to Chinese",
description="English to Chinese with a SOTA model",
inputs=gr.inputs.Textbox(lines=5, placeholder="Enter text in Chinese"),
outputs="text")
iface.launch()