"""Streamlit app that translates Chinese text to English and shows its pinyin.

The full input is translated with the ``facebook/m2m100_418M`` model and
romanised with ``pypinyin``; the input is then segmented with LAC and every
segment is translated and romanised the same way.
"""

import streamlit as st
from pypinyin import pinyin
from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
from LAC import LAC

MODEL_NAME = "facebook/m2m100_418M"


def _cache_resource(func):
    """Wrap *func* so Streamlit keeps its result across script reruns."""
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        return cache_resource(func)
    # Older Streamlit releases only provide the legacy ``st.cache`` API.
    return st.cache(allow_output_mutation=True)(func)


@_cache_resource
def _load_resources():
    """Build the LAC segmenter, the M2M100 model and its tokenizer.

    Returns:
        A ``(segmenter, model, tokenizer)`` tuple. The tokenizer's source
        language is already set to Chinese (``"zh"``).
    """
    segmenter = LAC(mode='seg')
    translation_model = M2M100ForConditionalGeneration.from_pretrained(MODEL_NAME)
    translation_tokenizer = M2M100Tokenizer.from_pretrained(MODEL_NAME)
    translation_tokenizer.src_lang = "zh"
    return segmenter, translation_model, translation_tokenizer


# Streamlit re-executes this script on every widget interaction; going through
# the cached loader avoids re-creating the heavy objects on each rerun.
lac, model, tokenizer = _load_resources()


def make_request(chinese_text):
    """Translate *chinese_text* to English with the M2M100 model.

    Args:
        chinese_text: The Chinese text to translate.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated tokens,
        with special tokens removed.
    """
    encoded_zh = tokenizer(chinese_text, return_tensors="pt")
    generated_tokens = model.generate(
        **encoded_zh,
        forced_bos_token_id=tokenizer.get_lang_id("en"),
    )
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)


def generatepinyin(input):
    """Return the pinyin of *input* as one space-separated string.

    Args:
        input: The text to romanise. The name shadows the builtin but is kept
            so that existing keyword callers keep working.

    Returns:
        The first reading of every item produced by ``pypinyin.pinyin``,
        joined by single spaces (no leading or trailing space).
    """
    return " ".join(piece[0] for piece in pinyin(input))


def _build_entry(text):
    """Return ``[text, translation, pinyin]`` for one piece of text."""
    return [text, make_request(text), generatepinyin(text)]


st.title("Chinese-To-English-Tools")
string_to_translate = st.text_area(
    "Chinese Text to Translate",
    height=500,
)

if st.button("Run"):
    if not string_to_translate.strip():
        # Nothing meaningful to translate; do not run the model on blank input.
        st.warning("Please enter some Chinese text to translate.")
    else:
        # First entry covers the whole input, the rest cover each segment.
        response = [_build_entry(string_to_translate)]
        response.extend(
            _build_entry(piece)
            for piece in lac.run(string_to_translate)
            if piece.strip()  # skip whitespace-only segments such as newlines
        )
        st.success(response)