import re

import streamlit as st
from wordcloud import WordCloud
from googletrans import Translator
from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline

from enums import MODEL_NAME, MESSAGES, DESCRIPTION


def iso2lang(iso):
    return MESSAGES["iso"][iso]


def simple_clean(text):
    # Replace punctuation and newlines with spaces. The '-' is placed last in
    # the character class so it is matched literally instead of forming an
    # unintended '\n-?' range.
    return re.sub(r'[!@#$.,?\n-]', ' ', text.lower())


def load_tokenizer():
    return AutoTokenizer.from_pretrained(MODEL_NAME)


@st.cache(allow_output_mutation=True)
def load_model():
    # Note: AutoModelWithLMHead is deprecated in recent transformers releases;
    # AutoModelForCausalLM is its replacement for text-generation models.
    return AutoModelWithLMHead.from_pretrained(MODEL_NAME)


def load_pipe():
    model = load_model()
    tokenizer = load_tokenizer()
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


# ---------------------------------------------------------------------- #

st.write(DESCRIPTION)

lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
translator = Translator()

with st.spinner(MESSAGES["loading_text"][lang]):
    pipe = load_pipe()
st.success(MESSAGES["success_model_load"][lang])

text = st.text_input(
    MESSAGES["input_description"][lang],
    MESSAGES["input_default"][lang])

with st.spinner(MESSAGES["loading_text"][lang]):
    if lang == "mn":
        result = pipe(text)[0]['generated_text']
        st.write(result)
    elif lang == "en":
        # Translate the English prompt to Mongolian, generate, then translate
        # the generated text back to English for display.
        text = translator.translate(text, src='en', dest='mn').text
        result = pipe(text)[0]['generated_text']
        result_en = translator.translate(result, src='mn', dest='en').text
        st.write(f"*Translated:* {result_en}")
        st.write(f"> *Original:* {result}")
        st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage notes: https://github.com/ssut/py-googletrans#note-on-library-usage')

st.write("### WordCloud:")
with st.spinner(MESSAGES["loading_text"][lang]):
    wordcloud = WordCloud(width=300, height=300,
                          background_color='white',
                          min_font_size=5).generate(result)
    st.image(wordcloud.to_array())