# Mongolian-GPT2 / app.py
# bayartsogt's picture
# get rid of state + some intro
# 3e5d9ea
import re
import streamlit as st
from wordcloud import WordCloud
from googletrans import Translator
from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
from enums import MODEL_NAME, MESSAGES, DESCRIPTION
def iso2lang(iso):
    """Return the display label stored under ``iso`` in ``MESSAGES["iso"]``.

    Used as the ``format_func`` of the language radio button, so ``iso`` is
    one of the option values offered there.
    """
    labels = MESSAGES["iso"]
    return labels[iso]
def simple_clean(text: str) -> str:
    """Lower-case ``text`` and replace basic punctuation with spaces.

    The characters replaced are ``! @ # $ . , ? -`` and the newline. Every
    match is substituted by a single space, so the length of the string is
    preserved.

    Args:
        text: The string to normalise.

    Returns:
        The lower-cased string with the listed characters turned into spaces.
    """
    # The hyphen is placed last in the class so it is a literal '-'.
    # Written as `\n-?` it formed a range from 0x0A to 0x3F, which also
    # wiped out digits, quotes, brackets, colons, etc.
    return re.sub(r'[!@#$.,\n?-]', ' ', text.lower())
def load_tokenizer():
    """Build the tokenizer for ``MODEL_NAME`` with ``AutoTokenizer.from_pretrained``."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return tokenizer
@st.cache(allow_output_mutation=True)
def load_model():
    """Build the language model for ``MODEL_NAME``.

    The function is wrapped in ``st.cache(allow_output_mutation=True)``, so
    Streamlit is asked to reuse the returned object instead of calling
    ``from_pretrained`` again.
    """
    # NOTE(review): newer transformers releases mark AutoModelWithLMHead as
    # deprecated in favour of AutoModelForCausalLM -- confirm against the
    # pinned transformers version before switching.
    model = AutoModelWithLMHead.from_pretrained(MODEL_NAME)
    return model
def load_pipe():
    """Return a ``"text-generation"`` pipeline built from the model and tokenizer.

    The model is loaded first (through ``load_model``), then the tokenizer
    (through ``load_tokenizer``).
    """
    # Keyword arguments are evaluated left to right, so the model is still
    # loaded before the tokenizer.
    return pipeline(
        "text-generation",
        model=load_model(),
        tokenizer=load_tokenizer(),
    )
# ---------------------------------------------------------------------- #
# Page body: intro text, language choice, generation, then a word cloud.
st.write(DESCRIPTION)

# Language selector; the option labels are produced by iso2lang.
lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
translator = Translator()

# Build the generation pipeline while a spinner is displayed.
with st.spinner(MESSAGES["loading_text"][lang]):
    pipe = load_pipe()
st.success(MESSAGES["success_model_load"][lang])

# Prompt box, labelled and pre-filled from MESSAGES for the chosen language.
input_label = MESSAGES["input_description"][lang]
input_default = MESSAGES["input_default"][lang]
text = st.text_input(input_label, input_default)

with st.spinner(MESSAGES["loading_text"][lang]):
    if lang == "mn":
        # The prompt is passed to the model as typed.
        generations = pipe(text)
        result = generations[0]['generated_text']
        st.write(result)
    elif lang == "en":
        # Translate the prompt en -> mn, generate, then translate the
        # generated text mn -> en for display.
        text = translator.translate(text, src='en', dest='mn').text
        generations = pipe(text)
        result = generations[0]['generated_text']
        result_en = translator.translate(result, src='mn', dest='en').text
        st.write(f"*Translated:* {result_en}")
        st.write(f"> *Original:* {result}")
        st.warning(
            'Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). '
            'Please check out the usage. '
            'https://github.com/ssut/py-googletrans#note-on-library-usage'
        )

st.write("### WordCloud:")
with st.spinner(MESSAGES["loading_text"][lang]):
    # The cloud is always built from `result`, the untranslated model output.
    cloud_builder = WordCloud(
        width=300,
        height=300,
        background_color='white',
        min_font_size=5,
    )
    wordcloud = cloud_builder.generate(result)
st.image(wordcloud.to_array())