Spaces:

flax-community
/

Mongolian-GPT2

Runtime error

App Files Files Community

bayartsogt committed on Jul 18, 2021

Commit

3e5d9ea

•

1 Parent(s): a46edff

get rid of state + some intro

Browse files

Files changed (2) hide show

app.py +3 -21
enums.py +5 -2

app.py CHANGED Viewed

@@ -1,7 +1,5 @@
 import re
-import time
 import streamlit as st
-import pandas as pd
 from wordcloud import WordCloud
 from googletrans import Translator
 from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
@@ -11,9 +9,6 @@ from enums import MODEL_NAME, MESSAGES, DESCRIPTION
 def iso2lang(iso):
     """Return the entry stored under ``MESSAGES["iso"]`` for the code ``iso``."""
     labels = MESSAGES["iso"]
     return labels[iso]
-def create_df_from_io(input, output):
-    return pd.DataFrame([[input, output, time.time()]], columns=["input", "output", "timestamp"])
 def simple_clean(text):
     """Lowercase ``text`` and replace basic punctuation with spaces.

     Each of the characters ``! @ # $ . , - ?`` and the newline character is
     replaced by a single space. All other characters (letters, digits,
     existing spaces, other symbols) are kept as they are.

     Args:
         text: The string to normalise.

     Returns:
         The lowercased string with the listed characters replaced by spaces.
     """
     # The hyphen is escaped on purpose: an unescaped "\n-?" inside a character
     # class is a *range* from newline to "?", which also matches digits and
     # most ASCII punctuation.
     return re.sub(r'[!@#$.,\n\-?]', ' ', text.lower())
@@ -33,8 +28,6 @@ st.write(DESCRIPTION)
 lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
 translator = Translator()
-if "df" not in st.session_state:
-    st.session_state.df = pd.DataFrame(columns=["input", "output", "timestamp"])
 with st.spinner(MESSAGES["loading_text"][lang]):
     pipe = load_pipe()
@@ -55,21 +48,10 @@ with st.spinner(MESSAGES["loading_text"][lang]):
         st.write(f"> *Original:* {result}")
         st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage. https://github.com/ssut/py-googletrans#note-on-library-usage')
-    st.session_state.df = st.session_state.df.append(create_df_from_io(text, result))
-st.write("### WordCloud based on previous outputs")
 with st.spinner(MESSAGES["loading_text"][lang]):
-    wordcloud_input = ""
-    for text in st.session_state.df.output.tolist():
-        wordcloud_input += simple_clean(text)
-    wordcloud = WordCloud(width = 800, height = 800,
                 background_color ='white',
-                min_font_size = 10).generate(wordcloud_input)
     st.image(wordcloud.to_array())
-st.write("### Түүх / History")
-with st.spinner(MESSAGES["loading_text"][lang]):
-    st.table(st.session_state.df.sort_values(by="timestamp", ascending=False))

 import re
 import streamlit as st
 from wordcloud import WordCloud
 from googletrans import Translator
 from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
 def iso2lang(iso):
     """Return the entry stored under ``MESSAGES["iso"]`` for the code ``iso``."""
     labels = MESSAGES["iso"]
     return labels[iso]
 def simple_clean(text):
     """Lowercase ``text`` and replace basic punctuation with spaces.

     Each of the characters ``! @ # $ . , - ?`` and the newline character is
     replaced by a single space. All other characters (letters, digits,
     existing spaces, other symbols) are kept as they are.

     Args:
         text: The string to normalise.

     Returns:
         The lowercased string with the listed characters replaced by spaces.
     """
     # The hyphen is escaped on purpose: an unescaped "\n-?" inside a character
     # class is a *range* from newline to "?", which also matches digits and
     # most ASCII punctuation.
     return re.sub(r'[!@#$.,\n\-?]', ' ', text.lower())
 lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
 translator = Translator()
 with st.spinner(MESSAGES["loading_text"][lang]):
     pipe = load_pipe()
         st.write(f"> *Original:* {result}")
         st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage. https://github.com/ssut/py-googletrans#note-on-library-usage')
+st.write("### WordCloud:")
 with st.spinner(MESSAGES["loading_text"][lang]):
+    wordcloud = WordCloud(width = 300, height = 300,
                 background_color ='white',
+                min_font_size = 5).generate(result)
     st.image(wordcloud.to_array())

enums.py CHANGED Viewed

@@ -18,13 +18,16 @@ MESSAGES = {
     },
     "iso": {
         'mn': 'Монгол / Mongolian',
-        'en': 'Англи / English'
     }
 }
 DESCRIPTION = """
-## Mongolian GPT2
 * **Goal:** To create GPT2 model that is able write text in Mongolian during [HuggingFace Community Week #2](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104).
 * **Discussion:** https://discuss.huggingface.co/t/pretrain-gpt-2-from-scratch-in-mongolian/7879
 * **Creator:** Bayartsogt Yadamsuren
 [[✉️ email](mailto:bayartsogt.yadamsuren@gmail.com)]

     },
     "iso": {
         'mn': 'Монгол / Mongolian',
+        'en': 'Англи / English (with translation)'
     }
 }
 DESCRIPTION = """
+## Mongolian GPT2 🇲🇳
 * **Goal:** To create GPT2 model that is able write text in Mongolian during [HuggingFace Community Week #2](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104).
+* **Overall Result:** So Fluent in Mongolian
+* **Data:** OSCAR (2GB) + Mongolian News Dataset (6GB)
+* **Train Steps:** 50k steps
 * **Discussion:** https://discuss.huggingface.co/t/pretrain-gpt-2-from-scratch-in-mongolian/7879
 * **Creator:** Bayartsogt Yadamsuren
 [[✉️ email](mailto:bayartsogt.yadamsuren@gmail.com)]