bayartsogt committed on
Commit
3e5d9ea
1 Parent(s): a46edff

get rid of state + some intro

Browse files
Files changed (2) hide show
  1. app.py +3 -21
  2. enums.py +5 -2
app.py CHANGED
@@ -1,7 +1,5 @@
1
  import re
2
- import time
3
  import streamlit as st
4
- import pandas as pd
5
  from wordcloud import WordCloud
6
  from googletrans import Translator
7
  from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
@@ -11,9 +9,6 @@ from enums import MODEL_NAME, MESSAGES, DESCRIPTION
11
  def iso2lang(iso):
12
  return MESSAGES["iso"][iso]
13
 
14
- def create_df_from_io(input, output):
15
- return pd.DataFrame([[input, output, time.time()]], columns=["input", "output", "timestamp"])
16
-
17
  def simple_clean(text):
18
  return re.sub('[!@#$.,\n-?]', ' ', text.lower())
19
 
@@ -33,8 +28,6 @@ st.write(DESCRIPTION)
33
 
34
  lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
35
  translator = Translator()
36
- if "df" not in st.session_state:
37
- st.session_state.df = pd.DataFrame(columns=["input", "output", "timestamp"])
38
 
39
  with st.spinner(MESSAGES["loading_text"][lang]):
40
  pipe = load_pipe()
@@ -55,21 +48,10 @@ with st.spinner(MESSAGES["loading_text"][lang]):
55
  st.write(f"> *Original:* {result}")
56
  st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage. https://github.com/ssut/py-googletrans#note-on-library-usage')
57
 
58
- st.session_state.df = st.session_state.df.append(create_df_from_io(text, result))
59
-
60
-
61
- st.write("### WordCloud based on previous outputs")
62
  with st.spinner(MESSAGES["loading_text"][lang]):
63
- wordcloud_input = ""
64
- for text in st.session_state.df.output.tolist():
65
- wordcloud_input += simple_clean(text)
66
-
67
- wordcloud = WordCloud(width = 800, height = 800,
68
  background_color ='white',
69
- min_font_size = 10).generate(wordcloud_input)
70
 
71
  st.image(wordcloud.to_array())
72
-
73
- st.write("### Түүх / History")
74
- with st.spinner(MESSAGES["loading_text"][lang]):
75
- st.table(st.session_state.df.sort_values(by="timestamp", ascending=False))
1
  import re
 
2
  import streamlit as st
 
3
  from wordcloud import WordCloud
4
  from googletrans import Translator
5
  from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
9
  def iso2lang(iso):
10
  return MESSAGES["iso"][iso]
11
 
 
 
 
12
  def simple_clean(text):
13
  return re.sub('[!@#$.,\n-?]', ' ', text.lower())
14
 
28
 
29
  lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
30
  translator = Translator()
 
 
31
 
32
  with st.spinner(MESSAGES["loading_text"][lang]):
33
  pipe = load_pipe()
48
  st.write(f"> *Original:* {result}")
49
  st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage. https://github.com/ssut/py-googletrans#note-on-library-usage')
50
 
51
+ st.write("### WordCloud:")
 
 
 
52
  with st.spinner(MESSAGES["loading_text"][lang]):
53
+ wordcloud = WordCloud(width = 300, height = 300,
 
 
 
 
54
  background_color ='white',
55
+ min_font_size = 5).generate(result)
56
 
57
  st.image(wordcloud.to_array())
 
 
 
 
enums.py CHANGED
@@ -18,13 +18,16 @@ MESSAGES = {
18
  },
19
  "iso": {
20
  'mn': 'Монгол / Mongolian',
21
- 'en': 'Англи / English'
22
  }
23
  }
24
 
25
  DESCRIPTION = """
26
- ## Mongolian GPT2
27
  * **Goal:** To create GPT2 model that is able write text in Mongolian during [HuggingFace Community Week #2](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104).
 
 
 
28
  * **Discussion:** https://discuss.huggingface.co/t/pretrain-gpt-2-from-scratch-in-mongolian/7879
29
  * **Creator:** Bayartsogt Yadamsuren
30
  [[✉️ email](mailto:bayartsogt.yadamsuren@gmail.com)]
18
  },
19
  "iso": {
20
  'mn': 'Монгол / Mongolian',
21
+ 'en': 'Англи / English (with translation)'
22
  }
23
  }
24
 
25
  DESCRIPTION = """
26
+ ## Mongolian GPT2 🇲🇳
27
  * **Goal:** To create GPT2 model that is able write text in Mongolian during [HuggingFace Community Week #2](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104).
28
+ * **Overall Result:** So Fluent in Mongolian
29
+ * **Data:** OSCAR (2GB) + Mongolian News Dataset (6GB)
30
+ * **Train Steps:** 50k steps
31
  * **Discussion:** https://discuss.huggingface.co/t/pretrain-gpt-2-from-scratch-in-mongolian/7879
32
  * **Creator:** Bayartsogt Yadamsuren
33
  [[✉️ email](mailto:bayartsogt.yadamsuren@gmail.com)]