bayartsogt committed on
Commit
84a03d5
1 Parent(s): 033ed17
Files changed (3) hide show
  1. app.py +46 -0
  2. enums.py +33 -0
  3. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import streamlit as st
3
+ from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
4
+ from googletrans import Translator
5
+
6
+ from enums import MODEL_NAME, MESSAGES, DESCRIPTION
7
+
8
def iso2lang(iso):
    """Return the display label for a language ISO code.

    The label is looked up in ``MESSAGES["iso"]``; a code that is not a key
    of that mapping raises ``KeyError``.
    """
    labels = MESSAGES["iso"]
    return labels[iso]
10
+
11
@st.cache(allow_output_mutation=True)
def load_tokenizer():
    """Load the tokenizer for ``MODEL_NAME``, cached across Streamlit reruns.

    Streamlit re-executes the whole script on each widget interaction, so an
    uncached loader would rebuild the tokenizer every time. The decorator
    mirrors the one already used on ``load_model``;
    ``allow_output_mutation=True`` tells Streamlit not to hash the returned
    object.
    """
    return AutoTokenizer.from_pretrained(MODEL_NAME)
13
+
14
@st.cache(allow_output_mutation=True)
def load_model():
    """Load the causal language model for ``MODEL_NAME`` (cached by Streamlit).

    Uses ``AutoModelForCausalLM`` rather than the deprecated
    ``AutoModelWithLMHead``; the result is fed to a "text-generation"
    pipeline, which is the causal-LM use case.
    """
    # Imported here so this function stays self-contained and does not rely
    # on the file-level import list being changed.
    from transformers import AutoModelForCausalLM

    return AutoModelForCausalLM.from_pretrained(MODEL_NAME)
17
+
18
def load_pipe():
    """Build the "text-generation" pipeline from the loaded model and tokenizer."""
    # Keyword arguments are evaluated left to right, so the model is still
    # loaded before the tokenizer.
    return pipeline(
        "text-generation",
        model=load_model(),
        tokenizer=load_tokenizer(),
    )
22
+
23
# Streamlit page script: renders the description, lets the user pick a
# language, loads the generation pipeline and writes generated text for the
# prompt.
# NOTE(review): this listing is a diff rendering -- the leading `+` markers and
# the bare numbers are diff chrome, and the original indentation is not
# preserved, so which statements belong to each `with` / `if` / `elif` body
# cannot be confirmed from here.
+ st.write(DESCRIPTION)
24
+
# Language selector: the options are the ISO codes 'mn' and 'en', displayed
# through iso2lang.
25
+ lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
26
+ translator = Translator()
27
+
# Show a spinner with the per-language "loading" text while the pipeline loads.
28
+ with st.spinner(MESSAGES["loading_text"][lang]):
29
+ pipe = load_pipe()
30
+ st.success(MESSAGES["success_model_load"][lang])
31
+
# Prompt input; label and default text depend on the selected language.
32
+ text = st.text_area(
33
+ MESSAGES["input_description"][lang], MESSAGES["input_default"][lang])
34
+
35
+ with st.spinner(MESSAGES["loading_text"][lang]):
# 'mn': pass the prompt to the pipeline as-is and show the first result's
# 'generated_text'.
36
+ if lang == "mn":
37
+ result = pipe(text)[0]['generated_text']
38
+ st.write(result)
# 'en': translate the prompt en -> mn, generate, then translate the generated
# text mn -> en; both the translation and the original are written out.
39
+ elif lang == "en":
40
+ text = translator.translate(text, src='en', dest='mn').text
41
+ result_mn = pipe(text)[0]['generated_text']
42
+ result_en = translator.translate(result_mn, src='mn', dest='en').text
43
+ st.write(f"*Translated:* {result_en}")
44
+ st.write(f"> *Original:* {result_mn}")
45
+
# NOTE(review): presumably this notice is shown only in the 'en' branch, since
# it concerns translation -- indentation is lost here, confirm against the
# real file.
46
+ st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage. https://github.com/ssut/py-googletrans#note-on-library-usage')
enums.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Model identifier passed to the ``from_pretrained`` loaders in app.py.
MODEL_NAME = "bayartsogt/mongolian-gpt2"

# User-facing strings, keyed first by message id and then by language ISO
# code ("mn" or "en").
MESSAGES = {
    "success_model_load": dict(
        mn="Моделийг амжилттай уншлаа!!",
        en="Model Loaded!!",
    ),
    "loading_text": dict(
        mn="Уншиж байна...",
        en="Loading...",
    ),
    "input_description": dict(
        mn="Эхлэл хэсэг:",
        en="Prompt:",
    ),
    "input_default": dict(
        mn="Хүний амьдрал гэдэг",
        en="Life is",
    ),
    # Display labels for the language selector.
    "iso": dict(
        mn="Монгол / Mongolian",
        en="Англи / English",
    ),
}
24
+
25
# Markdown text for the page header; app.py passes it to st.write.
# NOTE(review): the bare numbers between the lines below are diff chrome, not
# part of the string, and any leading whitespace inside the string is not
# preserved in this listing.
# NOTE(review): in the first bullet, "able write text" presumably should read
# "able to write text" -- confirm before changing user-facing text.
+ DESCRIPTION = """
26
+ ## Mongolian GPT2
27
+ * **Goal:** To create GPT2 model that is able write text in Mongolian during [HuggingFace Community Week #2](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104).
28
+ * **Discussion:** https://discuss.huggingface.co/t/pretrain-gpt-2-from-scratch-in-mongolian/7879
29
+ * **Creator:** Bayartsogt Yadamsuren
30
+ [[✉️ email](mailto:bayartsogt.yadamsuren@gmail.com)]
31
+ [[🤗 huggingface](https://huggingface.co/bayartsogt)]
32
+ [[🤖 github](https://github.com/bayartsogt-ya)]
33
+ """
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers
2
+ streamlit
3
+ googletrans==3.1.0a