cahya committed on
Commit
49ded92
1 Parent(s): 2744cb8

Add the original Indonesian GPT-2 models. Update info about the app.

Browse files
Files changed (2) hide show
  1. app/app.py +50 -10
  2. app/prompts.py +21 -0
app/app.py CHANGED
@@ -13,28 +13,40 @@ import pathlib
13
  # st.set_page_config(page_title="Indonesian GPT-2")
14
 
15
  MODELS = {
 
 
 
 
 
 
 
 
 
 
 
 
16
  "Indonesian Literature - GPT-2 Small": {
17
  "group": "Indonesian Literature",
18
  "name": "cahya/gpt2-small-indonesian-story",
19
- "description": "Indonesian Literature Generator using fine-tuned small GPT-2 model",
20
  "text_generator": None
21
  },
22
  "Indonesian Literature - GPT-2 Medium": {
23
  "group": "Indonesian Literature",
24
  "name": "cahya/gpt2-medium-indonesian-story",
25
- "description": "Indonesian Literature Generator using fine-tuned medium GPT-2 model",
26
  "text_generator": None
27
  },
28
  "Indonesian Academic Journal - GPT-2 Small": {
29
  "group": "Indonesian Journal",
30
  "name": "Galuh/id-journal-gpt2",
31
- "description": "Indonesian Journal Generator using fine-tuned small GPT-2 model",
32
  "text_generator": None
33
  },
34
  "Indonesian Persona Chatbot - GPT-2 Small": {
35
  "group": "Indonesian Persona Chatbot",
36
  "name": "cahya/gpt2-small-indonesian-personachat",
37
- "description": "Indonesian Persona Chatbot using fine-tuned small GPT-2 model",
38
  "text_generator": None
39
  },
40
  }
@@ -52,6 +64,31 @@ def stc_chatbot(root_dir, width=700, height=900):
52
  html = html.replace('<script src="js/main.js"></script>', "<script>\n" + js + "\n</script>")
53
  stc.html(html, width=width, height=height, scrolling=True)
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  model = st.sidebar.selectbox('Model', (MODELS.keys()))
57
 
@@ -77,12 +114,12 @@ def process(text_generator, text: str, max_length: int = 100, do_sample: bool =
77
 
78
  st.title("Indonesian GPT-2 Applications")
79
  prompt_group_name = MODELS[model]["group"]
80
- st.subheader(prompt_group_name)
81
  description = f"This application is a demo for {MODELS[model]['description']}"
82
  st.markdown(description)
83
  model_name = f"Model name: [{MODELS[model]['name']}](https://huggingface.co/{MODELS[model]['name']})"
84
  st.markdown(model_name)
85
- if prompt_group_name in ["Indonesian Literature", "Indonesian Journal"]:
86
  session_state = SessionState.get(prompt=None, prompt_box=None, text=None)
87
  ALL_PROMPTS = list(PROMPT_LIST[prompt_group_name].keys())+["Custom"]
88
 
@@ -128,17 +165,20 @@ if prompt_group_name in ["Indonesian Literature", "Indonesian Journal"]:
128
  value=True
129
  )
130
 
131
- top_k = 40
132
  top_p = 0.95
133
 
134
  if do_sample:
135
  top_k = st.sidebar.number_input(
136
  "Top k",
137
- value=top_k
 
138
  )
139
  top_p = st.sidebar.number_input(
140
  "Top p",
141
- value=top_p
 
 
142
  )
143
 
144
  seed = st.sidebar.number_input(
@@ -148,7 +188,7 @@ if prompt_group_name in ["Indonesian Literature", "Indonesian Journal"]:
148
  )
149
 
150
  for group_name in MODELS:
151
- if MODELS[group_name]["group"] in ["Indonesian Literature", "Indonesian Journal"]:
152
  MODELS[group_name]["text_generator"] = get_generator(MODELS[group_name]["name"])
153
  # text_generator = get_generator()
154
  if st.button("Run"):
 
13
  # st.set_page_config(page_title="Indonesian GPT-2")
14
 
15
  MODELS = {
16
+ "Indonesian GPT-2 Small": {
17
+ "group": "Indonesian GPT-2",
18
+ "name": "flax-community/gpt2-small-indonesian",
19
+ "description": "The original Indonesian small GPT-2 model.",
20
+ "text_generator": None
21
+ },
22
+ "Indonesian GPT-2 Medium": {
23
+ "group": "Indonesian GPT-2",
24
+ "name": "flax-community/gpt2-medium-indonesian",
25
+ "description": "The original Indonesian medium GPT-2 model.",
26
+ "text_generator": None
27
+ },
28
  "Indonesian Literature - GPT-2 Small": {
29
  "group": "Indonesian Literature",
30
  "name": "cahya/gpt2-small-indonesian-story",
31
+ "description": "The Indonesian Literature Generator using fine-tuned small GPT-2 model.",
32
  "text_generator": None
33
  },
34
  "Indonesian Literature - GPT-2 Medium": {
35
  "group": "Indonesian Literature",
36
  "name": "cahya/gpt2-medium-indonesian-story",
37
+ "description": "The Indonesian Literature Generator using fine-tuned medium GPT-2 model.",
38
  "text_generator": None
39
  },
40
  "Indonesian Academic Journal - GPT-2 Small": {
41
  "group": "Indonesian Journal",
42
  "name": "Galuh/id-journal-gpt2",
43
+ "description": "The Indonesian Journal Generator using fine-tuned small GPT-2 model.",
44
  "text_generator": None
45
  },
46
  "Indonesian Persona Chatbot - GPT-2 Small": {
47
  "group": "Indonesian Persona Chatbot",
48
  "name": "cahya/gpt2-small-indonesian-personachat",
49
+ "description": "The Indonesian Persona Chatbot using fine-tuned small GPT-2 model.",
50
  "text_generator": None
51
  },
52
  }
 
64
  html = html.replace('<script src="js/main.js"></script>', "<script>\n" + js + "\n</script>")
65
  stc.html(html, width=width, height=height, scrolling=True)
66
 
67
+ st.sidebar.markdown("""
68
+ <style>
69
+ .aligncenter {
70
+ text-align: center;
71
+ }
72
+ </style>
73
+ <p class="aligncenter">
74
+ <img src="https://huggingface.co/spaces/flax-community/gpt2-indonesian/resolve/main/huggingwayang.png"/>
75
+ </p>
76
+ """, unsafe_allow_html=True)
77
+ st.sidebar.markdown("""
78
+ ___
79
+ <p style='text-align: center'>
80
+ This is a collection of Applications that generates sentences using Indonesian GPT-2 models!
81
+ </p>
82
+ <p style='text-align: center'>
83
+ Created by <a href="https://huggingface.co/indonesian-nlp">Indonesian NLP</a> team @2021
84
+ <br/>
85
+ <a href="https://github.com/indonesian-nlp/gpt2-app" target="_blank">GitHub</a> | <a href="https://github.com/indonesian-nlp/gpt2-app" target="_blank">Project Report</a>
86
+ </p>
87
+ """, unsafe_allow_html=True)
88
+
89
+ st.sidebar.markdown("""
90
+ ___
91
+ """, unsafe_allow_html=True)
92
 
93
  model = st.sidebar.selectbox('Model', (MODELS.keys()))
94
 
 
114
 
115
  st.title("Indonesian GPT-2 Applications")
116
  prompt_group_name = MODELS[model]["group"]
117
+ st.header(prompt_group_name)
118
  description = f"This application is a demo for {MODELS[model]['description']}"
119
  st.markdown(description)
120
  model_name = f"Model name: [{MODELS[model]['name']}](https://huggingface.co/{MODELS[model]['name']})"
121
  st.markdown(model_name)
122
+ if prompt_group_name in ["Indonesian GPT-2", "Indonesian Literature", "Indonesian Journal"]:
123
  session_state = SessionState.get(prompt=None, prompt_box=None, text=None)
124
  ALL_PROMPTS = list(PROMPT_LIST[prompt_group_name].keys())+["Custom"]
125
 
 
165
  value=True
166
  )
167
 
168
+ top_k = 30
169
  top_p = 0.95
170
 
171
  if do_sample:
172
  top_k = st.sidebar.number_input(
173
  "Top k",
174
+ value=top_k,
175
+ help="The number of highest probability vocabulary tokens to keep for top-k-filtering."
176
  )
177
  top_p = st.sidebar.number_input(
178
  "Top p",
179
+ value=top_p,
180
+ help="If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher "
181
+ "are kept for generation."
182
  )
183
 
184
  seed = st.sidebar.number_input(
 
188
  )
189
 
190
  for group_name in MODELS:
191
+ if MODELS[group_name]["group"] in ["Indonesian GPT-2", "Indonesian Literature", "Indonesian Journal"]:
192
  MODELS[group_name]["text_generator"] = get_generator(MODELS[group_name]["name"])
193
  # text_generator = get_generator()
194
  if st.button("Run"):
app/prompts.py CHANGED
@@ -1,4 +1,25 @@
1
  PROMPT_LIST = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "Indonesian Literature": {
3
  "Adult Romance": [
4
  "Ini adalah kisah tentang seorang laki-laki yang berusaha memperjuangkan cintanya",
 
1
  PROMPT_LIST = {
2
+ "Indonesian GPT-2": {
3
+ "Resep masakan (recipe)": [
4
+ "Berikut adalah cara memasak sate ayam:\n",
5
+ "Langkah-langkah membuat nasi goreng:\n",
6
+ "Berikut adalah bahan-bahan membuat nastar:\n"
7
+ ],
8
+ "Puisi (poetry)": [
9
+ "Aku ingin jadi merpati\nTerbang di langit yang damai\nBernyanyi-nyanyi tentang masa depan\n",
10
+ "Terdiam aku satu persatu dengan tatapan binar\nSenyawa merasuk dalam sukma membuat lara\nKefanaan membentuk kelemahan"
11
+ ],
12
+ "Cerpen (short story)": [
13
+ "Putri memakai sepatunya dengan malas. Kalau bisa, selama seminggu ini ia bolos sekolah saja. Namun, Mama pasti akan marah. Ulangan tengah semester telah selesai. Minggu ini, di sekolah sedang berlangsung pekan olahraga.",
14
+ "\"Wah, hari ini cerah sekali ya,\" ucap Budi ketika ia keluar rumah.",
15
+ "Sewindu sudah kita tak berjumpa, rinduku padamu sudah tak terkira."
16
+ ],
17
+ "Sejarah (history)": [
18
+ "Mohammad Natsir adalah seorang ulama, politisi, dan pejuang kemerdekaan Indonesia.",
19
+ "Ir. H. Soekarno adalah Presiden pertama Republik Indonesia. Ia adalah seorang tokoh perjuangan yang memainkan peranan penting dalam memerdekakan bangsa Indonesia",
20
+ "Borobudur adalah sebuah candi Buddha yang terletak di sebelah barat laut Yogyakarta. Monumen ini merupakan model alam semesta dan dibangun sebagai tempat suci untuk memuliakan Buddha"
21
+ ],
22
+ },
23
  "Indonesian Literature": {
24
  "Adult Romance": [
25
  "Ini adalah kisah tentang seorang laki-laki yang berusaha memperjuangkan cintanya",