Tahsin-Mayeesha committed on
Commit
2d78164
1 Parent(s): c232f33

added streamlit and gradio app

Browse files
Files changed (4) hide show
  1. app.py +140 -0
  2. gradioapp.py +25 -0
  3. prompts.py +8 -0
  4. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Modified from https://huggingface.co/spaces/flax-community/gpt2-indonesian/tree/main """
2
+
3
+
4
+ import json
5
+ import requests
6
+ from mtranslate import translate
7
+ from prompts import PROMPT_LIST
8
+ import streamlit as st
9
+ import random
10
+
11
+
12
+
# Markdown overview shown on the demo page. The "Overall Result" line used to
# name a different language (left over from the demo this app was adapted
# from); this demo is for the Bengali model.
description = """
## Overview
* **Overall Result:** So Fluent in Bengali
* **Data:** [mC4-bn](https://huggingface.co/datasets/mc4)
* **Train Steps:** 250k steps
* **Contributors:** M Saiful Bari,Khalid Saifullah,Ibrahim Musa, Tasmiah Tahsin Mayeesha, Ritobrata Ghosh
* **link** [[🤗 huggingface](https://huggingface.co/flax-community/gpt2-bengali/)]
"""


# Extra HTTP headers sent with every inference request (none configured).
headers = {}

# Display name -> hosted inference endpoint of each selectable model.
MODELS = {
    "GPT-2 Bengali": {
        "url": "https://api-inference.huggingface.co/models/flax-community/gpt2-bengali"
    },
    "GPT-2 Finetuned(On Bengali Songs)": {
        "url": "https://api-inference.huggingface.co/models/khalidsaifullaah/bengali-lyricist-gpt2"
    },
}
32
+
33
+
def query(payload, model_name, timeout=60.0):
    """POST ``payload`` as JSON to the inference endpoint of ``model_name``.

    Args:
        payload: JSON-serialisable request body.
        model_name: Key into ``MODELS`` selecting the endpoint URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response. If the request fails or the response body
        is not valid JSON, a ``{"error": <message>}`` dict is returned instead
        of raising, so callers can treat transport failures like API errors.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    url = MODELS[model_name]["url"]
    print("model url:", url)

    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(payload),
            timeout=timeout,
        )
    except requests.exceptions.RequestException as exc:
        # Connection problems and timeouts: report instead of crashing the page.
        return {"error": f"Request to the inference API failed: {exc}"}

    try:
        return json.loads(response.content.decode("utf-8"))
    except ValueError:
        # Covers both invalid JSON and undecodable bytes (e.g. an HTML
        # gateway error page returned in place of a JSON document).
        return {
            "error": (
                "Inference API returned a non-JSON response "
                f"(HTTP {response.status_code})"
            )
        }
39
+
40
+
def process(text: str,
            model_name: str,
            max_len: int,
            temp: float,
            top_k: int,
            top_p: float):
    """Build a text-generation request for ``text`` and send it via ``query``.

    Args:
        text: Prompt to continue.
        model_name: Name of the model to query (forwarded to ``query``).
        max_len: Sent as ``max_new_tokens``.
        temp: Sent as ``temperature``.
        top_k: Sent as ``top_k``.
        top_p: Sent as ``top_p``.

    Returns:
        Whatever ``query`` returns for the assembled payload.
    """
    # Generation settings; the repetition penalty is fixed at 2.0.
    generation_params = dict(
        max_new_tokens=max_len,
        top_k=top_k,
        top_p=top_p,
        temperature=temp,
        repetition_penalty=2.0,
    )
    request_body = dict(
        inputs=text,
        parameters=generation_params,
        options=dict(use_cache=True),
    )
    return query(request_body, model_name)
62
+
# ---------------------------------------------------------------------------
# Streamlit page: sidebar controls, prompt selection and result rendering.
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Bengali GPT-2 Demo")

st.title("Bengali GPT-2")

st.sidebar.subheader("Configurable parameters")

max_len = st.sidebar.number_input(
    "Maximum length",
    value=30,
    help="The maximum length of the sequence to be generated."
)

temp = st.sidebar.slider(
    "Temperature",
    value=1.0,
    min_value=0.1,
    max_value=100.0,
    help="The value used to module the next token probabilities."
)

top_k = st.sidebar.number_input(
    "Top k",
    value=10,
    help="The number of highest probability vocabulary tokens to keep for top-k-filtering."
)

top_p = st.sidebar.number_input(
    "Top p",
    value=0.95,
    help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation."
)

# NOTE(review): this choice is collected but never forwarded to process(),
# so it currently has no effect on the request that is sent.
do_sample = st.sidebar.selectbox('Sampling?', (True, False), help="Whether or not to use sampling; use greedy decoding otherwise.")

st.markdown(
    """Bengali GPT-2 demo. Part of the [Huggingface JAX/Flax event](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/). Also features a finetuned version on bengali song lyrics."""
)

st.write(description)

# Offer exactly the models that have an endpoint configured in MODELS.
model_name = st.selectbox('Model', list(MODELS))

# "Custom" is appended last and preselected so the user starts with a free-text prompt.
ALL_PROMPTS = list(PROMPT_LIST.keys()) + ["Custom"]
prompt = st.selectbox('Prompt', ALL_PROMPTS, index=len(ALL_PROMPTS) - 1)

if prompt == "Custom":
    prompt_box = "Enter your text here"
else:
    # Pre-fill the text area with a random sample from the chosen category.
    prompt_box = random.choice(PROMPT_LIST[prompt])

text = st.text_area("Enter text", prompt_box)

if st.button("Run"):
    with st.spinner(text="Getting results..."):
        st.subheader("Result")
        print(f"maxlen:{max_len}, temp:{temp}, top_k:{top_k}, top_p:{top_p}")
        result = process(text=text,
                         model_name=model_name,
                         max_len=int(max_len),
                         temp=temp,
                         top_k=int(top_k),
                         top_p=float(top_p))

        print("result:", result)
        if isinstance(result, dict) and "error" in result:
            error = result["error"]
            if isinstance(error, str):
                # st.write() is not print(): it has no ``end`` keyword.
                st.write(f'{error}.')
                if "estimated_time" in result:
                    st.write(f'Please try it again in about {result["estimated_time"]:.0f} seconds')
            elif isinstance(error, list):
                for message in error:
                    st.write(f'{message}')
            else:
                # Unknown error shape: still show it rather than nothing.
                st.write(f'{error}')
        elif (isinstance(result, list) and result
                and isinstance(result[0], dict)
                and "generated_text" in result[0]):
            generated = result[0]["generated_text"]
            # Two trailing spaces before a newline form a Markdown hard line break.
            st.write(generated.replace("\n", "  \n"))
            st.text("English translation")
            st.write(translate(generated, "en", "bn").replace("\n", "  \n"))
        else:
            # Neither an error dict nor a non-empty list of generations.
            st.write("Unexpected response from the inference API.")
gradioapp.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio.mix import Parallel
3
+
# Sample inputs offered in the combined interface.
examples = [
    ['আমার সোনার বাংলা'],
    ['মনে পড়ে, রুবি রায়'],
]

# Bengali -> English translation model.
translator = gr.Interface.load(
    "huggingface/Helsinki-NLP/opus-mt-bn-en",
    title="Translation",
)

# Pretrained Bengali GPT-2.
io1 = gr.Interface.load(
    "huggingface/flax-community/gpt2-bengali",
    title="Bengali-GPT2",
)

# Bengali GPT-2 finetuned on song lyrics.
io2 = gr.Interface.load(
    "huggingface/khalidsaifullaah/bengali-lyricist-gpt2",
    title="Finetuned Bengali-GPT2(Song Lyrics)",
)

# Combine the three interfaces into one via Parallel.
iface = Parallel(
    translator,
    io1,
    io2,
    title="Bengali-gpt2 demo",
    examples=examples,
    layout='vertical',
    description="Features pretrained gpt2 bengali model along with finetuned version on song lyrics",
)

if __name__ == "__main__":
    iface.launch()
prompts.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
# Sample prompts grouped by category; the category names are the dict keys.
PROMPT_LIST = {
    "Bangla-Gaan(music)": [
        'আমার সোনার বাংলা',
        'মনে পড়ে, রুবি রায়',
    ],
    "Wikipedia": [
        "বাংলাদেশ দক্ষিণ এশিয়ার একটি সার্বভৌম রাষ্ট্র। বাংলাদেশের সাংবিধানিক নাম গণপ্রজাতন্ত্রী বাংলাদেশ।\n",
    ],
}
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ requests==2.24.0
3
+ requests-toolbelt==0.9.1
4
+ mtranslate