Spaces:

flax-community
/

Gpt2-bengali

Runtime error

App Files Files Community

Tahsin-Mayeesha committed on Jul 18, 2021

Commit

2d78164

•

1 Parent(s): c232f33

added streamlit and gradio app

Browse files

Files changed (4) hide show

app.py +140 -0
gradioapp.py +25 -0
prompts.py +8 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,140 @@

+""" Modified from https://huggingface.co/spaces/flax-community/gpt2-indonesian/tree/main """
+import json
+import requests
+from mtranslate import translate
+from prompts import PROMPT_LIST
+import streamlit as st
+import random
+description = """
+## Overview
+* **Overall Result:** So Fluent in Mongolian
+* **Data:** [mC4-bn](https://huggingface.co/datasets/mc4)
+* **Train Steps:** 250k steps
+* **Contributors:** M Saiful Bari,Khalid Saifullah,Ibrahim Musa, Tasmiah Tahsin Mayeesha, Ritobrata Ghosh
+* **link** [[🤗 huggingface](https://huggingface.co/flax-community/gpt2-bengali/)]
+"""
+headers = {}
+MODELS = {
+    "GPT-2 Bengali": {
+        "url": "https://api-inference.huggingface.co/models/flax-community/gpt2-bengali"
+    },
+    "GPT-2 Finetuned(On Bengali Songs)": {
+        "url": "https://api-inference.huggingface.co/models/khalidsaifullaah/bengali-lyricist-gpt2"
+    },
+}
+def query(payload, model_name):
+    data = json.dumps(payload)
+    print("model url:", MODELS[model_name]["url"])
+    response = requests.request("POST", MODELS[model_name]["url"], headers=headers, data=data)
+    return json.loads(response.content.decode("utf-8"))
+def process(text: str,
+            model_name: str,
+            max_len: int,
+            temp: float,
+            top_k: int,
+            top_p: float):
+    payload = {
+        "inputs": text,
+        "parameters": {
+            "max_new_tokens": max_len,
+            "top_k": top_k,
+            "top_p": top_p,
+            "temperature": temp,
+            "repetition_penalty": 2.0,
+        },
+        "options": {
+            "use_cache": True,
+        }
+    }
+    return query(payload, model_name)
+st.set_page_config(page_title="Bengali GPT-2 Demo")
+st.title("Bengali GPT-2")
+st.sidebar.subheader("Configurable parameters")
+max_len = st.sidebar.number_input(
+    "Maximum length",
+    value=30,
+    help="The maximum length of the sequence to be generated."
+)
+temp = st.sidebar.slider(
+    "Temperature",
+    value=1.0,
+    min_value=0.1,
+    max_value=100.0,
+    help="The value used to module the next token probabilities."
+)
+top_k = st.sidebar.number_input(
+    "Top k",
+    value=10,
+    help="The number of highest probability vocabulary tokens to keep for top-k-filtering."
+)
+top_p = st.sidebar.number_input(
+    "Top p",
+    value=0.95,
+    help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation."
+)
+do_sample = st.sidebar.selectbox('Sampling?', (True, False), help="Whether or not to use sampling; use greedy decoding otherwise.")
+st.markdown(
+    """Bengali GPT-2 demo. Part of the [Huggingface JAX/Flax event](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/). Also features a finetuned version on bengali song lyrics."""
+)
+st.write(description)
+model_name = st.selectbox('Model',(['GPT-2 Bengali', 'GPT-2 Finetuned(On Bengali Songs)']))
+ALL_PROMPTS = list(PROMPT_LIST.keys())+["Custom"]
+prompt = st.selectbox('Prompt', ALL_PROMPTS, index=len(ALL_PROMPTS)-1)
+if prompt == "Custom":
+    prompt_box = "Enter your text here"
+else:
+    prompt_box = random.choice(PROMPT_LIST[prompt])
+text = st.text_area("Enter text", prompt_box)
+if st.button("Run"):
+    with st.spinner(text="Getting results..."):
+        st.subheader("Result")
+        print(f"maxlen:{max_len}, temp:{temp}, top_k:{top_k}, top_p:{top_p}")
+        result = process(text=text,
+                         model_name=model_name,
+                         max_len=int(max_len),
+                         temp=temp,
+                         top_k=int(top_k),
+                         top_p=float(top_p))
+        print("result:", result)
+        if "error" in result:
+            if type(result["error"]) is str:
+                st.write(f'{result["error"]}.', end=" ")
+                if "estimated_time" in result:
+                    st.write(f'Please try it again in about {result["estimated_time"]:.0f} seconds')
+            else:
+                if type(result["error"]) is list:
+                    for error in result["error"]:
+                        st.write(f'{error}')
+        else:
+            result = result[0]["generated_text"]
+            st.write(result.replace("\n", "  \n"))
+            st.text("English translation")
+            st.write(translate(result, "en", "bn").replace("\n", "  \n"))

gradioapp.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import gradio as gr
+from gradio.mix import Parallel
+examples = [['আমার সোনার বাংলা'],['মনে পড়ে, রুবি রায়']]
+translator = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-bn-en",title="Translation")
+io1 = gr.Interface.load("huggingface/flax-community/gpt2-bengali",
+                 title="Bengali-GPT2")
+io2 = gr.Interface.load("huggingface/khalidsaifullaah/bengali-lyricist-gpt2",
+                       title = "Finetuned Bengali-GPT2(Song Lyrics)")
+iface = Parallel(translator,io1,io2,
+         title = "Bengali-gpt2 demo",
+         examples=examples,
+         layout='vertical',
+         description = "Features pretrained gpt2 bengali model along with finetuned version on song lyrics")
+if __name__ == "__main__":
+    iface.launch()

prompts.py ADDED Viewed

	@@ -0,0 +1,8 @@

+PROMPT_LIST = {
+    "Bangla-Gaan(music)": [
+        'আমার সোনার বাংলা','মনে পড়ে, রুবি রায়'
+    ],
+    "Wikipedia": [
+        "বাংলাদেশ দক্ষিণ এশিয়ার একটি সার্বভৌম রাষ্ট্র। বাংলাদেশের সাংবিধানিক নাম গণপ্রজাতন্ত্রী বাংলাদেশ।\n",
+    ]
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+streamlit
+requests==2.24.0
+requests-toolbelt==0.9.1
+mtranslate