Spaces:

flax-community
/

Gpt2-bengali

Runtime error

App Files Files Community

Gpt2-bengali / app.py

Tahsin-Mayeesha

Update contributor list order

89a4d48 almost 3 years ago

raw history blame contribute delete

No virus

4.41 kB

	""" Modified from https://huggingface.co/spaces/flax-community/gpt2-indonesian/tree/main """


	import json
	import requests
	from mtranslate import translate
	from prompts import PROMPT_LIST
	import streamlit as st
	import random



	# Markdown overview of the model: evaluation numbers, training data, step
	# count, contributors and a link to the model card. The app displays it
	# with st.write(description).
	description = """
	## Overview
	* Overall Result: Eval loss : 1.45, Eval Perplexity : 3.141
	* Data: [mC4-bn](https://huggingface.co/datasets/mc4)
	* Train Steps: 250k steps
	* Contributors: Khalid Saifullah, Tasmiah Tahsin Mayeesha, Ritobrata Ghosh, Ibrahim Musa, M Saiful Bari
	* link [🤗 flax-community/gpt2-bengali](https://huggingface.co/flax-community/gpt2-bengali/)
	"""


	# HTTP headers passed along with every inference request; none are set.
	headers = {}

	# Display name of each selectable model -> its inference endpoint.
	MODELS = {
	    "GPT-2 Bengali": {"url": "https://api-inference.huggingface.co/models/flax-community/gpt2-bengali"},
	    "GPT-2 Finetuned(On Bengali Songs)": {"url": "https://api-inference.huggingface.co/models/khalidsaifullaah/bengali-lyricist-gpt2"},
	}


	def query(payload, model_name, timeout=60):
	    """POST ``payload`` as JSON to the endpoint registered for ``model_name``.

	    Args:
	        payload: JSON-serialisable request body.
	        model_name: Key into ``MODELS``; selects the endpoint URL.
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        The decoded JSON response body. If the request fails at the network
	        level, times out, or the body is not valid UTF-8 JSON, a dict of the
	        form ``{"error": "<message>"}`` is returned instead, which is the
	        same shape the caller already checks for.

	    Raises:
	        KeyError: If ``model_name`` is not a key of ``MODELS``.
	    """
	    data = json.dumps(payload)
	    url = MODELS[model_name]["url"]
	    print("model url:", url)
	    try:
	        # Without a timeout a stalled connection would block the app forever.
	        response = requests.request(
	            "POST", url, headers=headers, data=data, timeout=timeout
	        )
	    except requests.RequestException as exc:
	        return {"error": f"Request to the inference API failed: {exc}"}

	    try:
	        return json.loads(response.content.decode("utf-8"))
	    except ValueError:
	        # Covers both UnicodeDecodeError and json.JSONDecodeError, e.g. when a
	        # gateway answers with an HTML error page instead of JSON.
	        return {
	            "error": "The inference API returned a non-JSON response "
	                     f"(HTTP {response.status_code})"
	        }


	def process(text: str,
	            model_name: str,
	            max_len: int,
	            temp: float,
	            top_k: int,
	            top_p: float,
	            do_sample: bool = True):
	    """Build a text-generation request for ``text`` and send it with ``query``.

	    Args:
	        text: Prompt to continue.
	        model_name: Key into ``MODELS`` naming the model to call.
	        max_len: Sent as ``max_new_tokens``.
	        temp: Sent as ``temperature``.
	        top_k: Sent as ``top_k``.
	        top_p: Sent as ``top_p``.
	        do_sample: Sent as ``do_sample``; ``False`` asks for greedy decoding.

	    Returns:
	        Whatever ``query`` returns for the assembled payload.
	    """
	    payload = {
	        "inputs": text,
	        "parameters": {
	            "max_new_tokens": max_len,
	            "top_k": top_k,
	            "top_p": top_p,
	            "temperature": temp,
	            "repetition_penalty": 2.0,
	            "do_sample": do_sample,
	        },
	        "options": {
	            "use_cache": True,
	        },
	    }
	    return query(payload, model_name)


	# ---- Page setup -----------------------------------------------------------
	st.set_page_config(page_title="Bengali GPT-2 Demo")

	st.title("Bengali GPT-2")

	# ---- Sidebar: generation parameters ---------------------------------------
	st.sidebar.subheader("Configurable parameters")

	max_len = st.sidebar.number_input(
	    "Maximum length",
	    value=30,
	    help="The maximum length of the sequence to be generated."
	)

	temp = st.sidebar.slider(
	    "Temperature",
	    value=1.0,
	    min_value=0.1,
	    max_value=100.0,
	    help="The value used to module the next token probabilities."
	)

	top_k = st.sidebar.number_input(
	    "Top k",
	    value=10,
	    help="The number of highest probability vocabulary tokens to keep for top-k-filtering."
	)

	top_p = st.sidebar.number_input(
	    "Top p",
	    value=0.95,
	    help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation."
	)

	do_sample = st.sidebar.selectbox('Sampling?', (True, False), help="Whether or not to use sampling; use greedy decoding otherwise.")

	# ---- Main column: intro, model and prompt selection -----------------------
	st.markdown(
	    """Bengali GPT-2 demo. Part of the [Huggingface JAX/Flax event](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/). Also features a finetuned version on bengali song lyrics."""
	)

	st.write(description)

	# Offer exactly the models that query() can resolve.
	model_name = st.selectbox('Model', list(MODELS))

	ALL_PROMPTS = list(PROMPT_LIST.keys())+["Custom"]
	# "Custom" is appended last and is the default selection.
	prompt = st.selectbox('Prompt', ALL_PROMPTS, index=len(ALL_PROMPTS)-1)

	if prompt == "Custom":
	    prompt_box = "Enter your text here"
	else:
	    # Pre-fill the text area with a random example from the chosen set.
	    prompt_box = random.choice(PROMPT_LIST[prompt])

	text = st.text_area("Enter text", prompt_box)

	# ---- Run generation and render the outcome --------------------------------
	if st.button("Run"):
	    with st.spinner(text="Getting results..."):
	        st.subheader("Result")
	        print(f"maxlen:{max_len}, temp:{temp}, top_k:{top_k}, top_p:{top_p}")
	        result = process(text=text,
	                         model_name=model_name,
	                         max_len=int(max_len),
	                         temp=temp,
	                         top_k=int(top_k),
	                         top_p=float(top_p),
	                         do_sample=bool(do_sample))

	        print("result:", result)
	        if "error" in result:
	            error = result["error"]
	            if isinstance(error, str):
	                st.write(f'{error}.')
	                if "estimated_time" in result:
	                    st.write(f'Please try it again in about {result["estimated_time"]:.0f} seconds')
	            elif isinstance(error, list):
	                for item in error:
	                    st.write(f'{item}')
	        else:
	            try:
	                generated = result[0]["generated_text"]
	            except (KeyError, IndexError, TypeError):
	                # The response was neither an error nor a list of generations.
	                st.write("Unexpected response from the inference API")
	            else:
	                # Two trailing spaces before a newline form a Markdown hard
	                # line break, so line breaks in the output stay visible.
	                st.write(generated.replace("\n", "  \n"))
	                st.text("English translation")
	                st.write(translate(generated, "en", "bn").replace("\n", "  \n"))