# --- Hugging Face file-viewer residue (not Python source); preserved as comments ---
# ysharma's picture
# ysharma HF staff
# Update app.py
# 29a138d
# raw
# history blame contribute delete
# No virus
# 8.66 kB
import gradio as gr
import os
import json
import requests
# Hugging Face API token (Space secret) used to authorize Inference API calls.
HF_TOKEN = os.getenv('HF_TOKEN')
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
# Inference endpoint URLs for the two models, supplied via environment variables.
# NOTE(review): these are URLs despite the model-like names — confirm the env vars are set.
zephyr_7b_beta = os.getenv('zephyr_7b_beta')
zephyr_7b_alpha = os.getenv('zephyr_7b_alpha')
def build_input_prompt(message, chatbot):
    """
    Assemble the Zephyr chat-template prompt string.

    Encodes every prior (user, assistant) turn in *chatbot*, then appends
    the new *message*, leaving the prompt open at the assistant turn.
    """
    segments = ["<|system|>\n</s>\n<|user|>\n"]
    for turn in chatbot:
        segments.append(str(turn[0]) + "</s>\n<|assistant|>\n" + str(turn[1]) + "\n</s>\n<|user|>\n")
    segments.append(str(message) + "</s>\n<|assistant|>")
    return "".join(segments)
def post_request_beta(payload):
    """
    POST *payload* as JSON to the Zephyr-7b-Beta inference endpoint.

    Returns the decoded JSON body; raises requests.HTTPError for any
    non-success HTTP status.
    """
    resp = requests.post(zephyr_7b_beta, headers=HEADERS, json=payload)
    resp.raise_for_status()  # surface 4xx/5xx as HTTPError for the caller
    return resp.json()
def post_request_alpha(payload):
    """
    POST *payload* as JSON to the Zephyr-7b-Alpha inference endpoint.

    Returns the decoded JSON body; raises requests.HTTPError for any
    non-success HTTP status.
    """
    resp = requests.post(zephyr_7b_alpha, headers=HEADERS, json=payload)
    resp.raise_for_status()  # surface 4xx/5xx as HTTPError for the caller
    return resp.json()
def predict_beta(message, chatbot=None, temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0):
    """
    Generate a Zephyr-7b-Beta reply and append the exchange to the chat history.

    Parameters:
        message: The user's new message.
        chatbot: Prior (user, assistant) turns; a fresh list is created when
            omitted. Mutated in place with the new turn.
        temperature, max_new_tokens, top_p, repetition_penalty: Sampling
            parameters forwarded to the inference endpoint.

    Returns:
        ("", updated chat history) — the empty string clears the textbox.

    Raises:
        gr.Error: On HTTP failure, undecodable JSON, an endpoint-reported
            error, or an unexpected response shape.
    """
    # Fix: the previous signature used a mutable default (chatbot=[]),
    # which would share a single history list across default-argument calls.
    if chatbot is None:
        chatbot = []
    temperature = float(temperature)
    top_p = float(top_p)

    input_prompt = build_input_prompt(message, chatbot)
    data = {
        "inputs": input_prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "repetition_penalty": repetition_penalty,
            "do_sample": True,
        },
    }

    try:
        response_data = post_request_beta(data)
        json_obj = response_data[0]
        if 'generated_text' in json_obj and len(json_obj['generated_text']) > 0:
            bot_message = json_obj['generated_text']
            chatbot.append((message, bot_message))
            return "", chatbot
        elif 'error' in json_obj:
            raise gr.Error(json_obj['error'] + ' Please refresh and try again with smaller input prompt')
        else:
            warning_msg = f"Unexpected response: {json_obj}"
            raise gr.Error(warning_msg)
    except requests.HTTPError as e:
        error_msg = f"Request failed with status code {e.response.status_code}"
        raise gr.Error(error_msg)
    except json.JSONDecodeError as e:
        error_msg = f"Failed to decode response as JSON: {str(e)}"
        raise gr.Error(error_msg)
def predict_alpha(message, chatbot=None, temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0):
    """
    Generate a Zephyr-7b-Alpha reply and append the exchange to the chat history.

    Parameters:
        message: The user's new message.
        chatbot: Prior (user, assistant) turns; a fresh list is created when
            omitted. Mutated in place with the new turn.
        temperature, max_new_tokens, top_p, repetition_penalty: Sampling
            parameters forwarded to the inference endpoint.

    Returns:
        ("", updated chat history) — the empty string clears the textbox.

    Raises:
        gr.Error: On HTTP failure, undecodable JSON, an endpoint-reported
            error, or an unexpected response shape.
    """
    # Fix: the previous signature used a mutable default (chatbot=[]),
    # which would share a single history list across default-argument calls.
    if chatbot is None:
        chatbot = []
    temperature = float(temperature)
    top_p = float(top_p)

    input_prompt = build_input_prompt(message, chatbot)
    data = {
        "inputs": input_prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "repetition_penalty": repetition_penalty,
            "do_sample": True,
        },
    }

    try:
        response_data = post_request_alpha(data)
        json_obj = response_data[0]
        if 'generated_text' in json_obj and len(json_obj['generated_text']) > 0:
            bot_message = json_obj['generated_text']
            chatbot.append((message, bot_message))
            return "", chatbot
        elif 'error' in json_obj:
            raise gr.Error(json_obj['error'] + ' Please refresh and try again with smaller input prompt')
        else:
            warning_msg = f"Unexpected response: {json_obj}"
            raise gr.Error(warning_msg)
    except requests.HTTPError as e:
        error_msg = f"Request failed with status code {e.response.status_code}"
        raise gr.Error(error_msg)
    except json.JSONDecodeError as e:
        error_msg = f"Failed to decode response as JSON: {str(e)}"
        raise gr.Error(error_msg)
def retry_fun_beta(chat_history_beta):
    """
    Re-run the last user message against Zephyr-7b-Beta.

    Drops the most recent (user, assistant) pair from the history and
    requests a fresh completion for that same user message.
    """
    if not chat_history_beta:
        raise gr.Error("Chat history is empty or invalid.")
    last_user_message = chat_history_beta.pop()[0]
    _, updated_history = predict_beta(last_user_message, chat_history_beta)
    return updated_history
def retry_fun_alpha(chat_history_alpha):
    """
    Re-run the last user message against Zephyr-7b-Alpha.

    Drops the most recent (user, assistant) pair from the history and
    requests a fresh completion for that same user message.
    """
    if not chat_history_alpha:
        raise gr.Error("Chat history is empty or invalid.")
    last_user_message = chat_history_alpha.pop()[0]
    _, updated_history = predict_alpha(last_user_message, chat_history_alpha)
    return updated_history
title = "🌀Zephyr Playground🎮"
description = """
Welcome to the Zephyr Playground! This interactive space lets you experience the prowess of two distinct Zephyr models – [Zephyr-7b-Alpha](https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha) and [Zephyr-7b-Beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) – side by side. These models are products of fine-tuning the Mistral models.
- 🔎 Dive deep into the nuances and performance of these models by comparing their responses in real-time.
- 📖 For a comprehensive understanding of the Zephyr models, delve into their [technical report](https://arxiv.org/abs/2310.16944) and experiment with the [official Zephyr demo](https://huggingfaceh4-zephyr-chat.hf.space/).
- 🛠 If you wish to explore more chat models or set up your own interactive demo, visit the [Hugging Face's chat playground](https://huggingface.co/spaces/HuggingFaceH4/chat-playground).
"""
footnote = """Note: All rights, including licensing and acceptable use policies, related to the Zephyr models, can be found on their respective model pages on Hugging Face.
"""
css = """
.gradio-container {
width: 100vw !important;
min-height: 100vh !important;
padding:0 !important;
margin:0 !important;
max-width: none !important;
}
"""
# Create chatbot components
chat_beta = gr.Chatbot(label="zephyr-7b-beta", layout='panel')
chat_alpha = gr.Chatbot(label="zephyr-7b-alpha", layout='panel')
# Create input and button components
textbox = gr.Textbox(container=False,
placeholder='Enter text and click the Submit button or press Enter')
submit = gr.Button('Submit', variant='primary',)
retry = gr.Button('🔄Retry', variant='secondary')
undo = gr.Button('↩️Undo', variant='secondary')
# Layout the components using Gradio Blocks API
with gr.Blocks(css=css) as demo:
    gr.HTML(f'<h1><center> {title} </center></h1>')
    gr.Markdown(description)
    # Two chat panels side by side, one per model.
    with gr.Row():
        chat_beta.render()
        chat_alpha.render()
    with gr.Group():
        with gr.Row(equal_height=True):
            with gr.Column(scale=5):
                textbox.render()
            with gr.Column(scale=1):
                submit.render()
            with gr.Row():
                retry.render()
                undo.render()
                # Clear wipes the textbox and both chat histories.
                clear = gr.ClearButton(value='🗑️Clear',
                              components=[textbox,
                                          chat_beta,
                                          chat_alpha])
    gr.Markdown(footnote)

    # Assign events to components.
    # Each user action fans out to both models so their replies can be compared.
    textbox.submit(predict_beta, [textbox, chat_beta], [textbox, chat_beta])
    textbox.submit(predict_alpha, [textbox, chat_alpha], [textbox, chat_alpha])
    submit.click(predict_beta, [textbox, chat_beta], [textbox, chat_beta])
    submit.click(predict_alpha, [textbox, chat_alpha], [textbox, chat_alpha])
    # Undo drops the last (user, assistant) pair; Retry regenerates it.
    undo.click(lambda x:x[:-1], [chat_beta], [chat_beta])
    undo.click(lambda x:x[:-1], [chat_alpha], [chat_alpha])
    retry.click(retry_fun_beta, [chat_beta], [chat_beta])
    retry.click(retry_fun_alpha, [chat_alpha], [chat_alpha])

    gr.Examples([
        ['Hi! Who are you?'],
        ['What is a meme?'],
        ['Explain the plot of Cinderella in a sentence.'],
        ['Assuming I am a huge alien species with the ability to consume helicopters, how long would it take me to eat one?'],
        ],
        textbox)

# Launch the demo
demo.launch(debug=True)