gradio-playground

Runtime error

App Files Files Community

gradio-playground / app.py

ArmelR

Update app.py

572b214 12 months ago

raw

history blame

No virus

9.78 kB

	import json
	import os
	import re
	import shutil
	import requests
	import warnings

	import gradio as gr
	from huggingface_hub import Repository
	from text_generation import Client

	from share_btn import community_icon_html, loading_icon_html, share_js, share_btn_css

	HF_TOKEN = os.environ.get("HF_TOKEN", None)

	API_URL_G = "https://api-inference.huggingface.co/models/ArmelR/starcoder-gradio-v0"
	API_URL_S = "https://api-inference.huggingface.co/models/HuggingFaceH4/starcoderbase-finetuned-oasst1"

	with open("./HHH_prompt_short.txt", "r") as f:
	HHH_PROMPT = f.read() + "\n\n"

	with open("./TA_prompt_v0.txt", "r") as f:
	TA_PROMPT = f.read()

	NO_PROMPT = ""

	FIM_PREFIX = "<fim_prefix>"
	FIM_MIDDLE = "<fim_middle>"
	FIM_SUFFIX = "<fim_suffix>"

	FIM_INDICATOR = "<FILL_HERE>"

	FORMATS = """
	# Chat mode
	Chat mode prepends the custom [TA prompt](https://huggingface.co/spaces/bigcode/chat-playground/blob/main/TA_prompt_v0.txt) or the [HHH prompt](https://gist.github.com/jareddk/2509330f8ef3d787fc5aaac67aab5f11#file-hhh_prompt-txt) from Anthropic to the request which conditions the model to serve as an assistant.

	⚠️ Intended Use: this app and its [supporting model](https://huggingface.co/bigcode) are provided for demonstration purposes; not to serve as replacement for human expertise. For more details on the model's limitations in terms of factuality and biases, see the [model card.](hf.co/bigcode)

	"""

	theme = gr.themes.Monochrome(
	primary_hue="indigo",
	secondary_hue="blue",
	neutral_hue="slate",
	radius_size=gr.themes.sizes.radius_sm,
	font=[
	gr.themes.GoogleFont("Open Sans"),
	"ui-sans-serif",
	"system-ui",
	"sans-serif",
	],
	)

	client_g = Client(
	API_URL_G, headers={"Authorization": f"Bearer {HF_TOKEN}"},
	)

	client_s = Client(
	API_URL_S, headers={"Authorization": f"Bearer {HF_TOKEN}"},
	)

	def wrap_html_code(text):
	pattern = r"<.*?>"
	matches = re.findall(pattern, text)
	if len(matches) > 0:
	return f"```{text}```"
	else:
	return text

	def generate(
	prompt,
	temperature=0.9,
	max_new_tokens=256,
	top_p=0.95,
	repetition_penalty=1.0,
	chat_mode="TA prompt",
	version="StarCoder-gradio",
	):

	temperature = float(temperature)
	if temperature < 1e-2:
	temperature = 1e-2
	top_p = float(top_p)
	fim_mode = False

	generate_kwargs = dict(
	temperature=temperature,
	max_new_tokens=max_new_tokens,
	top_p=top_p,
	repetition_penalty=repetition_penalty,
	truncate=7500,
	do_sample=True,
	seed=42,
	stop_sequences=["\nHuman", "\n-----", "Question:", "Answer:"],
	)

	if chat_mode == "HHH prompt":
	base_prompt = HHH_PROMPT
	elif chat_mode == "TA prompt":
	base_prompt = TA_PROMPT
	else :
	base_prompt = NO_PROMPT


	if version == "StarCoder-gradio" :
	chat_prompt = prompt + "\n\nAnswer:"
	prompt = base_prompt + chat_prompt
	print("PROMPT : "+str(prompt))
	stream = client_g.generate_stream(prompt, **generate_kwargs)
	elif version == "StarChat-alpha" :
	chat_prompt = prompt + "\n\nAssistant:"
	prompt = base_prompt + chat_prompt
	stream = client_s.generate_stream(prompt, **generate_kwargs)
	else :
	ValueError("Unsupported version of the Coding assistant")

	output = ""
	previous_token = ""
	#t = 0
	for response in stream:
	#print(f"IN_{t}")
	if (
	(response.token.text in ["Human", "-----", "Question:"] and previous_token in ["\n", "-----"])
	or response.token.text in ["<\|endoftext\|>", "<\|end\|>"]
	):
	print("OUT = "+str(output))
	return wrap_html_code(output.strip())
	else:
	output += response.token.text
	#print(f"Out_{t} : {output}")
	#t += 1
	previous_token = response.token.text
	print("Output = "+str(output))
	return wrap_html_code(output.strip())


	# chatbot mode
	def user(user_message, history):
	return "", history + [[user_message, None]]


	def bot(
	history,
	temperature=0.9,
	max_new_tokens=256,
	top_p=0.95,
	repetition_penalty=1.0,
	chat_mode=None,
	version="StarChat",
	):
	# concat history of prompts with answers expect for last empty answer only add prompt
	if version == "StarCoder-gradio" :
	prompt = "\n".join(
	[f"Question: {prompt}\n\nAnswer: {answer}" for prompt, answer in history[:-1]] + [f"\nQuestion: {history[-1][0]}"]
	)
	else :
	prompt = "\n".join(
	[f"Human: {prompt}\n\nAssistant: {answer}" for prompt, answer in history[:-1]] + [f"\nHuman: {history[-1][0]}"]
	)

	bot_message = generate(
	prompt,
	temperature=temperature,
	max_new_tokens=max_new_tokens,
	top_p=top_p,
	repetition_penalty=repetition_penalty,
	chat_mode=chat_mode,
	version=version


	)
	history[-1][1] = bot_message
	return history


	examples = [
	"def print_hello_world():",
	"def fibonacci(n):",
	"class TransformerDecoder(nn.Module):",
	"class ComplexNumbers:",
	"How to install gradio"
	]


	def process_example(args):
	for x in generate(args):
	pass
	return x


	css = ".generating {visibility: hidden}" + share_btn_css

	with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
	with gr.Column():
	gr.Markdown(
	"""\
	#Gradio Assistant powered by ‍💫 StarCoder
	_Note:_ this is an internal chat playground - please do not share. The deployment can also change and thus the space not work as we continue development.\
	"""
	)
	with gr.Row():
	column_1, column_2 = gr.Column(scale=3), gr.Column(scale=1)
	with column_2:
	chat_mode = gr.Dropdown(
	["NO prompt","TA prompt", "HHH prompt"],
	value="NO prompt",
	label="Chat mode",
	info="Use Anthropic's HHH prompt or our custom tech prompt to turn the model into an assistant.",
	)
	temperature = gr.Slider(
	label="Temperature",
	value=0.2,
	minimum=0.0,
	maximum=2.0,
	step=0.1,
	interactive=True,
	info="Higher values produce more diverse outputs",
	)
	max_new_tokens = gr.Slider(
	label="Max new tokens",
	value=512,
	minimum=0,
	maximum=8192,
	step=64,
	interactive=True,
	info="The maximum numbers of new tokens",
	)
	top_p = gr.Slider(
	label="Top-p (nucleus sampling)",
	value=0.95,
	minimum=0.0,
	maximum=1,
	step=0.05,
	interactive=True,
	info="Higher values sample more low-probability tokens",
	)
	repetition_penalty = gr.Slider(
	label="Repetition penalty",
	value=1.2,
	minimum=1.0,
	maximum=2.0,
	step=0.05,
	interactive=True,
	info="Penalize repeated tokens",
	)
	version = gr.Dropdown(
	["StarCoder-gradio", "StarChat-alpha"],
	value="StarCoder-gradio",
	label="Version",
	info="",
	)
	with column_1:
	# output = gr.Code(elem_id="q-output")
	# add visibl=False and update if chat_mode True
	chatbot = gr.Chatbot()
	instruction = gr.Textbox(
	placeholder="Enter your prompt here",
	label="Prompt",
	elem_id="q-input",
	)
	with gr.Row():
	with gr.Column():
	clear = gr.Button("Clear Chat")
	with gr.Column():
	submit = gr.Button("Generate", variant="primary")
	with gr.Group(elem_id="share-btn-container"):
	community_icon = gr.HTML(community_icon_html, visible=True)
	loading_icon = gr.HTML(loading_icon_html, visible=True)
	share_button = gr.Button(
	"Share to community", elem_id="share-btn", visible=True
	)
	# examples of non-chat mode
	#gr.Examples(
	# examples=examples,
	# inputs=[instruction],
	# cache_examples=False,
	# fn=process_example,
	# outputs=[output],
	# )
	gr.Markdown(FORMATS)


	instruction.submit(
	user, [instruction, chatbot], [instruction, chatbot], queue=False
	).then(
	bot,
	[chatbot, temperature, max_new_tokens, top_p, repetition_penalty, chat_mode, version],
	chatbot,
	)

	submit.click(
	user, [instruction, chatbot], [instruction, chatbot], queue=False
	).then(
	bot,
	[chatbot, temperature, max_new_tokens, top_p, repetition_penalty, chat_mode, version],
	chatbot,
	)
	clear.click(lambda: None, None, chatbot, queue=False)

	share_button.click(None, [], [], _js=share_js)
	demo.queue(concurrency_count=16).launch(debug=True)