# Mistral8x7B / app.py

from huggingface_hub import InferenceClient
import gradio as gr
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
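
# Note: InferenceClient talks to the hosted Hugging Face Inference API; depending
# on the model and rate limits, an HF access token (e.g. the HF_TOKEN environment
# variable or a Space secret) may be required for authenticated requests.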

# Formats the prompt so it carries all past messages in the Mixtral
# instruction format: <s>[INST] ... [/INST] answer</s> ...
def format_prompt(message, history):
    prompt = "<s>"
    prompt_template = "[INST] {} [/INST]"

    # Append every past user input and bot response to the prompt
    for user_prompt, bot_response in history:
        prompt += prompt_template.format(user_prompt)
        prompt += f" {bot_response}</s> "

    prompt += prompt_template.format(message)
    return prompt
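
# For illustration only (a hypothetical one-turn history), format_prompt yields
# the chat layout the Mixtral instruct model expects:
#   format_prompt("How are you?", [("Hi", "Hello!")])
#   -> "<s>[INST] Hi [/INST] Hello!</s> [INST] How are you? [/INST]"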

# Used for grammatical error correction (GEC); ignores the real chat history
# and instead prepends two fixed few-shot correction examples.
def format_prompt_grammar(message, history):
    prompt = "<s>"

    # Instruction prepended to every prompt
    prompt_prefix = "Correct any grammatical errors in the following sentence and provide the corrected version:\n\nSentence:"
    prompt_template = "[INST] " + prompt_prefix + " {} [/INST]"

    # Fixed few-shot examples used in place of the actual history
    myHistory = [
        ["It is my friends house in England.", "It is my friend's house in England."],
        ["Every girl must bring their books to school.", "Every girl must bring her books to school."],
    ]

    # Append every example input and corrected response to the prompt
    for user_prompt, bot_response in myHistory:
        prompt += prompt_template.format(user_prompt)
        prompt += f" {bot_response}</s> \n"

    prompt += prompt_template.format(message)
    return prompt
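
# For illustration only: with the fixed examples above, a call like
# format_prompt_grammar("People is coming.", []) builds a few-shot prompt of the shape
# (prefix abbreviated with "..."):
#   <s>[INST] ... Sentence: It is my friends house in England. [/INST] It is my friend's house in England.</s>
#   [INST] ... Sentence: Every girl must bring their books to school. [/INST] Every girl must bring her books to school.</s>
#   [INST] ... Sentence: People is coming. [/INST]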

def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    # Clamp temperature away from zero so sampling stays valid
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # formatted_prompt = format_prompt_grammar(f"Corrected Sentence: {prompt}", history)
    formatted_prompt = format_prompt(f"{system_prompt} {prompt}", history)
    print("\nPROMPT: \n\t" + formatted_prompt)

    # Stream tokens from the HF inference endpoint, yielding the accumulated
    # text so the chat window updates incrementally
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        yield output
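
# A minimal sketch of consuming the generator outside Gradio (the empty history
# and the system prompt here are hypothetical placeholders):
#
#   for partial in generate("We shood buy an car.", [], "Fix the grammar in this sentence:"):
#       print(partial)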

additional_inputs = [
    gr.Textbox(label="System Prompt", value="", max_lines=1, interactive=True),
    gr.Slider(label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs"),
    gr.Slider(label="Max new tokens", value=256, minimum=0, maximum=1048, step=64, interactive=True, info="The maximum number of new tokens"),
    gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
    gr.Slider(label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05, interactive=True, info="Penalize repeated tokens"),
]

examples = [
    'Give me the grammatically correct version of the sentence: "We shood buy an car"',
    "Give me an example exam question testing students on square roots of basic integers",
    "Would this block of HTML code run?\n```\n\n```",
]
# examples += ["I have been to New York last summer.", "We shood buy an car.", "People is coming to my party.", "She is more taller.", "Their were lot of sheeps.", "I want to speak English good.", "I must to buy a new cartoon book."]
# Pad each example with None for the five additional inputs (system prompt and sampling sliders)
examples = [[x, None, None, None, None, None] for x in examples]

gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    title="My Mistral Space",
    examples=examples,
    concurrency_limit=20,
).launch(show_api=False)