Spaces:

fxmarty
/

tgi-mi300-demo-chat

Runtime error

fxmarty

add app

d6e5fcb 2 months ago

No virus

2.22 kB

	import gradio as gr
	import random
	import time

	from huggingface_hub import InferenceClient
	from transformers import AutoTokenizer

	# Hub repo whose tokenizer is loaded below; elsewhere in this file it is
	# only used to render the chat template into a prompt string.
	_TOKENIZER_REPO = "meta-llama/Meta-Llama-3-8B-Instruct"
	# Remote endpoint the InferenceClient sends text-generation requests to.
	_ENDPOINT_URL = "https://770c-20-63-4-233.ngrok-free.app"

	tokenizer = AutoTokenizer.from_pretrained(_TOKENIZER_REPO)
	client = InferenceClient(model=_ENDPOINT_URL)

	# System turn placed at the start of every conversation history.
	SYSTEM_COMMAND = {"role": "system", "content": "Context: date: Monday 20th May 2024; location: NYC; running on: 8 AMD Instinct MI300 GPU; model name: Llama 70B. Only provide these information if asked. You are a knowledgeable assistant trained to provide accurate and helpful information. Please respond to the user's queries promptly and politely."}

	# Llama 3 control markers, passed to the endpoint as stop sequences.
	# They must be written with plain "|": "\|" is not a valid Python escape,
	# so the backslash would stay in the string and the marker could never
	# match. The last entry is deliberately a prefix of the numbered
	# "<|reserved_special_token_N|>" family.
	STOP_TOKENS = ["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>", "<|reserved_special_token"]

	# Stream items that must never be appended to the visible reply: the
	# control markers above plus None.
	IGNORED_TOKENS = {None, *STOP_TOKENS}

	with gr.Blocks() as demo:
	    # Two parallel histories are kept:
	    #   * `chatbot` holds [user_text, bot_text] pairs for display;
	    #   * `tfs_history` holds {"role", "content"} dicts in the shape
	    #     expected by tokenizer.apply_chat_template, seeded with the
	    #     system turn.
	    tfs_history = gr.State([SYSTEM_COMMAND])
	    chatbot = gr.Chatbot()
	    msg = gr.Textbox()
	    clear = gr.Button("Clear")

	    # String markers to hide from the user. Matched by prefix so that an
	    # entry such as "<|reserved_special_token" also covers its numbered
	    # variants; None is handled separately because it has no startswith.
	    _ignored_prefixes = tuple(tok for tok in IGNORED_TOKENS if tok is not None)

	    def _is_ignored(token):
	        """Return True when a streamed item must not be shown to the user."""
	        return token is None or token.startswith(_ignored_prefixes)

	    def _reset():
	        """Return an empty chat display and a fresh system-only history."""
	        return None, [SYSTEM_COMMAND]

	    def user(user_message, history, dict_history):
	        """Record a submitted message in both histories.

	        Args:
	            user_message: Text taken from the input box.
	            history: Chatbot rows as [user_text, bot_text] pairs.
	            dict_history: Chat-template messages; appended to in place.

	        Returns:
	            A tuple of ("", new chatbot rows, dict_history): the empty
	            string clears the input box, and the new last row has a None
	            reply slot for `bot` to fill.
	        """
	        dict_history.append({"role": "user", "content": user_message})
	        return "", history + [[user_message, None]], dict_history

	    def bot(history, dict_history):
	        """Stream the assistant reply into the last chatbot row.

	        Args:
	            history: Chatbot rows; the last row's reply slot is filled
	                token by token.
	            dict_history: Chat-template messages used to build the prompt.

	        Yields:
	            The updated chatbot rows after each streamed item.
	        """
	        history[-1][1] = ""
	        response = {"role": "assistant", "content": ""}
	        text_input = tokenizer.apply_chat_template(
	            dict_history, tokenize=False, add_generation_prompt=True
	        )

	        try:
	            stream = client.text_generation(
	                prompt=text_input,
	                max_new_tokens=100,
	                stop_sequences=STOP_TOKENS,
	                stream=True,
	            )
	            for token in stream:
	                if not _is_ignored(token):
	                    history[-1][1] += token
	                    response["content"] += token
	                yield history
	        finally:
	            # Record whatever was generated, even if streaming failed or
	            # was interrupted, so each user turn gets an assistant turn.
	            # NOTE(review): this relies on in-place mutation of the list
	            # held by gr.State being visible on the next event - confirm
	            # for the Gradio version in use.
	            dict_history.append(response)

	    msg.submit(
	        user,
	        inputs=[msg, chatbot, tfs_history],
	        outputs=[msg, chatbot, tfs_history],
	        queue=False,
	    ).then(
	        bot,
	        [chatbot, tfs_history],
	        chatbot,
	    )
	    # Clearing must reset the model-side history as well as the display;
	    # otherwise the next prompt still contains the "cleared" conversation.
	    clear.click(_reset, None, [chatbot, tfs_history], queue=False)

	# Enable the request queue, then start the web server.
	demo.queue().launch()