Spaces:

aiwithankit
/

chatwithllama

Sleeping

App Files Files Community

chatwithllama / app.py

aiwithankit

Update app.py

d0f00bb verified 27 days ago

raw

history blame contribute delete

2.07 kB

	import gradio as gr
	# from huggingface_hub import InferenceClient

	# """
	# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
	# """
	# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


	# def respond(
	# message,
	# history: list[tuple[str, str]],
	# system_message,
	# max_tokens,
	# temperature,
	# top_p,
	# ):
	# messages = [{"role": "system", "content": system_message}]

	# for val in history:
	# if val[0]:
	# messages.append({"role": "user", "content": val[0]})
	# if val[1]:
	# messages.append({"role": "assistant", "content": val[1]})

	# messages.append({"role": "user", "content": message})

	# response = ""

	# for message in client.chat_completion(
	# messages,
	# max_tokens=max_tokens,
	# stream=True,
	# temperature=temperature,
	# top_p=top_p,
	# ):
	# token = message.choices[0].delta.content

	# response += token
	# yield response

	from transformers import pipeline
	import torch

	# Hub checkpoint served by this app; the 3B variant can be substituted if needed.
	model_id = "unsloth/Llama-3.2-1B-Instruct"

	# Module-level text-generation pipeline, built once and reused by the chat handler.
	text_pipeline = pipeline(
	    task="text-generation",
	    model=model_id,
	    device_map="auto",
	    torch_dtype=torch.bfloat16,
	)
	# prompt= input("Please enter your query: ")
	# outputs = text_pipeline(prompt, max_new_tokens=150)
	# response = outputs[0]["generated_text"]
	# print(response)

	import gradio as gr

	def _history_to_messages(history):
	    """Convert Gradio chat history into a list of role/content message dicts.

	    Handles both history layouts: (user, assistant) pairs and dicts that
	    carry "role" and "content" keys. Empty turns and non-text content
	    are skipped.
	    """
	    messages = []
	    for entry in history or []:
	        if isinstance(entry, dict):
	            role = entry.get("role")
	            content = entry.get("content")
	            if role in ("user", "assistant") and isinstance(content, str) and content:
	                messages.append({"role": role, "content": content})
	        else:
	            # Pair layout: (user_text, assistant_text); either side may be None.
	            for role, content in zip(("user", "assistant"), entry):
	                if isinstance(content, str) and content:
	                    messages.append({"role": role, "content": content})
	    return messages


	def generated_response(prompt, history):
	    """Return the model's reply to ``prompt`` given the prior conversation.

	    The conversation is sent to the pipeline as chat messages rather than
	    as a raw string, so the instruct model receives its chat formatting
	    and the earlier turns. Only the newly generated assistant text is
	    returned, not an echo of the user's prompt.

	    Args:
	        prompt: The latest user message.
	        history: Previous turns as supplied by the chat UI; may be empty
	            or None.

	    Returns:
	        The assistant's reply as a string.
	    """
	    messages = _history_to_messages(history)
	    messages.append({"role": "user", "content": prompt})

	    # return_full_text=False asks the pipeline for the completion only.
	    outputs = text_pipeline(messages, max_new_tokens=150, return_full_text=False)
	    generated = outputs[0]["generated_text"]

	    # Fallback in case the pipeline returns the whole chat as a message list
	    # instead of a string: the reply is then the last message.
	    if isinstance(generated, list):
	        generated = generated[-1]["content"]
	    return generated

	"""
	For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
	"""

	# Chat front end: each submitted message is answered by generated_response.
	demo = gr.ChatInterface(
	    fn=generated_response,
	    # Header title displayed on the page.
	    title="This model is running on cpu so it will effect reasoning and inference time will be slow",
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(share=True)