# Streaming Llama-2 chat demo: Gradio UI + LangChain LlamaCpp, with a
# queue-based callback that streams tokens from a background worker thread.
from queue import Queue, Empty
from threading import Thread
from typing import Any

import gradio as gr
from langchain.callbacks.base import BaseCallbackHandler
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate

# Shared channel between producer and consumer: the LLM callback pushes
# tokens onto this queue, and the Gradio `bot` handler pops them off.
q = Queue()
# Unique sentinel object enqueued to signal "generation finished".
job_done = object()
class QueueCallback(BaseCallbackHandler):
    """Callback handler that streams LLM tokens into a queue.

    Each freshly generated token is put onto ``q`` so a consumer (the
    Gradio ``bot`` handler) can yield partial answers to the UI while
    generation is still running.
    """

    def __init__(self, q: Queue) -> None:
        # Queue shared with the consumer side of the stream.
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        # Called once per generated token; forward it to the consumer.
        self.q.put(token)

    def on_llm_end(self, *args, **kwargs: Any) -> None:
        # End-of-generation is actually signalled by the producer thread
        # putting ``job_done`` on the queue (see ``answer``); the return
        # value here is ignored by the callback framework.
        return self.q.empty()
# Callback list handed to LlamaCpp so generated tokens stream into ``q``.
callbacks = [QueueCallback(q)]

# Chain-of-thought style prompt template.
# NOTE(review): ``prompt`` is constructed but never wired into the LLM call
# in ``answer`` (the raw question string is passed instead) — confirm intent.
template = """Question: {question}
Answer: Let's work this out in a step by step way to be sure we have the right answer."""
prompt = PromptTemplate(template=template, input_variables=["question"])
# Download the gguf file locally to run the model on CPU via llama.cpp:
# https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/blob/main/llama-2-7b-chat.Q4_K_M.gguf
# Any Llama-2 gguf file can be loaded the same way through LlamaCpp.
llm = LlamaCpp(
    model_path="llama-2-7b-chat.Q4_K_M.gguf",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callbacks=callbacks,  # streams tokens into ``q`` via QueueCallback
    verbose=True,
)
def answer(question: str) -> Thread:
    """Run the LLM on *question* in a background thread.

    Tokens stream into the module-level queue ``q`` via ``QueueCallback``.
    The ``job_done`` sentinel is ALWAYS enqueued afterwards — even if
    generation raises — so the consumer loop in ``bot`` is guaranteed to
    terminate instead of polling forever.

    Returns the started worker thread (callers may ignore it).
    """

    def task() -> None:
        try:
            llm(question)
        finally:
            # Without this guarantee, an exception inside llm() would
            # leave the UI loop in ``bot`` waiting indefinitely.
            q.put(job_done)

    t = Thread(target=task)
    t.start()
    return t
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        """Record the submitted message as a new chat turn and clear the textbox."""
        return "", history + [[user_message, None]]

    def bot(history):
        """Stream the model's answer for the latest question into the history.

        Generator: yields the growing history after each received token so
        Gradio can update the chat display incrementally.
        """
        question = history[-1][0]
        print("Question: ", question)
        history[-1][1] = ""  # start with an empty answer and grow it token by token
        answer(question=question)
        while True:
            try:
                next_token = q.get(True, timeout=1)
                if next_token is job_done:
                    break
                history[-1][1] += next_token
                yield history
            except Empty:
                # No token within the timeout; keep polling until the
                # ``job_done`` sentinel arrives.
                continue

    # Submitting the textbox first records the user turn (no queue, instant),
    # then streams the bot reply into the chat display.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch(share=True, debug=True)