# ChatWithMe / app.py
# (Hugging Face Space metadata, kept as comments so the file parses:)
# SurendraKumarDhaka's picture
# Rename AssignmentTask.py to app.py
# 383c4d7 verified
import gradio as gr
from typing import Any
from queue import Queue, Empty
from langchain.llms import LlamaCpp
from langchain.callbacks.base import BaseCallbackHandler
from langchain.prompts import PromptTemplate
from threading import Thread
# Shared queue used to stream tokens from the LLM callback (worker thread)
# to the Gradio generator that renders them in the UI.
q = Queue()
# Unique sentinel object put on the queue to signal that generation finished.
job_done = object()
class QueueCallback(BaseCallbackHandler):
    """Callback handler for streaming LLM responses to a queue.

    Every token the LLM emits is pushed onto the supplied queue so a
    consumer (the Gradio ``bot`` generator in this app) can yield the
    partial answer incrementally.
    """

    def __init__(self, q):
        # Queue shared with the consumer; tokens are produced here and
        # drained by the UI loop.
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Forward each newly generated token to the queue."""
        self.q.put(token)

    def on_llm_end(self, *args, **kwargs: Any) -> None:
        # Fix: the original returned ``self.q.empty()``, which neither drains
        # the queue nor signals completion (LangChain ignores the return
        # value). End-of-generation is signalled separately by ``answer``
        # putting ``job_done`` on the queue, so there is nothing to do here.
        return None
# Register the streaming callback so every generated token lands on `q`.
callbacks = [QueueCallback(q)]
# Step-by-step ("chain of thought") prompt with a single input variable.
template = """Question: {question}
Answer: Let's work this out in a step by step way to be sure we have the right answer."""
prompt = PromptTemplate(template=template, input_variables=["question"])
# [Download the gguf file locally from here to run it on CPU using llama_cpp](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/blob/main/llama-2-7b-chat.Q4_K_M.gguf)
# Any Llama-2 GGUF model file can be downloaded locally and loaded with
# LlamaCpp to run inference on CPU.
# NOTE(review): the model file is expected to already exist at this relative
# path — it is not downloaded by this script.
llm = LlamaCpp(
    model_path="llama-2-7b-chat.Q4_K_M.gguf",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callbacks=callbacks,  # QueueCallback streams each token into `q`
    verbose=True,
)
def answer(question):
    """Run the LLM on ``question`` in a background thread.

    Tokens stream into the module-level queue ``q`` via ``QueueCallback``;
    the sentinel ``job_done`` is enqueued when generation finishes so the
    consumer loop in ``bot`` knows when to stop reading.

    Args:
        question: The user's question, formatted/forwarded to the LLM.
    """
    def task():
        try:
            # The output is consumed token-by-token through the callback,
            # so the return value is not needed (the original bound it to
            # an unused local).
            llm(question)
        finally:
            # Always signal completion — even if generation raised —
            # otherwise the consumer loop in `bot` would poll forever.
            q.put(job_done)

    Thread(target=task).start()
# --- Gradio UI: streaming chat interface ---
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        # Append the user's turn (bot reply pending = None) and clear the box.
        return "", history + [[user_message, None]]

    def bot(history):
        # Generator: streams the model's answer into the last history entry.
        question = history[-1][0]
        print("Question: ", question)
        history[-1][1] = ""
        # Kick off generation in a background thread; tokens arrive on `q`.
        answer(question=question)
        while True:
            try:
                next_token = q.get(True, timeout=1)
                if next_token is job_done:
                    # Sentinel from `answer`: generation is complete.
                    break
                history[-1][1] += next_token
                yield history  # push the partial answer to the UI
            except Empty:
                # No token within the timeout — keep polling.
                continue

    # On submit: record the user turn immediately (no queue), then stream
    # the bot reply into the chatbot component.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
    # Clear button resets the chat history.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()  # queuing is required for generator-based (streaming) handlers
demo.launch(share=True,debug=True)