# Streaming Llama-2 chat demo: Gradio UI + LangChain LlamaCpp, with a
# queue-based callback that streams tokens from a background worker thread.
from queue import Queue, Empty
from threading import Thread
from typing import Any

import gradio as gr
from langchain.callbacks.base import BaseCallbackHandler
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate

# Shared channel between producer and consumer: the LLM callback pushes
# tokens onto this queue, and the Gradio `bot` handler pops them off.
q = Queue()
# Unique sentinel object enqueued to signal "generation finished".
job_done = object()
class QueueCallback(BaseCallbackHandler):
    """Callback handler that streams LLM tokens into a queue.

    Each freshly generated token is put onto ``q`` so a consumer (the
    Gradio ``bot`` handler) can yield partial answers to the UI while
    generation is still running.
    """

    def __init__(self, q: Queue) -> None:
        # Queue shared with the consumer side of the stream.
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        # Called once per generated token; forward it to the consumer.
        self.q.put(token)

    def on_llm_end(self, *args, **kwargs: Any) -> None:
        # End-of-generation is actually signalled by the producer thread
        # putting ``job_done`` on the queue (see ``answer``); the return
        # value here is ignored by the callback framework.
        return self.q.empty()
# Callback list handed to LlamaCpp so generated tokens stream into ``q``.
callbacks = [QueueCallback(q)]

# Chain-of-thought style prompt template.
# NOTE(review): ``prompt`` is constructed but never wired into the LLM call
# in ``answer`` (the raw question string is passed instead) — confirm intent.
template = """Question: {question}
Answer: Let's work this out in a step by step way to be sure we have the right answer."""
prompt = PromptTemplate(template=template, input_variables=["question"])
# Download the gguf file locally to run the model on CPU via llama.cpp:
# https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/blob/main/llama-2-7b-chat.Q4_K_M.gguf
# Any Llama-2 gguf file can be loaded the same way through LlamaCpp.
llm = LlamaCpp(
    model_path="llama-2-7b-chat.Q4_K_M.gguf",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callbacks=callbacks,  # streams tokens into ``q`` via QueueCallback
    verbose=True,
)
def answer(question: str) -> Thread:
    """Run the LLM on *question* in a background thread.

    Tokens stream into the module-level queue ``q`` via ``QueueCallback``.
    The ``job_done`` sentinel is ALWAYS enqueued afterwards — even if
    generation raises — so the consumer loop in ``bot`` is guaranteed to
    terminate instead of polling forever.

    Returns the started worker thread (callers may ignore it).
    """

    def task() -> None:
        try:
            llm(question)
        finally:
            # Without this guarantee, an exception inside llm() would
            # leave the UI loop in ``bot`` waiting indefinitely.
            q.put(job_done)

    t = Thread(target=task)
    t.start()
    return t
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        """Record the submitted message as a new chat turn and clear the textbox."""
        return "", history + [[user_message, None]]

    def bot(history):
        """Stream the model's answer for the latest question into the history.

        Generator: yields the growing history after each received token so
        Gradio can update the chat display incrementally.
        """
        question = history[-1][0]
        print("Question: ", question)
        history[-1][1] = ""  # start with an empty answer and grow it token by token
        answer(question=question)
        while True:
            try:
                next_token = q.get(True, timeout=1)
                if next_token is job_done:
                    break
                history[-1][1] += next_token
                yield history
            except Empty:
                # No token within the timeout; keep polling until the
                # ``job_done`` sentinel arrives.
                continue

    # Submitting the textbox first records the user turn (no queue, instant),
    # then streams the bot reply into the chat display.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch(share=True, debug=True)