from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
import gradio as gr
import os
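
# Speed up Hub downloads with the hf_transfer backend (requires the hf_transfer package)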
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
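
# Fetch the quantized GGUF weights from the Hugging Face Hub on first run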
REPO = "TheBloke/Llama-2-7B-Chat-GGUF"
MODEL_NAME = "llama-2-7b-chat.Q5_K_M.gguf"
MODEL_PATH = MODEL_NAME
DOWNLOAD_MODEL = f"huggingface-cli download {REPO} {MODEL_NAME} --local-dir . --local-dir-use-symlinks False"

if not os.path.exists(MODEL_PATH):
    os.system(DOWNLOAD_MODEL)

TEMPLATE = """
You are a helpful AI Assistant created by Mohammed Vasim. Mohammed Vasim is an AI Engineer.
Question: {question}
Answer: helpful answer"""
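
# Build a LangChain prompt with a single {question} input variable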
prompt = PromptTemplate.from_template(TEMPLATE)

# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path=MODEL_PATH,
    temperature=0.75,
    max_tokens=2000,
    n_ctx=2048,  # the default context window (512) is too small for max_tokens=2000
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)
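
# Wire the prompt and model together: the chain formats {question} and calls the LLM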
llm_chain = LLMChain(prompt=prompt, llm=llm)

# question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
# llm_chain.run(question)

title = "Welcome to Open Source LLM"
description = "Chat with Llama-2-7B-Chat (quantized GGUF) running locally via llama.cpp"
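
# Gradio passes the new message and the chat history; the chain is stateless,
# so history is ignored and each question is answered independently.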
def answer_query(message, history):
    # LLMChain.invoke returns a dict; the generated text is under the "text" key
    response = llm_chain.invoke({"question": message})
    return response["text"]

# Gradio chat interface
gr.ChatInterface(
    fn=answer_query,
    title=title,
    description=description,
    examples=[
        ["What is a Large Language Model?"],
        ["What's 9+2-1?"],
        ["Write Python code to print the Fibonacci sequence"],
    ],
).queue().launch(server_name="0.0.0.0")