# llama-2-gguf / app.py
# Author: md-vasim
# Last commit: "printing in logs" (381c14d)
# File size: 1.92 kB
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
import gradio as gr
import os
# Set before the CLI runs so the download subprocess inherits it.
# NOTE(review): presumably enables the `hf_transfer` download backend in
# huggingface_hub -- confirm the package is installed in the deployment image.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Hub repository and the single GGUF weights file to fetch from it.
REPO = "TheBloke/Llama-2-7B-Chat-GGUF"
MODEL_NAME = "llama-2-7b-chat.Q5_K_M.gguf"

# Shell command that downloads the weights file into the current directory.
DOWNLOAD_MODEL = (
    f"huggingface-cli download {REPO} {MODEL_NAME} "
    "--local-dir . --local-dir-use-symlinks False"
)

# The download targets ".", so the local path is just the file name.
MODEL_PATH = MODEL_NAME

# Fetch the weights only when they are not already on disk, and fail fast with
# a clear message instead of letting the model loader hit a missing file later.
if not os.path.exists(MODEL_PATH):
    _download_status = os.system(DOWNLOAD_MODEL)
    if _download_status != 0 or not os.path.exists(MODEL_PATH):
        raise RuntimeError(
            f"Model download failed (exit status {_download_status}): "
            f"{DOWNLOAD_MODEL!r} did not produce {MODEL_PATH!r}"
        )
# Prompt text sent to the model; `{question}` is the only placeholder and is
# filled in with the user's message. The string starts with a newline.
TEMPLATE = (
    "\n"
    "You are a helpful AI Assistant created by Mohammed Vasim. "
    "Mohammed Vasim is an AI Engineer.\n"
    "Question: {question}\n"
    "Answer: helpful answer"
)
# Wrap the raw template string in a LangChain prompt object.
prompt = PromptTemplate.from_template(TEMPLATE)

# Callbacks support token-wise streaming; the only handler writes to stdout.
_stream_handlers = [StreamingStdOutCallbackHandler()]
callback_manager = CallbackManager(_stream_handlers)

# Settings for the local llama.cpp model.
# Make sure the model path is correct for your system!
_llm_settings = {
    "model_path": MODEL_PATH,
    "temperature": 0.75,
    "max_tokens": 2000,
    "top_p": 1,
    "callback_manager": callback_manager,
    "verbose": True,  # Verbose is required to pass to the callback manager
}
llm = LlamaCpp(**_llm_settings)

# Chain that formats the prompt and feeds it to the model.
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Manual smoke test (kept for reference, not executed):
# question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
# llm_chain.run(question)
# Page text handed to the Gradio chat interface as `description` and `title`.
description = "This is a Llama-2-GGUF"
title = "Welcome to Open Source LLM"
def answer_query(message, history):
    """Answer one chat message with the LLM chain and return the reply text.

    Args:
        message: The user's latest message from the chat box.
        history: Prior conversation turns supplied by the chat interface.
            It is only logged; the prompt does not include it.

    Returns:
        The generated answer as a string.
    """
    print(message)
    # The chain returns a mapping holding both the inputs and the generated
    # output; the chat UI needs only the generated text, not the mapping.
    result = llm_chain.invoke({"question": message})
    print(result, history)
    if isinstance(result, dict):
        # "text" is the chain's output key; fall back to an empty reply
        # rather than raising if it is missing.
        return str(result.get("text", ""))
    return str(result)
# Gradio chat interface
# Sample prompts offered to the user; each entry is a one-element row.
_example_prompts = [
    ["What is a Large Language Model?"],
    ["What's 9+2-1?"],
    ["Write Python code to print the Fibonacci sequence"],
]

chat_ui = gr.ChatInterface(
    fn=answer_query,
    title=title,
    description=description,
    examples=_example_prompts,
)

# Enable request queuing, then serve on all network interfaces.
chat_ui.queue().launch(server_name="0.0.0.0")