# ai-chatbot / main.py
import streamlit as st
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
# from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.base import BaseCallbackHandler
from huggingface_hub import hf_hub_download

# StreamHandler to intercept streaming output from the LLM.
# This makes it appear that the Language Model is "typing"
# in real time.
class StreamHandler(BaseCallbackHandler):
    def __init__(self, container, initial_text=""):
        self.container = container
        self.text = initial_text

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.text += token
        self.container.markdown(self.text)
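
# One possible way to wire this up (not enabled in this app) would be to pass
# the handler to the LLM via its callbacks argument, e.g.
#   LlamaCpp(..., callbacks=[StreamHandler(st.empty())])
# so each new token is rendered into a Streamlit placeholder as it arrives.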


@st.cache_resource
def create_chain(system_prompt):
    # A stream handler to direct streaming output on the chat screen.
    # This will need to be handled somewhat differently.
    # But it demonstrates what potential it carries.
    # stream_handler = StreamHandler(st.empty())

    # Callback manager is a way to intercept streaming output from the
    # LLM and take some action on it. Here we are giving it our custom
    # stream handler to make it appear that the LLM is typing the
    # responses in real-time.
    # callback_manager = CallbackManager([stream_handler])

    (repo_id, model_file_name) = ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
                                  "mistral-7b-instruct-v0.1.Q4_0.gguf")

    model_path = hf_hub_download(repo_id=repo_id,
                                 filename=model_file_name,
                                 repo_type="model")
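    # hf_hub_download fetches the GGUF file into the local Hugging Face cache
    # (skipping the download if it is already present) and returns the local
    # file path.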
    # Initialize the LlamaCpp LLM model.
    # n_gpu_layers, n_batch, and n_ctx are for GPU support.
    # When they are not set, the CPU will be used.
    # Set n_gpu_layers to 1 for a Mac M2, and to higher numbers based on
    # your GPU support.
    llm = LlamaCpp(
        model_path=model_path,
        temperature=0,
        max_tokens=512,
        top_p=1,
        # callback_manager=callback_manager,
        # n_gpu_layers=1,
        # n_batch=512,
        # n_ctx=4096,
        stop=["[INST]"],
        verbose=False,
        streaming=True,
    )
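
    # Note: stop=["[INST]"] halts generation if the model starts a new
    # instruction turn, and streaming=True emits tokens incrementally so a
    # callback handler (such as StreamHandler above) could render them as
    # they arrive.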

    # Template you will use to structure your user input before converting it
    # into a prompt. Here, my template first injects the personality I wish
    # to give to the LLM, in the form of system_prompt, before pushing the
    # actual prompt from the user. Note that this chatbot doesn't have any
    # memory of the conversation, so we inject the system prompt with each
    # message.
    template = """
    <s>[INST]{}[/INST]</s>
    [INST]{}[/INST]
    """.format(system_prompt, "{question}")

    # We create a prompt from the template so we can use it with LangChain.
    prompt = PromptTemplate(template=template, input_variables=["question"])

    # We create an LLM chain with our LLM and prompt.
    # llm_chain = LLMChain(prompt=prompt, llm=llm)  # Legacy
    llm_chain = prompt | llm  # LCEL

    return llm_chain
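
# Note: because LlamaCpp is a completion-style LLM (not a chat model),
# invoking `prompt | llm` returns the generated text as a plain string,
# which the UI code below passes straight to st.markdown().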

# Set the webpage title.
st.set_page_config(
    page_title="Your own aiChat!"
)

# Create a header element.
st.header("Your own aiChat!")

# This sets the LLM's personality for each prompt.
# The initial personality provided is basic.
# Try something interesting and notice how the LLM's responses are affected.
system_prompt = st.text_area(
    label="System Prompt",
    value="You are a helpful AI assistant who answers questions in short sentences.",
    key="system_prompt")

# Create the LLM chain to use for our chatbot.
llm_chain = create_chain(system_prompt)
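# Note: @st.cache_resource caches create_chain() per distinct system_prompt
# value, so editing the System Prompt text area builds and caches a new chain.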

# We store the conversation in the session state.
# This will be used to render the chat conversation.
# We initialize it with the first message we want the user to be greeted with.
if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "How may I help you today?"}
    ]

if "current_response" not in st.session_state:
    st.session_state.current_response = ""
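# st.session_state persists across Streamlit reruns, so the conversation
# history is not lost between interactions.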

# We loop through each message in the session state and render it as
# a chat message.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
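# Streamlit re-executes this script from top to bottom on every interaction,
# so the full conversation is re-rendered here on each run.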

# We take questions/instructions from the chat input to pass to the LLM.
if user_prompt := st.chat_input("Your message here", key="user_input"):
    # Add our input to the session state.
    st.session_state.messages.append(
        {"role": "user", "content": user_prompt}
    )

    # Add our input to the chat window.
    with st.chat_message("user"):
        st.markdown(user_prompt)

    # Pass our input to the LLM chain and capture the final response.
    # If the StreamHandler were enabled, it would already be receiving the
    # streaming output while the LLM generates; here we simply get the
    # complete response once the LLM has finished generating it.
    response = llm_chain.invoke({"question": user_prompt})

    # Add the response to the session state.
    st.session_state.messages.append(
        {"role": "assistant", "content": response}
    )

    # Add the response to the chat window.
    with st.chat_message("assistant"):
        st.markdown(response)
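
# To run the app locally, assuming this file is saved as main.py and the
# dependencies (streamlit, langchain, llama-cpp-python, huggingface_hub)
# are installed:
#   streamlit run main.py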