Spaces:
Runtime error
Runtime error
# Standard library
import os
from typing import Optional

# Third-party
import streamlit as st
# from langchain_community.llms import HuggingFaceTextGenInference
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema import StrOutputParser
from langchain_community.chat_models import ChatAnthropic
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Project-local
from custom_llm import CustomLLM, custom_chain_with_history

# Token for the hosted model endpoint, read from the HF_INFER_API environment
# variable. os.getenv returns None when the variable is unset.
API_TOKEN = os.getenv('HF_INFER_API')
# The conversation memory and the chain are built once per browser session and
# kept in st.session_state so that later script reruns reuse them.
if 'memory' not in st.session_state:
    st.session_state['memory'] = ConversationBufferMemory(return_messages=True)

if 'chain' not in st.session_state:
    # The previous code constructed an `InferenceClient` here, but that name is
    # not imported anywhere in this file, so the first run raised NameError.
    # Build the LLM from `CustomLLM` (imported from custom_llm) instead.
    # NOTE(review): these keyword arguments are taken from the earlier
    # CustomLLM call that was kept here as a comment — confirm they still
    # match the CustomLLM signature.
    llm = CustomLLM(
        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        model_type='text-generation',
        api_token=API_TOKEN,
        stop=["\n<|", "<|"],
    )
    st.session_state['chain'] = custom_chain_with_history(
        llm=llm,
        memory=st.session_state.memory,
    )
# Page header.
st.title("LMD Chatbot V2 Sample")
st.subheader("Knowledge-base from web scrapping and FAQ")

# Create the transcript list the first time this session runs.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay every stored turn so the conversation stays visible after a rerun.
for past_turn in st.session_state["messages"]:
    speaker = past_turn["role"]
    with st.chat_message(speaker):
        st.markdown(past_turn["content"])
# Handle one chat turn: runs only when the user has just submitted a message.
if prompt := st.chat_input("Ask me anything.."):
    # Echo the user's message and record it in the on-screen transcript.
    st.chat_message("User").markdown(prompt)
    st.session_state.messages.append({"role": "User", "content": prompt})

    # Text after this marker is not part of the answer; generation is cut there.
    stop_marker = "\n<|"

    # Stream the reply into a single placeholder so it grows in place.
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        for chunk in st.session_state.chain.stream(prompt):
            # NOTE(review): a space is inserted after every chunk; confirm the
            # chain yields whole words rather than sub-word tokens.
            full_response += chunk + " "
            # The old check compared a 4-character slice against the
            # 3-character marker and therefore never matched. Look for the
            # marker anywhere in the accumulated text, drop it and whatever
            # follows, then stop consuming the stream.
            if stop_marker in full_response:
                full_response = full_response.partition(stop_marker)[0]
                message_placeholder.markdown(full_response)
                break
            message_placeholder.markdown(full_response + " ")

    # Persist the exchange in the conversation memory, then keep only the
    # 15 most recent messages.
    st.session_state.memory.save_context({"question": prompt}, {"output": full_response})
    st.session_state.memory.chat_memory.messages = st.session_state.memory.chat_memory.messages[-15:]

    # Record the reply in the transcript. The original appended the undefined
    # name `response`, which raised NameError after every answer.
    st.session_state.messages.append({"role": "assistant", "content": full_response})