# Spaces: isl-research / KCE_NVIDIA (status: Sleeping)
# File size: 5,026 Bytes

import streamlit as st
import os

from langchain_community.vectorstores import FAISS
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_nvidia_ai_endpoints import ChatNVIDIA


def format_docs(docs):
    """Merge retrieved documents into a single context string.

    The documents are echoed to stdout first (debug aid), then the
    ``page_content`` of each one is joined with a blank line in between.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The concatenated page contents, separated by ``"\\n\\n"``.
    """
    print("-------- Documents ------------")
    print(docs)
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)


@st.cache_resource
def _load_retrievers():
    """Build the embedding client, FAISS index and retrievers once.

    Streamlit re-executes this script on every user interaction. Caching the
    result as a resource avoids re-reading the index from disk and
    re-creating the reranker on each rerun.

    Returns:
        A tuple ``(embeddings, db, retriever, compressor,
        compression_retriever)``.
    """
    embeddings = NVIDIAEmbeddings(model="nvidia/nv-embedqa-mistral-7b-v2")
    # NOTE(security): allow_dangerous_deserialization=True lets FAISS unpickle
    # the stored index. Only load a "faiss_index" directory that this project
    # produced itself; never point this at untrusted files.
    db = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    retriever = db.as_retriever()
    compressor = FlashrankRerank()
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=retriever
    )
    return embeddings, db, retriever, compressor, compression_retriever


# Same module-level names as before, now backed by the cached loader.
embeddings, db, retriever, compressor, compression_retriever = _load_retrievers()

# Disclaimer text shown inside the collapsible panel below the page title.
# It is handed to st.info unchanged.
DISCLAIMER_TEXT = """
    We appreciate your engagement with our chatbot! We hope this chatbot can help you with the questions you have regarding with the KCE company.
    This chatbot is a demonstration preview. While the system is designed to provide helpful and informative responses by retrieving and generating relevant information, it is important to note the following:
    1. Potential for Inaccuracies: The chatbot may sometimes produce incorrect or misleading information. The responses generated by the LLM are based on patterns in the data it has been trained on and the information retrieved, which might not always be accurate or up-to-date.
    2. Hallucinations: The LLM might generate responses that seem plausible but are entirely fabricated. These "hallucinations" are a known limitation of current LLM technology and can occur despite the retrieval mechanism.\n
    By interacting with this chatbot, you acknowledge and accept these limitations and agree to use the information provided responsibly.
    """

# Page header followed by the expandable disclaimer.
st.title("KCE Chatbot")
with st.expander("Disclaimer", icon="ℹ️"):
    st.info(DISCLAIMER_TEXT)

# Maps the label shown in the sidebar selector to the model identifier that
# is passed to ChatNVIDIA. Insertion order is the order shown in the selector.
models_dict = {
    "meta/llama-3.1-405b": "meta/llama-3.1-405b-instruct",
    "meta/llama-3.1-70b": "meta/llama-3.1-70b-instruct",
    # Label previously read "meta/llama3.1-8b" (missing hyphen), which was
    # inconsistent with the other Llama entries.
    "meta/llama-3.1-8b": "meta/llama-3.1-8b-instruct",
    "google/gemma-2-27b": "google/gemma-2-27b-it",
    "google/gemma-7b": "google/gemma-7b",
    "microsoft/phi-3-mini-128k": "microsoft/phi-3-mini-128k-instruct",
    "microsoft/phi-3-medium-4k": "microsoft/phi-3-medium-4k-instruct",
}


# Sidebar control: the user picks one of the labels defined in models_dict,
# and the current choice is echoed underneath the selector.
model = st.sidebar.selectbox(
    label="Choose model",
    options=tuple(models_dict),
    label_visibility="visible",
)
st.sidebar.write(f"Selected model: {model}")



def response_generator(message):
    """Stream the assistant's answer to ``message`` from the RAG chain.

    A chat model is created for the model currently selected in the sidebar,
    context is retrieved for the question, and the generated answer is
    yielded piece by piece so it can be rendered incrementally.

    Args:
        message: The user's question as plain text.

    Yields:
        Successive text chunks of the generated answer, in order.
    """
    llm = ChatNVIDIA(model=models_dict[model])

    # Adjacent string literals are concatenated with nothing in between, so
    # each sentence carries its own trailing space to keep them separated.
    system_prompt = (
        "You are a KCE chatbot, and you are assisting customers with the inquiries about the company. "
        "Answer the questions with the provided context. Do not include based on the context or based on the documents in your answer. "
        "Remember that your job is to represent KCE company. "
        "Please say you do not know if you do not know or cannot find the information needed."
        "\n Question: {question} \nContext: {context}"
    )
    prompt = ChatPromptTemplate.from_messages([
        ('system', system_prompt),
        ('user', "{question}"),
    ])

    # NOTE(review): the module also builds `compression_retriever` (reranked
    # retrieval) but this chain uses the plain `retriever` - confirm which
    # one is intended before switching.
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    # Chunks are forwarded untouched; the caller accumulates them.
    yield from rag_chain.stream(message)


# The conversation is kept in the session state; create it on first use.
if "messages" not in st.session_state:
    st.session_state["messages"] = []
chat_history = st.session_state["messages"]

# Replay every stored turn so the conversation is drawn again on this run.
for entry in chat_history:
    with st.chat_message(entry["role"]):
        st.markdown(entry["content"])

# Handle a newly submitted question, if there is one.
user_question = st.chat_input("Please type your question here")
if user_question:
    # Record and show the user's turn.
    chat_history.append({"role": "user", "content": user_question})
    with st.chat_message("user"):
        st.markdown(user_question)

    # Stream the assistant's turn into its own chat bubble, then record the
    # value returned by write_stream.
    with st.chat_message("assistant"):
        answer = st.write_stream(response_generator(user_question))
    chat_history.append({"role": "assistant", "content": answer})