"""Streamlit chatbot that answers questions over a custom FAISS vector store.

Expects ``data_store/index.faiss`` and ``data_store/index.pkl`` to exist;
questions are answered by a RetrievalQAWithSourcesChain backed by an OpenAI
chat model, with sources cited in the answer.
"""

import os
import pathlib
import re

import streamlit as st
from streamlit_chat import message
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# --- Page setup ---------------------------------------------------------
st.set_page_config(page_title="LLM Chatbot")
st.header(" LLM Chatbot on Custom data")

st.sidebar.header("Instructions")
st.sidebar.info(
    '''This is a web application that allows you to interact with your custom data '''
)
st.sidebar.info('''Enter a query in the text box and press enter to receive a response''')
st.sidebar.info(''' This project works perfectly even on your own data ''')

# SECURITY: never hard-code an API key in source control. The key that was
# previously embedded here is exposed and must be treated as compromised and
# revoked. Read the key from the environment (or Streamlit secrets) instead.
if "OPENAI_API_KEY" not in os.environ:
    st.error("Set the OPENAI_API_KEY environment variable before running this app.")
    st.stop()

st.title("Stanlito AI Chatbot")

# Directory that must contain the FAISS index files (index.faiss / index.pkl).
DATA_STORE_DIR = "data_store"

if os.path.exists(DATA_STORE_DIR):
    vector_store = FAISS.load_local(DATA_STORE_DIR, OpenAIEmbeddings())
else:
    # Nothing below can work without the index; stop here instead of
    # crashing later with a NameError on `vector_store`.
    st.write(f"Missing files. Upload index.faiss and index.pkl files to {DATA_STORE_DIR} directory first")
    st.stop()

# Prompt: instruct the model to cite its sources in "SOURCES: ..." format.
system_template = """Use the following pieces of context to answer the user's question. Take note of the sources and include them in the answer in the format: "SOURCES: source1", use "SOURCES" in capital letters regardless of the number of sources. If you don't know the answer, just say "I don't know", don't try to make up an answer. 
----------------
{summaries}"""

messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)

# Load the language model. Modify model_name if you have access to GPT-4.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=256)

# Build the retrieval QA chain over the FAISS store ("stuff" packs all
# retrieved chunks into a single prompt).
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)


def print_result(question: str, result: dict) -> None:
    """Render the question and the chain's answer as markdown.

    Takes the question explicitly rather than reading a module-level
    global, so the function is self-contained and testable.
    """
    output_text = f"""### Question: 
{question} 
Answer: 
{result['answer']} 
"""
    st.markdown(output_text)


# Get user input and run the chain on it.
query = st.text_input("Ask a question")

if query:
    result = chain(query)
    print_result(query, result)