import os

import gradio as gr
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.vectorstores import FAISS

# Read the key from the environment rather than hardcoding it; never commit
# a real API key to source control.
os.environ.setdefault("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# Set the data store directory
DATA_STORE_DIR = "data_store"

if os.path.exists(DATA_STORE_DIR):
    vector_store = FAISS.load_local(DATA_STORE_DIR, OpenAIEmbeddings())
else:
    # Fail fast: without the saved index, the retriever below cannot be built.
    raise FileNotFoundError(
        f"Missing files. Upload index.faiss and index.pkl to the "
        f"{DATA_STORE_DIR} directory first."
    )

system_template = """Use the following pieces of context to answer the user's question.
Take note of the sources and include them in the answer in the format: "SOURCES: source1".
Use "SOURCES" in capital letters regardless of the number of sources.
If you don't know the answer, just say "I don't know"; don't try to make up an answer.
----------------
{summaries}"""

messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)

# Modify model_name if you have access to GPT-4
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=256)

chain_type_kwargs = {"prompt": prompt}
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",  # "stuff" packs all retrieved chunks into a single prompt
    retriever=vector_store.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)


def chatbot_interface(query):
    result = chain(query)
    return result["answer"]


# Create a Gradio interface
gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="LLM Chatbot",
    description="Chat with the LLM Chatbot on Custom Data",
).launch()
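
# --- One-time index build (separate script; a hedged sketch) ------------------
# The unused imports in the original listing (TextLoader, CharacterTextSplitter,
# Document) suggest the index.faiss / index.pkl files were built from a text
# corpus with this same stack. Assuming a plain-text corpus at
# "docs/my_data.txt" (a hypothetical path) and the same legacy langchain APIs,
# a build script could look like the following. Note that TextLoader records
# the file path in each chunk's "source" metadata, which is what
# RetrievalQAWithSourcesChain cites in its answers.
#
#   from langchain.document_loaders import TextLoader
#   from langchain.text_splitter import CharacterTextSplitter
#   from langchain.embeddings.openai import OpenAIEmbeddings
#   from langchain.vectorstores import FAISS
#
#   raw_docs = TextLoader("docs/my_data.txt").load()     # load the corpus
#   splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
#   docs = splitter.split_documents(raw_docs)            # chunk for retrieval
#   vector_store = FAISS.from_documents(docs, OpenAIEmbeddings())
#   vector_store.save_local("data_store")                # writes index.faiss and index.pkl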