"""Streamlit app for chatting with an uploaded PDF, DOCX or TXT document.

The uploaded file is split into chunks, embedded with OpenAI embeddings and
stored in a Chroma vector store; questions are answered through a
ConversationalRetrievalChain kept in the Streamlit session state.
"""

import os
import tempfile

import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import (
    PyPDFLoader,
    Docx2txtLoader,
    TextLoader,
)

from apikey import openai_api_key

os.environ["OPENAI_API_KEY"] = openai_api_key

# Maps a lower-cased file extension to the loader class that can read it.
_LOADERS = {
    ".pdf": PyPDFLoader,
    ".docx": Docx2txtLoader,
    ".txt": TextLoader,
}


def load_and_process_file(file_data):
    """
    Load and process the uploaded file.

    Args:
        file_data: The uploaded file object; only its ``name`` attribute and
            ``getvalue()`` method are used.

    Returns:
        A Chroma vector store containing the embedded chunks of the file, or
        None when the format is unsupported or no text could be extracted.
    """
    # Compare the extension case-insensitively so "REPORT.PDF" is accepted.
    extension = os.path.splitext(file_data.name)[1].lower()
    loader_cls = _LOADERS.get(extension)
    if loader_cls is None:
        # Reject before touching the disk so unsupported uploads leave no file.
        st.write("This document format is not supported!")
        return None

    # The loaders take a filesystem path, so spill the upload to a temporary
    # file instead of the working directory: the client-supplied name is never
    # used as a path, and nothing is left behind. delete=False lets the file
    # be reopened by path after this handle is closed.
    with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as tmp:
        tmp.write(file_data.getvalue())
        tmp_path = tmp.name

    try:
        documents = loader_cls(tmp_path).load()
    finally:
        os.remove(tmp_path)

    # Record the original upload name rather than the temporary path.
    for document in documents:
        document.metadata["source"] = file_data.name

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    chunks = text_splitter.split_documents(documents)
    if not chunks:
        # An empty document yields nothing to embed; report it instead of
        # handing an empty list to the vector store.
        st.write("No text could be extracted from this document!")
        return None

    embeddings = OpenAIEmbeddings()
    return Chroma.from_documents(chunks, embeddings)


def initialize_chat_model(vector_store):
    """
    Initialize the chat model with the given vector store.

    Args:
        vector_store: Vector store whose retriever supplies document context.

    Returns:
        A ConversationalRetrievalChain instance.
    """
    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
    retriever = vector_store.as_retriever()
    return ConversationalRetrievalChain.from_llm(llm, retriever)


def main():
    """
    The main function that runs the Streamlit app.

    Renders the upload widget, builds the retrieval chain when a file is
    processed, and answers submitted questions.
    """
    st.set_page_config(page_title="Chat with Document", page_icon="📚")
    st.title("📚 Chat with Document")
    st.write("Upload a document and ask questions related to its content.")

    uploaded_file = st.file_uploader(
        "Upload a file", type=["pdf", "docx", "txt"], key="file_uploader"
    )
    # Clicking the button clears the previous conversation via the callback.
    add_file = st.button(
        "Process File",
        on_click=clear_history,
        key="process_button",
    )

    if uploaded_file is not None and add_file:
        with st.spinner("Processing file..."):
            vector_store = load_and_process_file(uploaded_file)
            # Explicit None check: do not rely on the truthiness of the store.
            if vector_store is not None:
                st.session_state.crc = initialize_chat_model(vector_store)
                st.success("File processed successfully!")

    st.markdown("## Ask a Question")
    question = st.text_area(
        "Enter your question", height=200, key="question_input"
    )
    submit_button = st.button("Submit", key="submit_button")

    if submit_button:
        if "crc" in st.session_state:
            handle_question(question)
        else:
            # Tell the user why nothing happens instead of ignoring the click.
            st.warning("Please upload and process a document first.")

    display_chat_history()


def handle_question(question):
    """
    Handles the user's question by generating a response and updating the
    chat history.

    Blank questions are rejected with a warning and are neither sent to the
    chain nor recorded. Expects ``st.session_state.crc`` to be set.

    Args:
        question: The question text entered by the user.
    """
    if not question or not question.strip():
        st.warning("Please enter a question.")
        return

    crc = st.session_state.crc
    if "history" not in st.session_state:
        st.session_state["history"] = []

    with st.spinner("Generating response..."):
        response = crc.run(
            {
                "question": question,
                "chat_history": st.session_state["history"],
            }
        )

    st.session_state["history"].append((question, response))
    st.write(response)


def display_chat_history():
    """
    Displays the chat history in the Streamlit app.

    Nothing is rendered when there is no history yet.
    """
    # Skip the heading when the history is missing or still empty.
    if "history" not in st.session_state or not st.session_state["history"]:
        return

    st.markdown("## Chat History")
    for q, a in st.session_state["history"]:
        st.markdown(f"**Question:** {q}")
        st.write(a)
        st.write("---")


def clear_history():
    """
    Clear the chat history stored in the session state.
    """
    if "history" in st.session_state:
        del st.session_state["history"]


if __name__ == "__main__":
    main()