"""Streamlit app that answers questions about an uploaded PDF.

Flow: the user supplies an OpenAI API key (from the environment, a ``.env``
file, or the text box), uploads a PDF, and asks a question. The PDF text is
split into chunks, embedded with OpenAI, stored in a FAISS vector store that
is cached on disk as ``<pdf name>.pkl``, and the chunks most similar to the
question are passed to an OpenAI LLM through a LangChain "stuff" QA chain.
"""
import os
import pickle

import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback

# Legacy OpenAI keys are exactly 51 characters ("sk-" + 48); newer formats
# (e.g. "sk-proj-...") are longer, so only a lower bound is enforced.
_API_KEY_PREFIX = "sk-"
_API_KEY_MIN_LENGTH = 51

_SIDEBAR_ABOUT = '''
## About
This app is an LLM-powered PDF chatbot built using:
- [Streamlit](https://streamlit.io/)
- [LangChain](https://python.langchain.com/)
- [OpenAI](https://platform.openai.com/docs/models) LLM model

## How it works
- Load up a PDF file
- Extract the text from the PDF file
- Split the text into chunks
- Create embeddings using OpenAI, which are vectors of floating-point numbers that measure the relatedness of text strings
- Save these embeddings as vectors in a vector store, such as FAISS
- Use a similarity search to ask a question
- Get the answer and tokens used from OpenAI
'''

with st.sidebar:
    st.title('PDF Chat App')
    st.markdown(_SIDEBAR_ABOUT)
    st.write('Made with 🤖 by [Cazimir Roman](https://cazimir.dev)')


def _extract_pdf_text(pdf) -> str:
    """Return the concatenated text of every page of the uploaded PDF."""
    reader = PdfReader(pdf)
    # `or ""` guards against pages for which no text could be extracted.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _load_or_create_vector_store(store_name: str, chunks):
    """Return the FAISS store for *chunks*, using ``<store_name>.pkl`` as cache.

    If the cache file exists it is unpickled and returned; otherwise the
    embeddings are computed with OpenAI and the resulting store is pickled.
    """
    cache_path = f"{store_name}.pkl"

    # check if vector store exists. if not, create one
    if os.path.exists(cache_path):
        # SECURITY NOTE: pickle can execute arbitrary code while loading. This
        # is only acceptable because the file is one this app wrote itself;
        # never point this at a cache file from an untrusted source.
        # NOTE(review): the cache is keyed by file name only, so a different
        # PDF uploaded under the same name reuses the old embeddings.
        with open(cache_path, "rb") as f:
            vector_store = pickle.load(f)
        st.success('Text embeddings loaded from disk')
        return vector_store

    with st.spinner("Creating vector store embeddings..."):
        embeddings = OpenAIEmbeddings()
        vector_store = FAISS.from_texts(chunks, embeddings)
        with open(cache_path, "wb") as f:
            pickle.dump(vector_store, f)
    st.success('Embeddings computation completed')
    return vector_store


def _is_plausible_api_key(api_key: str) -> bool:
    """Return True if *api_key* has the prefix and minimum length of an OpenAI key."""
    return (
        api_key.startswith(_API_KEY_PREFIX)
        and len(api_key) >= _API_KEY_MIN_LENGTH
    )


def load_app() -> None:
    """Render the upload / question UI and answer questions about the PDF.

    Must be called at most once per script run: its widgets have no explicit
    keys, so rendering them twice in one run is rejected by Streamlit.
    """
    # upload a PDF file
    pdf = st.file_uploader("Upload your PDF", type='pdf')
    if pdf is None:
        return

    text = _extract_pdf_text(pdf)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # e.g. a scanned PDF with no text layer; an empty chunk list cannot be
        # turned into a vector store, so stop with a clear message instead.
        st.warning("No extractable text was found in this PDF.")
        return

    # Strip directory components and the extension; for "name.pdf" this yields
    # the same "name" the cache files have always used.
    store_name = os.path.splitext(os.path.basename(pdf.name))[0]
    vector_store = _load_or_create_vector_store(store_name, chunks)

    # Accept user question/query
    st.divider()
    query = st.text_input("Ask a question about your PDF file")
    if not query:
        return

    st.write(f"You asked: {query}")
    with st.spinner("Thinking..."):
        # chunks most similar to the query (vector store's default result count)
        docs = vector_store.similarity_search(query)
        llm = OpenAI(temperature=0)
        chain = load_qa_chain(llm=llm, chain_type="stuff")
        with get_openai_callback() as cb:
            response = chain.run(input_documents=docs, question=query)
        st.write(response)
        # The callback records OpenAI usage for the calls made inside it.
        st.caption(f"Tokens used: {cb.total_tokens}")


def main() -> None:
    """Render the page header and API-key form, then the app once a key is set."""
    print("Main called")
    # Pick up OPENAI_API_KEY from a local .env file, if there is one. Existing
    # environment variables (including a key submitted earlier) are kept.
    load_dotenv()

    st.header("Chat with your PDF")
    container = st.container()

    with container:
        open_ai_key = os.getenv("OPENAI_API_KEY")
        api_key = container.text_input(
            "Enter your OpenAI API key",
            type="password",
            value=open_ai_key or "",
        )
        # You can find it here: https://platform.openai.com/account/api-keys
        submit = container.button("Submit")

    # Decide first, render once: calling load_app() both for the environment
    # key and for the submitted key would create its widgets twice in one run.
    key_available = bool(open_ai_key)

    # submit button is pressed
    if submit:
        candidate = api_key.strip()  # tolerate whitespace from copy/paste
        if _is_plausible_api_key(candidate):
            os.environ["OPENAI_API_KEY"] = candidate
            key_available = True
        else:
            st.error("Api key is not correct")

    if key_available:
        load_app()


if __name__ == '__main__':
    main()