Spaces:

CazimirRoman
/

ChatWithYourPdfWithoutOpenAi

Runtime error

App Files Files Community

Cazimir Roman committed on Jun 11, 2023

Commit

4171f86

•

1 Parent(s): 77be341

initial commit

Browse files

Files changed (2) hide show

app.py +111 -0
requirements.txt +11 -0

app.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import streamlit as st
+from dotenv import load_dotenv
+import pickle
+from PyPDF2 import PdfReader
+from streamlit_extras.add_vertical_space import add_vertical_space
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain import HuggingFaceHub
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chains.question_answering import load_qa_chain
+import os
+with st.sidebar:
+    st.title('PDF Chat App')
+    st.markdown('''
+    ## About
+    This app is an LLM-powered PDF chatbot built using:
+    - [Streamlit](https://streamlit.io/)
+    - [LangChain](https://python.langchain.com/)
+    - [OpenAI](https://platform.openai.com/docs/models) LLM model
+    ## How it works
+    - Load up a PDF file
+    - Extract the text from the PDF file
+    - Split the text into chunks
+    - Create embeddings using OpenAI, which are vectors of floating-point numbers that measure the relatedness of text strings
+    - Save these embeddings as vectors in a vector store, such as FAISS
+    - Use a similarity search to ask a question
+    - Get the answer and tokens used from OpenAI
+    ''')
+    st.write('Made with 🤖 by [Cazimir Roman](https://cazimir.dev)')
+def load_app():
+    llm = HuggingFaceHub(repo_id="declare-lab/flan-alpaca-large", model_kwargs={"temperature":0, "max_length":512})
+    # upload a PDF file
+    pdf = st.file_uploader("Upload your PDF", type='pdf')
+    if pdf is not None:
+        pdf_reader = PdfReader(pdf)
+        text = ""
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size = 1000,
+            chunk_overlap=200,
+            length_function=len
+        )
+        chunks = text_splitter.split_text(text=text)
+        store_name = pdf.name[:-4]
+        # check if vector store exists. if not, create one
+        if os.path.exists(f"{store_name}.pkl"):
+            with open(f"{store_name}.pkl", "rb") as f:
+                vectorStore = pickle.load(f)
+            st.success('Text embeddings loaded from disk')
+        else:
+            embeddings = HuggingFaceEmbeddings()
+            with st.spinner(f"Creating vector store embeddings..."):
+                vectorStore = FAISS.from_texts(chunks, embeddings)
+                with open(f"{store_name}.pkl", "wb") as f:
+                    pickle.dump(vectorStore, f)
+                st.success('Embeddings computation completed')
+        # Accept user question/query
+        # st.divider()
+        query = st.text_input("Ask a question about your PDF file")
+        if query:
+            st.write(f"You asked: {query}")
+            with st.spinner("Thinking..."):
+                # top 3 that are most similar to our query
+                docs = vectorStore.similarity_search(query)
+                chain = load_qa_chain(llm=llm, chain_type="stuff")
+                response = chain.run(input_documents=docs, question=query)
+                st.write(response)
+def main():
+    print("Main called")
+    st.header("Chat with your PDF")
+    container = st.container()
+    with container:
+        hugging_face_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+        api_key = container.text_input("Enter your HuggingFace API token", type="password", value="" if hugging_face_token == None else hugging_face_token)
+        # You can find it here: https://platform.openai.com/account/api-keys
+        submit = container.button("Submit")
+        if hugging_face_token:
+            load_app()
+        # submit button is pressed
+        if submit:
+            # check if api key length correct
+                if len(api_key) == 37:
+                    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
+                    load_app()
+                else:
+                    st.error("Api key is not correct")
+if __name__ == '__main__':
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+langchain==0.0.137
+PyPDF2
+python-dotenv
+streamlit==1.22.0
+faiss-cpu
+streamlit-extras
+openai
+altair<5
+tiktoken
+huggingface_hub
+sentence_transformers