Spaces:

Stanlito
/

QandA-on-custom-PDF

Sleeping

App Files Files Community

Stanlito committed on Jul 21, 2023

Commit

0d14198

•

1 Parent(s): cba36ea

uploaded

Browse files

Files changed (3) hide show

app.py +73 -0
readme.md +37 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+from dotenv import load_dotenv
+import streamlit as st
+import os
+from PyPDF2 import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chains.question_answering import load_qa_chain
+from langchain.llms import OpenAI
+from langchain.callbacks import get_openai_callback
+from streamlit_chat import message
+os.environ["OPENAI_API_KEY"] = "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"
+def main():
+    load_dotenv()
+    st.header(" LLM CHATBOT ON PFD FILES")
+    st.sidebar.header("Instructions")
+    st.sidebar.info(
+        '''This is a web application that allows you to interact with
+        your PDF Files
+        '''
+    )
+    st.sidebar.info('''Enter a query in the text box and press enter
+        to receive a response''')
+    st.sidebar.info('''
+    This project works perfectly even on your own data
+    ''')
+    # st.set_page_config(page_title="Ask your PDF")
+    st.header("Ask your PDF files some questions 💬")
+    # upload file
+    pdf = st.file_uploader("Upload your PDF File Below", type="pdf")
+    # extract the text
+    if pdf is not None:
+        pdf_reader = PdfReader(pdf)
+        text = ""
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+        # split into chunks
+        text_splitter = CharacterTextSplitter(
+            separator="\n",
+            chunk_size=1000,
+            chunk_overlap=200,
+            length_function=len
+        )
+        chunks = text_splitter.split_text(text)
+        # create embeddings
+        embeddings = OpenAIEmbeddings()
+        knowledge_base = FAISS.from_texts(chunks, embeddings)
+        # show user input
+        user_question = st.text_input("Ask a question about your PDF:")
+        if user_question:
+            docs = knowledge_base.similarity_search(user_question)
+            llm = OpenAI()
+            chain = load_qa_chain(llm, chain_type="stuff")
+            with get_openai_callback() as cb:
+                response = chain.run(input_documents=docs, question=user_question)
+                print(cb)
+            # st.write(response)
+            message(response)
+if __name__ == '__main__':
+    main()

readme.md ADDED Viewed

	@@ -0,0 +1,37 @@

+# Langchain Ask PDF (Tutorial)
+>You may find the step-by-step video tutorial to build this application [on YouTube](https://youtu.be/wUAUdEw5oxM).
+This is a Python application that allows you to load a PDF and ask questions about it using natural language. The application uses an LLM to generate a response about your PDF. The LLM will not answer questions unrelated to the document.
+## How it works
+The application reads the PDF and splits the text into smaller chunks that can then be fed into an LLM. It uses OpenAI embeddings to create vector representations of the chunks. The application then finds the chunks that are semantically similar to the question that the user asked and feeds those chunks to the LLM to generate a response.
+The application uses Streamlit to create the GUI and Langchain to deal with the LLM.
+## Installation
+To install the repository, please clone this repository and install the requirements:
+```
+pip install -r requirements.txt
+```
+You will also need to add your OpenAI API key to the `.env` file.
+## Usage
+To use the application, run the `app.py` file with the streamlit CLI (after having installed streamlit):
+```
+streamlit run app.py
+```
+## Contributing
+This repository is for educational purposes only and is not intended to receive further contributions. It is supposed to be used as support material for the YouTube tutorial that shows how to build the project.

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+langchain==0.0.154
+PyPDF2==3.0.1
+python-dotenv==1.0.0
+streamlit==1.18.1
+faiss-cpu==1.7.4
+altair<5