Stanlito commited on
Commit
0d14198
1 Parent(s): cba36ea
Files changed (3) hide show
  1. app.py +73 -0
  2. readme.md +37 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import streamlit as st
3
+ import os
4
+ from PyPDF2 import PdfReader
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.chains.question_answering import load_qa_chain
9
+ from langchain.llms import OpenAI
10
+ from langchain.callbacks import get_openai_callback
11
+ from streamlit_chat import message
12
+
13
# SECURITY: the OpenAI API key must never be hard-coded in source control; the
# key that used to be assigned here is public and has to be revoked.
# OPENAI_API_KEY is expected to come from the process environment or from the
# `.env` file that main() loads via load_dotenv(). Setting it here also made
# load_dotenv() ignore the user's `.env`, because by default it does not
# override variables that are already present in os.environ.
14
+
15
+
16
def _extract_pdf_text(pdf_reader):
    """Return the concatenated text of every page in *pdf_reader*.

    A page whose ``extract_text()`` yields nothing contributes an empty
    string, so one unreadable page cannot break the concatenation.
    """
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def main():
    """Run the Streamlit app: upload a PDF and ask questions about it.

    The uploaded PDF is read page by page, its text is split into overlapping
    chunks, the chunks are embedded with OpenAI embeddings and stored in a
    FAISS index. When the user enters a question, the most similar chunks are
    passed to an OpenAI LLM through a "stuff" question-answering chain and
    the answer is rendered as a chat message.

    The OpenAI API key is read from the environment; ``load_dotenv()`` loads
    it from a ``.env`` file when one is present.
    """
    load_dotenv()
    st.header(" LLM CHATBOT ON PDF FILES")
    st.sidebar.header("Instructions")
    st.sidebar.info(
        '''This is a web application that allows you to interact with
        your PDF Files
        '''
    )
    st.sidebar.info('''Enter a query in the text box and press enter
        to receive a response''')

    st.sidebar.info('''
        This project works perfectly even on your own data
        ''')
    st.header("Ask your PDF files some questions 💬")

    # upload file
    pdf = st.file_uploader("Upload your PDF File Below", type="pdf")
    if pdf is None:
        # Nothing uploaded yet: there is nothing to index or to ask about.
        return

    # extract the text
    text = _extract_pdf_text(PdfReader(pdf))

    # split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # A PDF without extractable text (e.g. scanned images) produces no
        # chunks, and an empty chunk list cannot be turned into an index.
        st.warning("No extractable text was found in this PDF.")
        return

    # create embeddings
    embeddings = OpenAIEmbeddings()
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    # show user input
    user_question = st.text_input("Ask a question about your PDF:")
    if user_question:
        docs = knowledge_base.similarity_search(user_question)

        llm = OpenAI()
        chain = load_qa_chain(llm, chain_type="stuff")
        # The callback collects the OpenAI usage of the call; it is printed
        # to the server console, not shown in the UI.
        with get_openai_callback() as cb:
            response = chain.run(input_documents=docs, question=user_question)
            print(cb)

        message(response)


if __name__ == '__main__':
    main()
readme.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Langchain Ask PDF (Tutorial)
2
+
3
+ >You may find the step-by-step video tutorial to build this application [on Youtube](https://youtu.be/wUAUdEw5oxM).
4
+
5
+ This is a Python application that allows you to load a PDF and ask questions about it using natural language. The application uses an LLM to generate a response about your PDF. The LLM will not answer questions unrelated to the document.
6
+
7
+ ## How it works
8
+
9
+ The application reads the PDF and splits the text into smaller chunks that can then be fed into an LLM. It uses OpenAI embeddings to create vector representations of the chunks. The application then finds the chunks that are semantically similar to the question that the user asked and feeds those chunks to the LLM to generate a response.
10
+
11
+ The application uses Streamlit to create the GUI and Langchain to deal with the LLM.
12
+
13
+
14
+ ## Installation
15
+
16
+ To install the repository, please clone this repository and install the requirements:
17
+
18
+ ```
19
+ pip install -r requirements.txt
20
+ ```
21
+
22
+ You will also need to add your OpenAI API key to the `.env` file.
23
+
24
+ ## Usage
25
+
26
+ To use the application, run the `app.py` file with the streamlit CLI (after having installed streamlit):
27
+
28
+ ```
29
+ streamlit run app.py
30
+ ```
31
+
32
+
33
+ ## Contributing
34
+
35
+ This repository is for educational purposes only and is not intended to receive further contributions. It is supposed to be used as support material for the YouTube tutorial that shows how to build the project.
36
+
37
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ langchain==0.0.154
2
+ PyPDF2==3.0.1
3
+ python-dotenv==1.0.0
4
+ streamlit==1.18.1
5
+ faiss-cpu==1.7.4
6
+ altair<5