steve zhuang committed on
Commit
6dcde1d
1 Parent(s): 8b31aab

added three files

Browse files
Files changed (3) hide show
  1. .env +1 -0
  2. app.py +56 -0
  3. requirements.txt +4 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENAI_API_KEY=<REDACTED — a secret API key was committed here; revoke/rotate it and supply it via an untracked .env or the deployment's secret store>
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from dotenv import load_dotenv
3
+ import streamlit as st
4
+ from PyPDF2 import PdfReader
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.chains.question_answering import load_qa_chain
9
+ from langchain.llms import OpenAI
10
+ from langchain.callbacks import get_openai_callback
11
+
12
+
13
def _extract_text(pdf):
    """Return the text of every page of the uploaded PDF, concatenated.

    Args:
        pdf: The file-like object returned by ``st.file_uploader``.

    Returns:
        A single string with the pages' text in page order; empty when no
        page yields any text.
    """
    reader = PdfReader(pdf)
    # `or ""` keeps the join safe if a page yields no text at all.
    return "".join(page.extract_text() or "" for page in reader.pages)


def main():
    """Run the "Ask your PDF" Streamlit page.

    Loads environment variables from ``.env``, lets the user upload a PDF,
    splits its text into overlapping chunks, indexes the chunks in a FAISS
    vector store using OpenAI embeddings, and answers the user's question
    with a "stuff" question-answering chain over the most similar chunks.
    """
    load_dotenv()
    st.set_page_config(page_title="Ask your PDF")
    st.header("Ask your PDF 💬")

    # upload file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        return

    # extract the text
    text = _extract_text(pdf)

    # split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # A scanned/image-only PDF produces no text; building a vector store
        # from zero chunks would fail, so tell the user instead of crashing.
        st.warning("No extractable text was found in this PDF.")
        return

    # create embeddings
    # NOTE(review): Streamlit reruns this script on every interaction, so the
    # document is re-embedded for each question — consider caching the index.
    embeddings = OpenAIEmbeddings()
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    # show user input
    user_question = st.text_input("Ask a question about your PDF:")
    if not user_question:
        return

    docs = knowledge_base.similarity_search(user_question)

    llm = OpenAI()
    chain = load_qa_chain(llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=user_question)
        # Usage/cost summary goes to the server's stdout, not to the page.
        print(cb)

    st.write(response)


if __name__ == '__main__':
    main()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ langchain==0.0.154
2
+ PyPDF2==3.0.1
3
+ python-dotenv==1.0.0
4
+ streamlit==1.18.1