Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- .gitattributes +1 -0
- Mind is your Business.pdf +0 -0
- Mind is your Business.pkl +3 -0
- app.py +71 -0
- healthy-recipes.pdf +3 -0
- notebook.ipynb +0 -0
- requirements.txt +8 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
healthy-recipes.pdf filter=lfs diff=lfs merge=lfs -text
|
Mind is your Business.pdf
ADDED
Binary file (766 kB). View file
|
|
Mind is your Business.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
3 |
+
size 0
|
app.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Import Libraries
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import pickle
|
6 |
+
from PyPDF2 import PdfReader
|
7 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
9 |
+
from langchain.vectorstores import FAISS
|
10 |
+
from langchain.llms import OpenAI
|
11 |
+
from langchain.chains.question_answering import load_qa_chain
|
12 |
+
from langchain.callbacks import get_openai_callback
|
13 |
+
import os
|
14 |
+
load_dotenv()  # load variables from a local .env file (if any) into os.environ


def _resolve_api_key():
    """Return the OpenAI API key from the environment, or None when unset.

    The conventional ``OPENAI_API_KEY`` name is tried first; the mixed-case
    ``OpenAI_API_KEY`` spelling this script originally required is still
    honoured as a fallback so existing .env files keep working.
    """
    return os.environ.get("OPENAI_API_KEY") or os.environ.get("OpenAI_API_KEY")


def _extract_text(uploaded_pdf):
    """Return the concatenated text of every page of the uploaded PDF."""
    reader = PdfReader(uploaded_pdf)
    # ``or ""`` keeps the join safe if a page yields no text at all.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _split_into_chunks(text):
    """Split ``text`` into overlapping chunks for embedding.

    Sizes are measured with ``len``, i.e. in characters (not tokens).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,     # at most 1000 characters per chunk
        chunk_overlap=200,   # consecutive chunks share up to 200 characters
        length_function=len,
    )
    return splitter.split_text(text=text)


## Reading the PDF

st.header("Chat with your PDF 💬")

pdf = st.file_uploader("Upload your PDF", type='pdf')  # upload a PDF file
if pdf is not None:
    api_key = _resolve_api_key()
    if not api_key:
        st.error(
            "OpenAI API key not found. Set OPENAI_API_KEY in the environment "
            "or in a .env file."
        )
        st.stop()

    ## Create embeddings of each chunk of data and store them in the vector DB

    # Directory name of the on-disk index: the uploaded file's name without
    # its extension. basename() keeps the directory inside the working dir.
    # NOTE(review): the cache is keyed by file name only, so a different PDF
    # uploaded under the same name reuses the old index.
    store_name = os.path.splitext(os.path.basename(pdf.name))[0]
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)

    if os.path.exists(store_name):
        # Reuse the index saved by an earlier run instead of re-embedding.
        # NOTE(security): load_local unpickles data from disk, which is why
        # the allow_dangerous_deserialization opt-in is needed; only load
        # directories this app wrote itself.
        VectorStore = FAISS.load_local(
            store_name, embeddings, allow_dangerous_deserialization=True
        )
        st.write('Vector Database already exists.')
    else:
        # Parse and chunk the PDF only when a new index has to be built.
        chunks = _split_into_chunks(_extract_text(pdf))
        if not chunks:
            # Nothing to embed (e.g. a scanned, image-only PDF).
            st.error("No extractable text was found in this PDF.")
            st.stop()

        VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
        VectorStore.save_local(store_name)
        st.write('Creating new embeddings.')

    ## Accepting query from user

    query = st.text_input("Ask questions about your PDF file:")

    if query:
        # Retrieve the 3 chunks most similar to the question and pass them
        # to the LLM in a single prompt ("stuff" chain).
        docs = VectorStore.similarity_search(query=query, k=3)

        llm = OpenAI(openai_api_key=api_key)
        chain = load_qa_chain(llm=llm, chain_type="stuff")
        with get_openai_callback() as cb:
            response = chain.run(input_documents=docs, question=query)
            print(cb)  # usage summary from the callback goes to stdout
        st.success(response)
|
healthy-recipes.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a506537e14017aef4761e84ceb212f707484170ae7c493b9d7431136a62f83a
|
3 |
+
size 3690108
|
notebook.ipynb
ADDED
File without changes
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
PyPDF2
|
3 |
+
python-dotenv
|
4 |
+
streamlit
|
5 |
+
faiss-cpu
|
6 |
+
streamlit-extras
|
7 |
+
openai
|
8 |
+
tiktoken
|