haiyiwu committed on
Commit
cc3e071
1 Parent(s): 2af041d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import streamlit as st
4
+ from streamlit_extras.add_vertical_space import add_vertical_space
5
+ from PyPDF2 import PdfReader
6
+ from openai.embeddings_utils import get_embedding
7
+ import openai
8
+ from dotenv import load_dotenv
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.embeddings.openai import OpenAIEmbeddings
11
+ from langchain.vectorstores import FAISS
12
+ from langchain.llms import OpenAI
13
+ from langchain.chains.question_answering import load_qa_chain
14
+ from langchain.callbacks import get_openai_callback
15
# Sidebar: app title, a short "About" blurb listing the stack, and a credit.
st.sidebar.title('🤗LLM Chat App💬')
st.sidebar.markdown('''
    ## About
    OpenAI based LLM-powered chatbot built using:
    - [OpenAI](https://platform.openai.com/docs/models) LLM model
    - [Streamlit](https://streamlit.io/)
    - [LangChain](https://python.langchain.com/)
    ''')
# add_vertical_space() renders into the active container, so it still needs
# the context-manager form to land in the sidebar.
with st.sidebar:
    add_vertical_space(5)
st.sidebar.write('Made with ❤️ by Harry')
27
+
28
+
29
+ # Load environment variables
30
+ # load_dotenv()
31
+
32
+ # # Retrieve OpenAI API key
33
+ # openai_api_key = os.getenv("OPENAI_API_KEY")
34
+ # if openai_api_key is None:
35
+ # raise ValueError("The OPENAI_API_KEY environment variable is not set")
36
+
37
+ # # Set the OpenAI API key for the OpenAI library
38
+ # openai.api_key = openai_api_key
39
+
40
def extract_text_from_pdf(pdf):
    """Return the text of every page of *pdf*, concatenated in page order.

    Args:
        pdf: Whatever ``PdfReader`` accepts; ``main`` passes the file object
            returned by the Streamlit uploader.

    Returns:
        A single string with the pages' text joined with no separator. A page
        that yields no text contributes an empty string.
    """
    pdf_reader = PdfReader(pdf)
    # `or ""` keeps a page with nothing extractable (e.g. image-only) from
    # breaking the join with a None; join also avoids repeated `+=` copies.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
46
def get_embeddings(text_list):
    """Embed each string in *text_list* and return the results in order.

    Every item is passed on its own to ``get_embedding``; the returned list
    has one entry per input item, in the same order.
    """
    vectors = []
    for item in text_list:
        vectors.append(get_embedding(item))
    return vectors
48
def main():
    """Render the "Chat with PDF" page.

    Flow: the user uploads a PDF; its text is extracted and split into
    overlapping chunks; a FAISS vector store of OpenAI embeddings is loaded
    from ``<name>.pkl`` in the working directory if present, otherwise built
    and saved there; a typed question is then answered by a "stuff" QA chain
    over the three most similar chunks.
    """
    st.header("Chat with PDF 💬")

    # Upload a PDF file
    pdf = st.file_uploader("Upload your PDF file", type='pdf')
    if pdf is None:
        return

    # Extract text from the PDF and split it into overlapping chunks
    text = extract_text_from_pdf(pdf)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # Scanned / image-only PDFs produce no text; there is nothing to
        # embed or index, so stop here instead of failing further down.
        st.warning("No extractable text was found in this PDF.")
        return
    st.write("PDF content successfully extracted.")

    # Create or load embeddings.
    # basename + splitext: the cache file name is derived from the uploaded
    # file's name, so drop any directory part and the extension explicitly
    # rather than assuming a 4-character suffix.
    store_name = os.path.splitext(os.path.basename(pdf.name))[0]
    st.write(f'Processing: {store_name}')
    store_path = f"{store_name}.pkl"

    # NOTE(review): the cache is keyed by file name only, so a different PDF
    # uploaded under the same name reuses the earlier embeddings.
    if os.path.exists(store_path):
        # SECURITY: pickle.load executes arbitrary code from the file. This
        # is only safe while the working directory is writable by this app
        # alone; do not point it at shared or user-writable storage.
        with open(store_path, "rb") as f:
            vector_store = pickle.load(f)
        st.write('Embeddings loaded from the disk')
    else:
        embeddings = OpenAIEmbeddings()
        vector_store = FAISS.from_texts(chunks, embedding=embeddings)
        with open(store_path, "wb") as f:
            pickle.dump(vector_store, f)
        st.write('Embeddings created and saved to disk')

    # Accept user questions/query
    query = st.text_input("Ask questions about your PDF file:")
    if not query:
        return

    docs = vector_store.similarity_search(query=query, k=3)

    # NOTE(review): gpt-3.5-turbo is a chat model; confirm that the pinned
    # langchain version still accepts it through the completion-style
    # `OpenAI` wrapper.
    llm = OpenAI(model_name="gpt-3.5-turbo")
    chain = load_qa_chain(llm=llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=query)
        # Callback summary goes to the server's stdout, not to the page.
        print(cb)
    st.write(response)


if __name__ == '__main__':
    main()