NickNYU committed on
Commit
a52eecd
1 Parent(s): b154fbc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ # import pickle
4
+ from PyPDF2 import PdfReader
5
+ # from langchain import FAISS
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.embeddings.openai import OpenAIEmbeddings
8
+
9
+ from langchain.vectorstores import Pinecone
10
+ import pinecone
11
+ from langchain.llms import OpenAI
12
+ from langchain.chains.question_answering import load_qa_chain
13
+ from langchain.callbacks import get_openai_callback
14
+
15
# Sidebar contents: static "About" panel rendered once per script run.
with st.sidebar:
    st.title('🤗💬 LLM Chat App')
    # Markdown body is a runtime string — rendered verbatim in the sidebar.
    st.markdown('''
    ## About
    This app is an LLM-powered chatbot built using:
    - [Streamlit](https://streamlit.io/)
    - [LangChain](https://python.langchain.com/)
    - [OpenAI](https://platform.openai.com/docs/models) LLM model

    ''')
    # add_vertical_space(5)
    st.write('Made by Nick')
28
+
29
+
30
+ def main():
31
+ st.header("Chat with PDF 💬")
32
+
33
+ # upload a PDF file
34
+ pdf = st.file_uploader("Upload your PDF", type='pdf')
35
+
36
+ if pdf is not None:
37
+ pdf_reader = PdfReader(pdf)
38
+
39
+ text = ""
40
+ for page in pdf_reader.pages:
41
+ text += page.extract_text()
42
+
43
+ text_splitter = RecursiveCharacterTextSplitter(
44
+ chunk_size=512,
45
+ chunk_overlap=128,
46
+ length_function=len
47
+ )
48
+ chunks = text_splitter.split_text(text=text)
49
+
50
+ # # embeddings
51
+ store_name = pdf.name[:-4]
52
+ st.write(f'{store_name}')
53
+
54
+ # if os.path.exists(f"{store_name}.pkl"):
55
+ # with open(f"{store_name}.pkl", "rb") as f:
56
+ # VectorStore = pickle.load(f)
57
+ # st.write('Embeddings Loaded from the Disk')
58
+ # else:
59
+ # st.write('Embeddings calculate to the Pinecone')
60
+ # embeddings = OpenAIEmbeddings()
61
+ # VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
62
+ # with open(f"{store_name}.pkl", "wb") as f:
63
+ # pickle.dump(VectorStore, f)
64
+
65
+ PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '894d5f1f-df46-4b01-8407-d9977eaee2eb')
66
+ PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV',
67
+ 'asia-southeast1-gcp-free') # You may need to switch with your env
68
+ embeddings = OpenAIEmbeddings()
69
+ # initialize pinecone
70
+ pinecone.init(
71
+ api_key=PINECONE_API_KEY, # find at app.pinecone.io
72
+ environment=PINECONE_API_ENV # next to api key in console
73
+ )
74
+ index_name = "indexer" # put in the name of your pinecone index here
75
+ VectorStore = Pinecone.from_texts([t.page_content for t in chunks], embeddings, index_name=index_name)
76
+
77
+ # Accept user questions/query
78
+ query = st.text_input("Ask questions about your PDF file:")
79
+ # st.write(query)
80
+
81
+ if query:
82
+ docs = VectorStore.similarity_search(query=query, k=3)
83
+
84
+ llm = OpenAI()
85
+ chain = load_qa_chain(llm=llm, chain_type="stuff")
86
+ with get_openai_callback() as cb:
87
+ response = chain.run(input_documents=docs, question=query)
88
+ print(cb)
89
+ st.write(response)
90
+
91
+
92
# Script entry point: run the Streamlit app when executed directly.
if __name__ == '__main__':
    main()