Spaces:

StarbucksCN
/

demo

Runtime error

App Files Community

NickNYU committed on Jun 18, 2023

Commit

bb37df0

•

1 Parent(s): e013e04

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -54

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import streamlit as st
 import os
 import pickle
 from PyPDF2 import PdfReader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.vectorstores import FAISS
@@ -29,65 +30,42 @@ with st.sidebar:
 def main():
     st.header("Chat with PDF 💬")
-    # upload a PDF file
-    pdf = st.file_uploader("Upload your PDF", type='pdf')
-    if pdf is not None:
-        pdf_reader = PdfReader(pdf)
-        text = ""
-        for page in pdf_reader.pages:
-            text += page.extract_text()
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=512,
             chunk_overlap=128,
             length_function=len
         )
-        chunks = text_splitter.split_text(text=text)
-        # # embeddings
-        store_name = pdf.name[:-4]
-        st.write(f'{store_name}')
-        if os.path.exists(f"{store_name}.pkl"):
-            with open(f"{store_name}.pkl", "rb") as f:
-                VectorStore = pickle.load(f)
-            st.write('Embeddings Loaded from the Disk')
-        else:
-            st.write('Embeddings calculate to the Pinecone')
-            embeddings = OpenAIEmbeddings()
-            VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
-            print(VectorStore)
-            with open(f"{store_name}.pkl", "wb") as f:
-                pickle.dump(VectorStore, f)
-        # PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '894d5f1f-df46-4b01-8407-d9977eaee2eb')
-        # PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV',
-        #                                   'asia-southeast1-gcp-free')  # You may need to switch with your env
-        # embeddings = OpenAIEmbeddings()
-        # # initialize pinecone
-        # pinecone.init(
-        #     api_key=PINECONE_API_KEY,  # find at app.pinecone.io
-        #     environment=PINECONE_API_ENV  # next to api key in console
-        # )
-        # index_name = "indexer"  # put in the name of your pinecone index here
-        # VectorStore = Pinecone.from_texts(chunks, embeddings, index_name=index_name)
-        # Accept user questions/query
-        query = st.text_input("Ask questions about your PDF file:")
-        # st.write(query)
-        if query:
-            docs = VectorStore.similarity_search(query=query, k=3)
-            llm = OpenAI()
-            chain = load_qa_chain(llm=llm, chain_type="stuff")
-            with get_openai_callback() as cb:
-                response = chain.run(input_documents=docs, question=query)
-                print(cb)
-            st.write(response)
 if __name__ == '__main__':

 import os
 import pickle
 from PyPDF2 import PdfReader
+from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.vectorstores import FAISS
 def main():
     st.header("Chat with PDF 💬")
+    # # embeddings
+    store_name = "coffee"
+    if os.path.exists(f"{store_name}.pkl"):
+        with open(f"{store_name}.pkl", "rb") as f:
+            VectorStore = pickle.load(f)
+        st.write('Embeddings Loaded from the Disk')
+    else:
+        st.write('Reading from prompt ...')
+        loader = PyPDFLoader("./咖啡语料.pdf")
+        data = loader.load()
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=512,
             chunk_overlap=128,
             length_function=len
         )
+        texts = text_splitter.split_documents(data)
+        embeddings = OpenAIEmbeddings()
+        VectorStore = FAISS.from_texts([t.page_content for t in texts], embedding=embeddings)
+        with open(f"{store_name}.pkl", "wb") as f:
+            pickle.dump(VectorStore, f)
+    query = st.text_input("Ask questions about Starbucks coffee:")
+    if query:
+        docs = VectorStore.similarity_search(query=query, k=3)
+        llm = OpenAI()
+        chain = load_qa_chain(llm=llm, chain_type="stuff")
+        with get_openai_callback() as cb:
+            response = chain.run(input_documents=docs, question=query)
+            print(cb)
+        st.write(response)
 if __name__ == '__main__':