Delete chatpdf.py
chatpdf.py +0 -98
chatpdf.py
DELETED
@@ -1,98 +0,0 @@
import os
import pickle
import streamlit as st
from streamlit_extras.add_vertical_space import add_vertical_space
from PyPDF2 import PdfReader
from openai.embeddings_utils import get_embedding
import openai
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback
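
# Note: these imports appear to target the pre-1.0 openai package and a
# legacy LangChain release; openai.embeddings_utils was removed in
# openai>=1.0, and the langchain.vectorstores / langchain.llms import
# paths later moved to langchain_community.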

# Sidebar contents
with st.sidebar:
    st.title('🤗LLM Chat App💬')
    st.markdown('''
    ## About
    An OpenAI-based, LLM-powered chatbot built with:
    - [OpenAI](https://platform.openai.com/docs/models) LLM models
    - [Streamlit](https://streamlit.io/)
    - [LangChain](https://python.langchain.com/)
    ''')
    add_vertical_space(5)
    st.write('Made with ❤️ by Harry')
27 |
-
|
28 |
-
|
29 |
-
# Load environment variables
|
30 |
-
# load_dotenv()
|
31 |
-
|
32 |
-
# # Retrieve OpenAI API key
|
33 |
-
# openai_api_key = os.getenv("OPENAI_API_KEY")
|
34 |
-
# if openai_api_key is None:
|
35 |
-
# raise ValueError("The OPENAI_API_KEY environment variable is not set")
|
36 |
-
|
37 |
-
# # Set the OpenAI API key for the OpenAI library
|
38 |
-
# openai.api_key = openai_api_key
|
39 |
-
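
# With load_dotenv() commented out, the LangChain OpenAIEmbeddings and
# OpenAI classes used in main() fall back to reading the OPENAI_API_KEY
# environment variable, so the key must already be set in the environment
# that runs the app.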

def extract_text_from_pdf(pdf):
    """Concatenate the extracted text of every page in the uploaded PDF."""
    pdf_reader = PdfReader(pdf)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text()
    return text


def get_embeddings(text_list):
    """Embed each text via the openai helper (not called by main() below)."""
    return [get_embedding(text) for text in text_list]
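
# If get_embeddings() were used, most openai 0.x releases expect an explicit
# engine, e.g. get_embedding(text, engine="text-embedding-ada-002"); as
# written, the call relies on the helper's default engine.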


def main():
    st.header("Chat with PDF 💬")
    # Upload a PDF file
    pdf = st.file_uploader("Upload your PDF file", type='pdf')

    if pdf is not None:
        # Extract text from the PDF
        text = extract_text_from_pdf(pdf)

        # Split the text into overlapping chunks with LangChain
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )
        chunks = text_splitter.split_text(text=text)
        # Manual alternative kept for reference:
        # chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size - chunk_overlap)]
        st.write("PDF content successfully extracted.")
        # st.write("Below is the chunks data")
        # st.write(chunks)
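
        # With chunk_size=1000 and chunk_overlap=200, consecutive chunks share
        # about 200 characters, so text cut at a chunk boundary still appears
        # intact in one of the two chunks; a 10,000-character PDF yields
        # roughly 10000 / (1000 - 200) ≈ 13 chunks rather than 10.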

        # Create or load embeddings
        store_name = pdf.name[:-4]
        st.write(f'Processing: {store_name}')

        if os.path.exists(f"{store_name}.pkl"):
            with open(f"{store_name}.pkl", "rb") as f:
                VectorStore = pickle.load(f)
            st.write('Embeddings loaded from disk')
        else:
            embeddings = OpenAIEmbeddings()
            VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
            with open(f"{store_name}.pkl", "wb") as f:
                pickle.dump(VectorStore, f)
            st.write('Embeddings created and saved to disk')
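
        # The cache is keyed by file name alone, so two different PDFs sharing
        # a name would reuse one .pkl file. Later LangChain releases recommend
        # FAISS.save_local() / FAISS.load_local() over pickling the whole
        # vector store.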

        # Accept user questions/queries
        query = st.text_input("Ask questions about your PDF file:")

        if query:
            # Retrieve the three chunks most similar to the question
            docs = VectorStore.similarity_search(query=query, k=3)

            llm = OpenAI(model_name="gpt-3.5-turbo")
            # "stuff" concatenates all retrieved chunks into a single prompt
            chain = load_qa_chain(llm=llm, chain_type="stuff")
            with get_openai_callback() as cb:
                response = chain.run(input_documents=docs, question=query)
                print(cb)  # token usage and cost, logged to the console
            st.write(response)


if __name__ == '__main__':
    main()
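
# Run locally with: streamlit run chatpdf.py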