haiyiwu committed on
Commit
2af041d
1 Parent(s): b7157b4

Delete chatpdf.py

Browse files
Files changed (1) hide show
  1. chatpdf.py +0 -98
chatpdf.py DELETED
@@ -1,98 +0,0 @@
1
- import os
2
- import pickle
3
- import streamlit as st
4
- from streamlit_extras.add_vertical_space import add_vertical_space
5
- from PyPDF2 import PdfReader
6
- from openai.embeddings_utils import get_embedding
7
- import openai
8
- from dotenv import load_dotenv
9
- from langchain.text_splitter import RecursiveCharacterTextSplitter
10
- from langchain.embeddings.openai import OpenAIEmbeddings
11
- from langchain.vectorstores import FAISS
12
- from langchain.llms import OpenAI
13
- from langchain.chains.question_answering import load_qa_chain
14
- from langchain.callbacks import get_openai_callback
# Sidebar contents: app title, an "About" blurb, and an attribution line.
_ABOUT_MARKDOWN = '''
## About
OpenAI based LLM-powered chatbot built using:
- [OpenAI](https://platform.openai.com/docs/models) LLM model
- [Streamlit](https://streamlit.io/)
- [LangChain](https://python.langchain.com/)
'''

with st.sidebar:
    st.title('🤗LLM Chat App💬')
    st.markdown(_ABOUT_MARKDOWN)
    add_vertical_space(5)
    st.write('Made with ❤️ by Harry')
27
-
28
-
29
- # Load environment variables
30
- # load_dotenv()
31
-
32
- # # Retrieve OpenAI API key
33
- # openai_api_key = os.getenv("OPENAI_API_KEY")
34
- # if openai_api_key is None:
35
- # raise ValueError("The OPENAI_API_KEY environment variable is not set")
36
-
37
- # # Set the OpenAI API key for the OpenAI library
38
- # openai.api_key = openai_api_key
39
-
def extract_text_from_pdf(pdf):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf: The value handed straight to ``PdfReader``; ``main`` passes the
            object returned by ``st.file_uploader``.

    Returns:
        One ``str`` containing each page's extracted text in page order,
        joined without any separator.
    """
    pdf_reader = PdfReader(pdf)
    # ``or ""`` lets a page whose extraction result is falsy (empty, or None
    # if a reader version ever returns that -- not verified here) contribute
    # nothing instead of breaking the concatenation. ``str.join`` replaces
    # the repeated ``+=`` on a growing string.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def get_embeddings(text_list):
    """Embed every item of ``text_list`` with ``get_embedding``.

    Returns a new list holding one result per input item, in input order.
    """
    vectors = []
    for item in text_list:
        vectors.append(get_embedding(item))
    return vectors
def _load_or_create_vector_store(store_name, chunks):
    """Return the vector store cached in ``<store_name>.pkl``, building it if absent.

    Args:
        store_name: Base name (no extension) of the pickle cache file, which
            is read from / written to the current working directory.
        chunks: Text chunks to index when no cache file exists yet.

    Returns:
        The object loaded from the cache file, or a newly built ``FAISS``
        store that has just been pickled to that file.
    """
    cache_path = f"{store_name}.pkl"

    if os.path.exists(cache_path):
        # SECURITY NOTE: pickle.load can execute arbitrary code contained in
        # the file. This is only acceptable while the .pkl files in the
        # working directory are written exclusively by this app.
        # NOTE(review): the cache is keyed by file name only, so a different
        # PDF uploaded under the same name reuses the old embeddings.
        with open(cache_path, "rb") as f:
            vector_store = pickle.load(f)
        st.write('Embeddings loaded from the disk')
        return vector_store

    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    with open(cache_path, "wb") as f:
        pickle.dump(vector_store, f)
    st.write('Embeddings created and saved to disk')
    return vector_store


def main():
    """Render the "Chat with PDF" page.

    Flow: upload a PDF, extract and chunk its text, load or build the
    embeddings store for it, then answer a typed question from the three
    most similar chunks. Returns ``None``; all output goes through Streamlit
    widgets, plus the OpenAI callback summary printed to stdout.
    """
    st.header("Chat with PDF 💬")

    # Upload a PDF file; nothing else to do until one is provided.
    pdf = st.file_uploader("Upload your PDF file", type='pdf')
    if pdf is None:
        return

    # Extract the text and split it into overlapping chunks.
    text = extract_text_from_pdf(pdf)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # With no chunks there is nothing to embed or search, so stop here
        # rather than handing an empty list to the vector store builder.
        st.warning("No extractable text was found in this PDF.")
        return
    st.write("PDF content successfully extracted.")

    # Create or load embeddings, cached per uploaded file name.
    # splitext drops the extension without assuming it is exactly 4 characters.
    store_name = os.path.splitext(pdf.name)[0]
    st.write(f'Processing: {store_name}')
    vector_store = _load_or_create_vector_store(store_name, chunks)

    # Accept user questions/query.
    query = st.text_input("Ask questions about your PDF file:")
    if not query:
        return

    docs = vector_store.similarity_search(query=query, k=3)

    # NOTE(review): "gpt-3.5-turbo" is a chat model being passed to the
    # completion-style ``OpenAI`` wrapper -- confirm this is intended for the
    # installed LangChain version.
    llm = OpenAI(model_name="gpt-3.5-turbo")
    chain = load_qa_chain(llm=llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=query)
        # The callback summary goes to the server's stdout, not the page.
        print(cb)
    st.write(response)


if __name__ == '__main__':
    main()