Alex5666 committed on
Commit
80d0cdb
1 Parent(s): 1dd04c5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import required modules from 'langchain' for document processing, embeddings, Q&A, etc.
2
+ from langchain.document_loaders import PyPDFLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.vectorstores import Chroma
5
+ from langchain.embeddings import OpenAIEmbeddings
6
+ from langchain.chat_models import ChatOpenAI
7
+ from langchain.chains import RetrievalQA
8
+
9
+ # Importing Streamlit for creating the web app, and other necessary modules for file handling.
10
+ import streamlit as st
11
+ import tempfile
12
+ import os
13
+
14
+ # Import a handler for streaming outputs.
15
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
16
+
# Page header, followed by a horizontal rule for visual separation.
st.title("ChatPDF")
st.write("---")

# Password-style text field where the user supplies their OpenAI API key.
openai_key = st.text_input("Enter OPEN_AI_API_KEY", type="password")

# Upload widget restricted to PDF files, closed off by another horizontal rule.
uploaded_file = st.file_uploader("Upload your PDF file!", type=["pdf"])
st.write("---")

def pdf_to_document(uploaded_file):
    """Load an uploaded PDF and return its contents as LangChain documents.

    The upload is first written to a temporary directory because
    ``PyPDFLoader`` is constructed from a file path.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``. Only its
            ``name`` attribute and ``getvalue()`` method are used.

    Returns:
        Whatever ``PyPDFLoader.load_and_split()`` returns for the uploaded
        PDF (the pages/chunks later passed to the text splitter).
    """
    # Keep only the final path component so a crafted upload name cannot make
    # the write land outside the temporary directory.
    safe_name = os.path.basename(uploaded_file.name) or "upload.pdf"

    # The context manager removes the directory deterministically, even if
    # loading raises, instead of relying on garbage collection for cleanup.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_filepath = os.path.join(temp_dir, safe_name)

        # Persist the uploaded bytes so the loader can open them by path.
        with open(temp_filepath, "wb") as f:
            f.write(uploaded_file.getvalue())

        # Loading has to finish while the temporary file still exists.
        loader = PyPDFLoader(temp_filepath)
        pages = loader.load_and_split()

    return pages

# Base class that LangChain callback handlers derive from.
from langchain.callbacks.base import BaseCallbackHandler


class StreamHandler(BaseCallbackHandler):
    """Render streamed LLM tokens into a Streamlit container as they arrive.

    Args:
        container: Object exposing ``markdown(text)``; below this is the
            placeholder returned by ``st.empty()``.
        initial_text: Text to start from before the first token arrives.
    """

    def __init__(self, container, initial_text=""):
        self.container = container
        self.text = initial_text

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Append ``token`` to the running text and redraw the container."""
        self.text += token
        self.container.markdown(self.text)


# Everything below runs only once the user has uploaded a PDF.
if uploaded_file is not None:
    # Fail early with a readable message instead of a stack trace from the
    # OpenAI client. An environment-provided key is still accepted so that
    # leaving the input empty keeps working where OPENAI_API_KEY is set.
    if not (openai_key or os.environ.get("OPENAI_API_KEY")):
        st.warning("Please enter your OpenAI API key to continue.")
        st.stop()

    # Streamlit re-executes this script on every widget interaction, so the
    # vector store is kept in session state and rebuilt only when the upload
    # or the API key changes. This avoids re-embedding the PDF on each rerun.
    cache_signature = (
        uploaded_file.name,
        len(uploaded_file.getvalue()),
        openai_key,
    )
    if st.session_state.get("chatpdf_db_signature") != cache_signature:
        # Convert the uploaded PDF into a document format.
        pages = pdf_to_document(uploaded_file)

        # Break the document into small, slightly overlapping chunks.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=300,
            chunk_overlap=20,
            length_function=len,
        )
        texts = text_splitter.split_documents(pages)

        # Embed the chunks with the provided API key and load them into Chroma.
        embeddings_model = OpenAIEmbeddings(openai_api_key=openai_key)
        st.session_state["chatpdf_db"] = Chroma.from_documents(
            texts, embeddings_model
        )
        st.session_state["chatpdf_db_signature"] = cache_signature

    db = st.session_state["chatpdf_db"]

    # Question section of the web app.
    st.header("Ask the PDF a question!")
    question = st.text_input('Type your question')

    if st.button('Ask'):
        if not question.strip():
            # Do not spend an API call on an empty query.
            st.warning("Please type a question first.")
        else:
            # Show a spinner while the question is being processed.
            with st.spinner('Processing...'):
                # Placeholder that the stream handler redraws token by token.
                chat_box = st.empty()
                stream_handler = StreamHandler(chat_box)

                # Chat model with streaming enabled so tokens reach the handler.
                llm = ChatOpenAI(
                    model_name="gpt-3.5-turbo",
                    temperature=0,
                    openai_api_key=openai_key,
                    streaming=True,
                    callbacks=[stream_handler],
                )

                # Retrieval-augmented QA chain over the Chroma retriever. The
                # answer is displayed through the streaming callback, so the
                # chain's return value is not used.
                qa_chain = RetrievalQA.from_chain_type(
                    llm, retriever=db.as_retriever()
                )
                qa_chain({"query": question})