Madhumitha19 commited on
Commit
284841b
·
verified ·
1 Parent(s): 7ab0913

update error handling

Browse files
Files changed (1) hide show
  1. app.py +79 -76
app.py CHANGED
@@ -1,76 +1,79 @@
1
- import streamlit as st
2
- import os
3
- from dotenv import load_dotenv
4
- from PyPDF2 import PdfReader
5
- from langchain.text_splitter import CharacterTextSplitter
6
- from langchain_community.embeddings import OpenAIEmbeddings
7
- from langchain_community.vectorstores import FAISS
8
- from langchain.memory import ConversationBufferMemory
9
- from langchain.chains import ConversationalRetrievalChain
10
- from langchain.chat_models import ChatOpenAI
11
-
12
- def read_pdf(pdf):
13
- text = ""
14
- pdf_reader = PdfReader(pdf)
15
- for page in pdf_reader.pages:
16
- text += page.extract_text()
17
- return text
18
-
19
- def get_chunk_data(text):
20
- text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=250, length_function=len)
21
- chunks = text_splitter.split_text(text)
22
- return chunks
23
-
24
- def get_vector_store(text):
25
- api_key = os.getenv("OPENAI_API_KEY")
26
- embeddings = OpenAIEmbeddings(openai_api_key=api_key)
27
- vectorstore = FAISS.from_texts(texts=text, embedding=embeddings)
28
- return vectorstore
29
-
30
- def get_conversation(vectorstore):
31
- api_key = os.getenv("OPENAI_API_KEY")
32
- llm = ChatOpenAI(openai_api_key=api_key)
33
- memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
34
- conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), memory=memory)
35
- return conversation_chain
36
-
37
- def handleInput(user_text, conversation_chain):
38
- res = conversation_chain({'question': user_text})
39
- chat_history = res['chat_history']
40
- ans = res['answer']
41
- st.write(ans)
42
-
43
- def main():
44
- load_dotenv()
45
-
46
- st.set_page_config(page_title="Chat with PDF")
47
-
48
- if "conversation" not in st.session_state:
49
- st.session_state.conversation = None
50
- if "chat_history" not in st.session_state:
51
- st.session_state.chat_history = None
52
-
53
- st.header("Chat With PDF")
54
-
55
- user_text = st.text_input("Ask question:")
56
- if user_text and st.session_state.conversation:
57
- handleInput(user_text, st.session_state.conversation)
58
-
59
- with st.sidebar:
60
- st.subheader("Your Documents")
61
- pdf = st.file_uploader("Upload PDF")
62
- if pdf and st.button("Submit"):
63
- with st.spinner("Processing..."):
64
- # Read data from pdf
65
- raw_text = read_pdf(pdf)
66
- # Split data into chunks
67
- load_chunks = get_chunk_data(raw_text)
68
- # Create a vector store
69
- vector_store = get_vector_store(load_chunks)
70
-
71
- # Create conversation chain
72
- conversation_chain = get_conversation(vector_store)
73
- st.session_state.conversation = conversation_chain # Save the conversation chain to session state
74
-
75
- if __name__ == '__main__':
76
- main()
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from PyPDF2 import PdfReader
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain_community.embeddings import OpenAIEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain.memory import ConversationBufferMemory
9
+ from langchain.chains import ConversationalRetrievalChain
10
+ from langchain.chat_models import ChatOpenAI
11
+
12
def read_pdf(pdf):
    """Extract the text of every page of an uploaded PDF.

    Args:
        pdf: A file-like object (e.g. a Streamlit UploadedFile) that PyPDF2 can read.

    Returns:
        str: Concatenated text of all pages; empty string if nothing could be read.
    """
    text = ""
    try:
        # strict=False lets PyPDF2 tolerate minor spec violations instead of raising.
        pdf_reader = PdfReader(pdf, strict=False)
        for page in pdf_reader.pages:
            # extract_text() may return None for pages with no extractable text;
            # guard so one such page doesn't raise TypeError and abort the rest.
            text += page.extract_text() or ""
    except Exception as e:
        # Bug fix: message was a plain string, so "{e}" was shown literally.
        st.write(f"Error Reading PDF : {e}")
    return text
21
+
22
def get_chunk_data(text):
    """Split raw document text into overlapping chunks for embedding.

    Args:
        text (str): Full extracted document text.

    Returns:
        list[str]: Chunks of at most 1000 characters, overlapping by 250.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=250,
        length_function=len,
    )
    return splitter.split_text(text)
26
+
27
def get_vector_store(text):
    """Embed the given text chunks and index them in an in-memory FAISS store.

    Args:
        text (list[str]): Text chunks produced by get_chunk_data().

    Returns:
        FAISS: A vector store built over the embedded chunks.
    """
    key = os.getenv("OPENAI_API_KEY")
    embedder = OpenAIEmbeddings(openai_api_key=key)
    return FAISS.from_texts(texts=text, embedding=embedder)
32
+
33
def get_conversation(vectorstore):
    """Build a retrieval chat chain with conversational memory over the store.

    Args:
        vectorstore: A FAISS store returned by get_vector_store().

    Returns:
        ConversationalRetrievalChain: Chain ready to answer questions with history.
    """
    key = os.getenv("OPENAI_API_KEY")
    chat_model = ChatOpenAI(openai_api_key=key)
    history = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vectorstore.as_retriever(),
        memory=history,
    )
39
+
40
def handleInput(user_text, conversation_chain):
    """Run the user's question through the chain and render the answer.

    Args:
        user_text (str): The question typed by the user.
        conversation_chain: Chain built by get_conversation().
    """
    res = conversation_chain({'question': user_text})
    # Fix: the history was previously assigned to an unused local and dropped,
    # even though main() initialises st.session_state.chat_history for it.
    st.session_state.chat_history = res['chat_history']
    st.write(res['answer'])
45
+
46
def main():
    """Streamlit entry point: wire up the UI and the PDF-chat workflow."""
    load_dotenv()

    st.set_page_config(page_title="Chat with PDF")

    # Ensure both session-state slots exist before any widget logic runs.
    for slot in ("conversation", "chat_history"):
        if slot not in st.session_state:
            st.session_state[slot] = None

    st.header("Chat With PDF")

    question = st.text_input("Ask question:")
    if question and st.session_state.conversation:
        handleInput(question, st.session_state.conversation)

    with st.sidebar:
        st.subheader("Your Documents")
        uploaded = st.file_uploader("Upload PDF")
        if uploaded and st.button("Submit"):
            with st.spinner("Processing..."):
                raw_text = read_pdf(uploaded)       # 1. pull text out of the PDF
                chunks = get_chunk_data(raw_text)   # 2. split into overlapping chunks
                store = get_vector_store(chunks)    # 3. embed + index the chunks
                # 4. build the chat chain and persist it across Streamlit reruns
                st.session_state.conversation = get_conversation(store)
77
+
78
# Script entry point: run the Streamlit app when executed directly.
if __name__ == '__main__':
    main()