mikepastor11 committed on
Commit
56250cf
1 Parent(s): efc9f3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -18
app.py CHANGED
@@ -33,7 +33,7 @@ from langchain.chains import ConversationalRetrievalChain
33
  # from langchain.llms import HuggingFaceHub
34
  from langchain_community.llms import HuggingFaceHub
35
 
36
- def get_pdf_text(pdf_docs):
37
  text = ""
38
  for pdf in pdf_docs:
39
  pdf_reader = PdfReader(pdf)
@@ -43,7 +43,7 @@ def get_pdf_text(pdf_docs):
43
 
44
  # Chunk size and overlap must not exceed the model's capacity!
45
  #
46
- def get_text_chunks(text):
47
  text_splitter = CharacterTextSplitter(
48
  separator="\n",
49
  chunk_size=800, # 1000
@@ -54,7 +54,7 @@ def get_text_chunks(text):
54
  return chunks
55
 
56
 
57
- def get_vectorstore(text_chunks):
58
 
59
  st.write('Here in vector store....', unsafe_allow_html=True)
60
  # embeddings = OpenAIEmbeddings()
@@ -81,7 +81,7 @@ def get_vectorstore(text_chunks):
81
 
82
  return vectorstore
83
 
84
- def get_conversation_chain(vectorstore):
85
  # llm = ChatOpenAI()
86
  # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
87
  # google/bigbird-roberta-base facebook/bart-large
@@ -96,13 +96,12 @@ def get_conversation_chain(vectorstore):
96
  )
97
  return conversation_chain
98
 
99
- def handle_userinput(user_question):
100
 
101
  response = st.session_state.conversation({'question': user_question})
102
  # response = st.session_state.conversation({'summarization': user_question})
103
  st.session_state.chat_history = response['chat_history']
104
 
105
-
106
  # st.empty()
107
 
108
  for i, message in enumerate(st.session_state.chat_history):
@@ -114,17 +113,14 @@ def handle_userinput(user_question):
114
  st.write(bot_template.replace(
115
  "{{MSG}}", message.content), unsafe_allow_html=True)
116
 
117
-
118
-
119
-
120
  def main():
121
 
122
-
123
-
124
  # load_dotenv()
125
  # st.set_page_config(page_title="Pennwick PDF Analyzer", page_icon=":books:")
126
- im = Image.open("robot_icon.ico")
127
- st.set_page_config(page_title="Pennwick PDF Analyzer", page_icon=im )
 
128
 
129
  st.write(css, unsafe_allow_html=True)
130
 
@@ -138,7 +134,7 @@ def main():
138
 
139
  user_question = st.text_input("Ask the Model a question about your uploaded documents:")
140
  if user_question:
141
- handle_userinput(user_question)
142
 
143
  # st.write( user_template, unsafe_allow_html=True)
144
  # st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
@@ -164,18 +160,18 @@ def main():
164
  st.write("Vectorizing Files - Current Time =", global_current_time)
165
 
166
  # get pdf text
167
- raw_text = get_pdf_text(pdf_docs)
168
  # st.write(raw_text)
169
 
170
  # # get the text chunks
171
- text_chunks = get_text_chunks(raw_text)
172
  # st.write(text_chunks)
173
 
174
  # # create vector store
175
- vectorstore = get_vectorstore(text_chunks)
176
 
177
  # # create conversation chain
178
- st.session_state.conversation = get_conversation_chain(vectorstore)
179
 
180
  # Mission Complete!
181
  global_later = datetime.now()
 
33
  # from langchain.llms import HuggingFaceHub
34
  from langchain_community.llms import HuggingFaceHub
35
 
36
+ def extract_pdf_text(pdf_docs):
37
  text = ""
38
  for pdf in pdf_docs:
39
  pdf_reader = PdfReader(pdf)
 
43
 
44
  # Chunk size and overlap must not exceed the model's capacity!
45
  #
46
+ def extract_bitesize_pieces(text):
47
  text_splitter = CharacterTextSplitter(
48
  separator="\n",
49
  chunk_size=800, # 1000
 
54
  return chunks
55
 
56
 
57
+ def prepare_embedding_vectors(text_chunks):
58
 
59
  st.write('Here in vector store....', unsafe_allow_html=True)
60
  # embeddings = OpenAIEmbeddings()
 
81
 
82
  return vectorstore
83
 
84
+ def prepare_conversation(vectorstore):
85
  # llm = ChatOpenAI()
86
  # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
87
  # google/bigbird-roberta-base facebook/bart-large
 
96
  )
97
  return conversation_chain
98
 
99
+ def process_user_question(user_question):
100
 
101
  response = st.session_state.conversation({'question': user_question})
102
  # response = st.session_state.conversation({'summarization': user_question})
103
  st.session_state.chat_history = response['chat_history']
104
 
 
105
  # st.empty()
106
 
107
  for i, message in enumerate(st.session_state.chat_history):
 
113
  st.write(bot_template.replace(
114
  "{{MSG}}", message.content), unsafe_allow_html=True)
115
 
116
+ ###################################################################################
 
 
117
  def main():
118
 
 
 
119
  # load_dotenv()
120
  # st.set_page_config(page_title="Pennwick PDF Analyzer", page_icon=":books:")
121
+ # im = Image.open("robot_icon.ico")
122
+ # st.set_page_config(page_title="Pennwick PDF Analyzer", page_icon=im )
123
+ st.set_page_config(page_title="Pennwick PDF Analyzer")
124
 
125
  st.write(css, unsafe_allow_html=True)
126
 
 
134
 
135
  user_question = st.text_input("Ask the Model a question about your uploaded documents:")
136
  if user_question:
137
+ process_user_question(user_question)
138
 
139
  # st.write( user_template, unsafe_allow_html=True)
140
  # st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
 
160
  st.write("Vectorizing Files - Current Time =", global_current_time)
161
 
162
  # get pdf text
163
+ raw_text = extract_pdf_text(pdf_docs)
164
  # st.write(raw_text)
165
 
166
  # # get the text chunks
167
+ text_chunks = extract_bitesize_pieces(raw_text)
168
  # st.write(text_chunks)
169
 
170
  # # create vector store
171
+ vectorstore = prepare_embedding_vectors(text_chunks)
172
 
173
  # # create conversation chain
174
+ st.session_state.conversation = prepare_conversation(vectorstore)
175
 
176
  # Mission Complete!
177
  global_later = datetime.now()