mikepastor11 committed on
Commit
46158ec
1 Parent(s): 5b7b180

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -17
app.py CHANGED
@@ -4,7 +4,7 @@
4
  # HuggingFace Spaces application to analyze uploaded PDF files
5
  # with open-source models ( hkunlp/instructor-xl )
6
  #
7
- # Mike Pastor February 16, 2024
8
 
9
 
10
  import streamlit as st
@@ -25,16 +25,14 @@ from langchain_community.embeddings import HuggingFaceInstructEmbeddings
25
 
26
  # from langchain.vectorstores import FAISS
27
  from langchain_community.vectorstores import FAISS
28
-
29
  from langchain.text_splitter import CharacterTextSplitter
30
-
31
  from langchain.memory import ConversationBufferMemory
32
  from langchain.chains import ConversationalRetrievalChain
33
 
34
-
35
  # from langchain.llms import HuggingFaceHub
36
  from langchain_community.llms import HuggingFaceHub
37
 
 
38
  def extract_pdf_text(pdf_docs):
39
  text = ""
40
  for pdf in pdf_docs:
@@ -43,6 +41,7 @@ def extract_pdf_text(pdf_docs):
43
  text += page.extract_text()
44
  return text
45
 
 
46
  # Chunk size and overlap must not exceed the model's capacity!
47
  #
48
  def extract_bitesize_pieces(text):
@@ -55,7 +54,7 @@ def extract_bitesize_pieces(text):
55
  chunks = text_splitter.split_text(text)
56
  return chunks
57
 
58
-
59
  def prepare_embedding_vectors(text_chunks):
60
 
61
  st.write('Here in vector store....', unsafe_allow_html=True)
@@ -82,7 +81,8 @@ def prepare_embedding_vectors(text_chunks):
82
  st.write('FAISS succeeds: ')
83
 
84
  return vectorstore
85
-
 
86
  def prepare_conversation(vectorstore):
87
  # llm = ChatOpenAI()
88
  # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
@@ -98,6 +98,7 @@ def prepare_conversation(vectorstore):
98
  )
99
  return conversation_chain
100
 
 
101
  def process_user_question(user_question):
102
 
103
  print('process_user_question called: \n')
@@ -169,19 +170,22 @@ def main():
169
  # st.set_page_config(page_title="Pennwick PDF Analyzer", page_icon=im )
170
  # st.set_page_config(page_title="Pennwick PDF Analyzer")
171
 
172
- import base64
173
- from PIL import Image
174
 
175
- # Open your image
176
- image = Image.open("robot_icon.ico")
177
 
178
- # Convert image to base64 string
179
- with open("robot_icon.ico", "rb") as f:
180
- encoded_string = base64.b64encode(f.read()).decode()
181
 
182
- # Set page config with base64 string
183
- st.set_page_config(page_title="Pennwick File Analyzer 2", page_icon=f"data:image/ico;base64,{encoded_string}")
 
184
 
 
 
185
  print( 'prepared page...\n')
186
 
187
 
@@ -194,8 +198,11 @@ def main():
194
  if "chat_history" not in st.session_state:
195
  st.session_state.chat_history = None
196
 
197
- # st.header("Pennwick File Analyzer :books:")
198
- st.header("Pennwick File Analyzer 2")
 
 
 
199
 
200
  user_question = None
201
  user_question = st.text_input("Ask the Open Source - Flan-t5 Model a question about your uploaded documents:")
 
4
  # HuggingFace Spaces application to analyze uploaded PDF files
5
  # with open-source models ( hkunlp/instructor-xl )
6
  #
7
+ # Mike Pastor February 17, 2024
8
 
9
 
10
  import streamlit as st
 
25
 
26
  # from langchain.vectorstores import FAISS
27
  from langchain_community.vectorstores import FAISS
 
28
  from langchain.text_splitter import CharacterTextSplitter
 
29
  from langchain.memory import ConversationBufferMemory
30
  from langchain.chains import ConversationalRetrievalChain
31
 
 
32
  # from langchain.llms import HuggingFaceHub
33
  from langchain_community.llms import HuggingFaceHub
34
 
35
+ ##################################################################################
36
  def extract_pdf_text(pdf_docs):
37
  text = ""
38
  for pdf in pdf_docs:
 
41
  text += page.extract_text()
42
  return text
43
 
44
+ ##################################################################################
45
  # Chunk size and overlap must not exceed the model's capacity!
46
  #
47
  def extract_bitesize_pieces(text):
 
54
  chunks = text_splitter.split_text(text)
55
  return chunks
56
 
57
+ ##################################################################################
58
  def prepare_embedding_vectors(text_chunks):
59
 
60
  st.write('Here in vector store....', unsafe_allow_html=True)
 
81
  st.write('FAISS succeeds: ')
82
 
83
  return vectorstore
84
+
85
+ ##################################################################################
86
  def prepare_conversation(vectorstore):
87
  # llm = ChatOpenAI()
88
  # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
 
98
  )
99
  return conversation_chain
100
 
101
+ ##################################################################################
102
  def process_user_question(user_question):
103
 
104
  print('process_user_question called: \n')
 
170
  # st.set_page_config(page_title="Pennwick PDF Analyzer", page_icon=im )
171
  # st.set_page_config(page_title="Pennwick PDF Analyzer")
172
 
173
+ # import base64
174
+ # from PIL import Image
175
 
176
+ # # Open your image
177
+ # image = Image.open("robot_icon.ico")
178
 
179
+ # # Convert image to base64 string
180
+ # with open("robot_icon.ico", "rb") as f:
181
+ # encoded_string = base64.b64encode(f.read()).decode()
182
 
183
+ # # Set page config with base64 string
184
+ # st.set_page_config(page_title="Pennwick File Analyzer 2", page_icon=f"data:image/ico;base64,{encoded_string}")
185
+
186
 
187
+ st.set_page_config(page_title="Pennwick File Analyzer", page_icon="./robot_icon.ico")
188
+
189
  print( 'prepared page...\n')
190
 
191
 
 
198
  if "chat_history" not in st.session_state:
199
  st.session_state.chat_history = None
200
 
201
+ # st.header("Pennwick File Analyzer :shark:")
202
+ # st.header("Pennwick File Analyzer 2")
203
+
204
+ st.image("robot_icon.png", width=96 )
205
+ st.header(f"Pennwick File Analyzer")
206
 
207
  user_question = None
208
  user_question = st.text_input("Ask the Open Source - Flan-t5 Model a question about your uploaded documents:")