JPBianchi committed on
Commit
677bca6
1 Parent(s): bbf4302

Showing banner when issue with Weaviate

Browse files
Files changed (2) hide show
  1. app.py +81 -62
  2. backend.py +0 -1
app.py CHANGED
@@ -9,7 +9,7 @@ from app_features import (convert_seconds, generate_prompt_series, search_result
9
  expand_content)
10
  from retrieval_evaluation import execute_evaluation, calc_hit_rate_scores
11
  from llama_index.finetuning import EmbeddingQAFinetuneDataset
12
- from weaviate_interface import WeaviateClient
13
  from openai import BadRequestError
14
  from reranker import ReRanker
15
  from loguru import logger
@@ -147,7 +147,7 @@ def download_model(model_name_or_path, model_local_path):
147
 
148
  #%%
149
  # for streamlit online, we must download the model from google drive
150
-
151
  def check_model(model_name_or_path):
152
 
153
  model_name = model_name_or_path.split('/')[-1] # remove 'sentence-transformers'
@@ -163,18 +163,27 @@ def check_model(model_name_or_path):
163
 
164
  #%% instantiate Weaviate client
165
  def get_weaviate_client(api_key, url, model_name_or_path, openai_api_key):
166
- client = WeaviateClient(api_key, url,
167
- model_name_or_path=model_name_or_path,
168
- openai_api_key=openai_api_key)
169
- client.display_properties.append('summary')
170
- # available_classes = sorted(client.show_classes()) # doesn't work anymore
171
- # print(available_classes)
172
- available_classes = sorted([c['class'] for c in client.schema.get()['classes']])
173
- # print(available_classes)
174
- # st.write(f"Available classes: {available_classes}")
175
- # st.write(f"Available classes type: {type(available_classes)}")
176
- logger.info(available_classes)
177
- return client, available_classes
 
 
 
 
 
 
 
 
 
178
 
179
 
180
  ##############
@@ -260,56 +269,60 @@ def main():
260
  st.write("Finetuning not available on Streamlit online because of space limitations")
261
 
262
  check_model(model_name_or_path)
263
- try:
264
- client, available_classes = get_weaviate_client(Wapi_key, url, model_name_or_path, openai_api_key)
265
- print(available_classes)
266
-
267
- except Exception as e:
268
- # Weaviate doesn't know this model, maybe we're just finetuning a model
269
- st.sidebar.write(f"Model unknown to Weaviate")
270
- st.stop()
271
-
272
- start_class = 'Impact_theory_all_mpnet_base_v2_finetuned'
273
-
274
- class_name = st.selectbox(
275
- label='Class Name:',
276
- options=available_classes,
277
- index=available_classes.index(start_class),
278
- placeholder='Select Class Name'
279
- )
280
 
281
- st.write("----------")
282
-
283
- if we_are_not_online:
284
- c1,c2 = st.columns([8,1])
285
- with c1:
286
- show_metrics = st.button('Show Metrics on Golden set')
287
- if show_metrics:
288
- # we must add it because the hybrid search toggle could hide it
289
- alpha_input2 = st.slider(label='Alpha',min_value=0.00, max_value=1.00, value=0.40, step=0.05, key=2)
290
-
291
- # _, center, _ = st.columns([3, 5, 3])
292
- # with center:
293
- # st.text("Metrics")
294
- with c2:
295
- with st.spinner(''):
296
- metrics = execute_evaluation(golden_dataset, class_name, client, alpha=alpha_input2)
297
- with c1:
298
- kw_hit_rate = metrics['kw_hit_rate']
299
- kw_mrr = metrics['kw_mrr']
300
- hybrid_hit_rate = metrics['hybrid_hit_rate']
301
- vector_hit_rate = metrics['vector_hit_rate']
302
- vector_mrr = metrics['vector_mrr']
303
- total_misses = metrics['total_misses']
304
-
305
- st.text(f"KW hit rate: {kw_hit_rate}")
306
- st.text(f"Vector hit rate: {vector_hit_rate}")
307
- st.text(f"Hybrid hit rate: {hybrid_hit_rate}")
308
- st.text(f"Hybrid MRR: {vector_mrr}")
309
- st.text(f"Total misses: {total_misses}")
310
 
311
- st.write("----------")
 
312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  st.title("Chat with the Impact Theory podcasts!")
314
  # st.image('./assets/impact-theory-logo.png', width=400)
315
  st.image('assets/it_tom_bilyeu.png', use_column_width=True)
@@ -317,9 +330,15 @@ def main():
317
  st.write('\n')
318
  # st.stop()
319
 
320
-
321
  st.write("\u21D0 Open the sidebar to change Search settings \n ") # https://home.unicode.org also 21E0, 21B0 B2 D0
322
 
 
 
 
 
 
 
 
323
  if not hybrid_search:
324
  st.stop()
325
 
 
9
  expand_content)
10
  from retrieval_evaluation import execute_evaluation, calc_hit_rate_scores
11
  from llama_index.finetuning import EmbeddingQAFinetuneDataset
12
+
13
  from openai import BadRequestError
14
  from reranker import ReRanker
15
  from loguru import logger
 
147
 
148
  #%%
149
  # for streamlit online, we must download the model from google drive
150
+ # because github LFS doesn't work on forked repos
151
  def check_model(model_name_or_path):
152
 
153
  model_name = model_name_or_path.split('/')[-1] # remove 'sentence-transformers'
 
163
 
164
  #%% instantiate Weaviate client
165
  def get_weaviate_client(api_key, url, model_name_or_path, openai_api_key):
166
+ try:
167
+ client = WeaviateClient(api_key, url,
168
+ model_name_or_path=model_name_or_path,
169
+ openai_api_key=openai_api_key)
170
+ except Exception:
171
+ # client not available, wrong key, expired free sandbox etc
172
+ return None, None
173
+
174
+ try:
175
+ client.display_properties.append('summary')
176
+ # available_classes = sorted(client.show_classes()) # doesn't work anymore
177
+ # print(available_classes)
178
+ available_classes = sorted([c['class'] for c in client.schema.get()['classes']])
179
+ # print(available_classes)
180
+ # st.write(f"Available classes: {available_classes}")
181
+ # st.write(f"Available classes type: {type(available_classes)}")
182
+ logger.info(available_classes)
183
+ return client, available_classes
184
+
185
+ except Exception:
186
+ return client, []
187
 
188
 
189
  ##############
 
269
  st.write("Finetuning not available on Streamlit online because of space limitations")
270
 
271
  check_model(model_name_or_path)
272
+ client, available_classes = get_weaviate_client(Wapi_key, url, model_name_or_path, openai_api_key)
273
+ print("Available classes:", available_classes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
+ if client is None:
276
+ # maybe the free sandbox has expired, or the api key is wrong
277
+ st.sidebar.write(f"Weaviate sandbox not accessible or expired")
278
+ # st.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
+ elif available_classes:
281
+ start_class = 'Impact_theory_all_mpnet_base_v2_finetuned'
282
 
283
+ class_name = st.selectbox(
284
+ label='Class Name:',
285
+ options=available_classes,
286
+ index=available_classes.index(start_class),
287
+ placeholder='Select Class Name'
288
+ )
289
+
290
+ st.write("----------")
291
+
292
+ if we_are_not_online:
293
+ c1,c2 = st.columns([8,1])
294
+ with c1:
295
+ show_metrics = st.button('Show Metrics on Golden set')
296
+ if show_metrics:
297
+ # we must add it because the hybrid search toggle could hide it
298
+ alpha_input2 = st.slider(label='Alpha',min_value=0.00, max_value=1.00, value=0.40, step=0.05, key=2)
299
+
300
+ # _, center, _ = st.columns([3, 5, 3])
301
+ # with center:
302
+ # st.text("Metrics")
303
+ with c2:
304
+ with st.spinner(''):
305
+ metrics = execute_evaluation(golden_dataset, class_name, client, alpha=alpha_input2)
306
+ with c1:
307
+ kw_hit_rate = metrics['kw_hit_rate']
308
+ kw_mrr = metrics['kw_mrr']
309
+ hybrid_hit_rate = metrics['hybrid_hit_rate']
310
+ vector_hit_rate = metrics['vector_hit_rate']
311
+ vector_mrr = metrics['vector_mrr']
312
+ total_misses = metrics['total_misses']
313
+
314
+ st.text(f"KW hit rate: {kw_hit_rate}")
315
+ st.text(f"Vector hit rate: {vector_hit_rate}")
316
+ st.text(f"Hybrid hit rate: {hybrid_hit_rate}")
317
+ st.text(f"Hybrid MRR: {vector_mrr}")
318
+ st.text(f"Total misses: {total_misses}")
319
+
320
+ st.write("----------")
321
+ else:
322
+ # Weaviate doesn't know this model, maybe we're just finetuning a model
323
+ st.sidebar.write(f"Model Unknown to Weaviate")
324
+
325
+
326
  st.title("Chat with the Impact Theory podcasts!")
327
  # st.image('./assets/impact-theory-logo.png', width=400)
328
  st.image('assets/it_tom_bilyeu.png', use_column_width=True)
 
330
  st.write('\n')
331
  # st.stop()
332
 
 
333
  st.write("\u21D0 Open the sidebar to change Search settings \n ") # https://home.unicode.org also 21E0, 21B0 B2 D0
334
 
335
+ if client is None:
336
+ st.write("Weaviate sandbox not accessible or expired!!! Stopping execution!")
337
+ st.stop()
338
+ elif not available_classes:
339
+ # we have to stop here, to exit the 'with st.sidebar' block and display the banner at least
340
+ st.stop()
341
+
342
  if not hybrid_search:
343
  st.stop()
344
 
backend.py CHANGED
@@ -9,7 +9,6 @@ from typing import List, Dict, Tuple, Union, Callable
9
  # remote_path="./data",
10
  # )
11
 
12
-
13
  stub = modal.Stub("vector-search-project")
14
  vector_search = modal.Image.debian_slim().pip_install(
15
  "sentence_transformers==2.2.2", "llama_index==0.9.6.post1", "angle_emb==0.1.5"
 
9
  # remote_path="./data",
10
  # )
11
 
 
12
  stub = modal.Stub("vector-search-project")
13
  vector_search = modal.Image.debian_slim().pip_install(
14
  "sentence_transformers==2.2.2", "llama_index==0.9.6.post1", "angle_emb==0.1.5"