Chris4K committed on
Commit
54a0f5c
1 Parent(s): af523e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -49,6 +49,8 @@ def download_nltk_resources():
49
 
50
  download_nltk_resources()
51
 
 
 
52
  FILES_DIR = './files'
53
 
54
  # Model Management
@@ -159,9 +161,9 @@ def phonetic_match(text, query, method='levenshtein_distance', apply_phonetic=Tr
159
 
160
  def optimize_query(query, llm_model):
161
  llm = HuggingFacePipeline.from_model_id(
162
- model_id=llm_model,
163
  task="text2text-generation",
164
- model_kwargs={"temperature": 0, "max_length": 64},
165
  )
166
  multi_query_retriever = MultiQueryRetriever.from_llm(
167
  retriever=get_retriever(vector_store, search_type, search_kwargs),
@@ -391,6 +393,8 @@ def visualize_results(results_df, stats_df):
391
 
392
  sns.barplot(x='model', y='search_time', data=stats_df, ax=axs[0, 0])
393
  axs[0, 0].set_title('Search Time by Model')
 
 
394
  axs[0, 0].set_xticklabels(axs[0, 0].get_xticklabels(), rotation=45, ha='right')
395
 
396
  sns.scatterplot(x='result_diversity', y='rank_correlation', hue='model', data=stats_df, ax=axs[0, 1])
@@ -398,6 +402,7 @@ def visualize_results(results_df, stats_df):
398
 
399
  sns.boxplot(x='model', y='avg_content_length', data=stats_df, ax=axs[1, 0])
400
  axs[1, 0].set_title('Distribution of Result Content Lengths')
 
401
  axs[1, 0].set_xticklabels(axs[1, 0].get_xticklabels(), rotation=45, ha='right')
402
 
403
  embeddings = np.array([embedding for embedding in results_df['embedding'] if isinstance(embedding, np.ndarray)])
@@ -514,6 +519,8 @@ def compare_embeddings(file, query, embedding_models, custom_embedding_model, sp
514
 
515
  stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, top_k, expected_result)
516
  stats["model"] = f"{model_type} - {model_name}"
 
 
517
  stats.update(settings)
518
 
519
  formatted_results = format_results(results_raw, stats)
@@ -605,6 +612,8 @@ def automated_testing(file, query, test_params, expected_result=None):
605
 
606
  stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, params['top_k'], expected_result)
607
  stats["model"] = f"{params['model_type']} - {params['model_name']}"
 
 
608
  stats.update(params)
609
 
610
  all_results.extend(format_results(results_raw, stats))
@@ -705,7 +714,7 @@ Provide your suggestions in a Python dictionary format."""
705
  llm = HuggingFacePipeline.from_model_id(
706
  model_id="google/flan-t5-large",
707
  task="text2text-generation",
708
- model_kwargs={"temperature": 0.7, "max_length": 512}, # Changed max_length to max_new_tokens
709
  )
710
 
711
  # Generate suggestions
 
49
 
50
  download_nltk_resources()
51
 
52
+ nltk.download('punkt')
53
+
54
  FILES_DIR = './files'
55
 
56
  # Model Management
 
161
 
162
  def optimize_query(query, llm_model):
163
  llm = HuggingFacePipeline.from_model_id(
164
+ model_id="google/flan-t5-large",
165
  task="text2text-generation",
166
+ model_kwargs={"do_sample": True, "temperature": 0.7, "max_new_tokens": 512},
167
  )
168
  multi_query_retriever = MultiQueryRetriever.from_llm(
169
  retriever=get_retriever(vector_store, search_type, search_kwargs),
 
393
 
394
  sns.barplot(x='model', y='search_time', data=stats_df, ax=axs[0, 0])
395
  axs[0, 0].set_title('Search Time by Model')
396
+ axs[0, 0].set_xticks(range(len(axs[0, 0].get_xticklabels())))
397
+
398
  axs[0, 0].set_xticklabels(axs[0, 0].get_xticklabels(), rotation=45, ha='right')
399
 
400
  sns.scatterplot(x='result_diversity', y='rank_correlation', hue='model', data=stats_df, ax=axs[0, 1])
 
402
 
403
  sns.boxplot(x='model', y='avg_content_length', data=stats_df, ax=axs[1, 0])
404
  axs[1, 0].set_title('Distribution of Result Content Lengths')
405
+ axs[1, 0].set_xticks(range(len(axs[0, 0].get_xticklabels())))
406
  axs[1, 0].set_xticklabels(axs[1, 0].get_xticklabels(), rotation=45, ha='right')
407
 
408
  embeddings = np.array([embedding for embedding in results_df['embedding'] if isinstance(embedding, np.ndarray)])
 
519
 
520
  stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, top_k, expected_result)
521
  stats["model"] = f"{model_type} - {model_name}"
522
+ stats["model_type"] = model_type
523
+ stats["model_name"] = model_name
524
  stats.update(settings)
525
 
526
  formatted_results = format_results(results_raw, stats)
 
612
 
613
  stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, params['top_k'], expected_result)
614
  stats["model"] = f"{params['model_type']} - {params['model_name']}"
615
+ stats["model_type"] = model_type
616
+ stats["model_name"] = model_name
617
  stats.update(params)
618
 
619
  all_results.extend(format_results(results_raw, stats))
 
714
  llm = HuggingFacePipeline.from_model_id(
715
  model_id="google/flan-t5-large",
716
  task="text2text-generation",
717
+ model_kwargs={"do_sample": True, "temperature": 0.7, "max_new_tokens": 512},
718
  )
719
 
720
  # Generate suggestions