Update app.py
Browse files
app.py
CHANGED
@@ -605,8 +605,15 @@ def analyze_results(stats_df):
|
|
605 |
'silhouette_score': 0.2
|
606 |
}
|
607 |
|
|
|
|
|
|
|
|
|
608 |
# Calculate weighted score for each configuration
|
609 |
-
stats_df['weighted_score'] = sum(
|
|
|
|
|
|
|
610 |
|
611 |
# Get the best configuration
|
612 |
best_config = stats_df.loc[stats_df['weighted_score'].idxmax()]
|
@@ -616,25 +623,25 @@ def analyze_results(stats_df):
|
|
616 |
'best_model': f"{best_config['model_type']} - {best_config['model_name']}",
|
617 |
'best_settings': {
|
618 |
'split_strategy': best_config['split_strategy'],
|
619 |
-
'chunk_size': best_config['chunk_size'],
|
620 |
-
'overlap_size': best_config['overlap_size'],
|
621 |
'vector_store_type': best_config['vector_store_type'],
|
622 |
'search_type': best_config['search_type'],
|
623 |
-
'top_k': best_config['top_k'],
|
624 |
-
'optimize_vocab': best_config['optimize_vocab'],
|
625 |
-
'use_query_optimization': best_config['use_query_optimization'],
|
626 |
-
'use_reranking': best_config['use_reranking']
|
627 |
},
|
628 |
'performance_summary': {
|
629 |
-
'search_time': best_config['search_time'],
|
630 |
-
'result_diversity': best_config['result_diversity'],
|
631 |
-
'rank_correlation': best_config['rank_correlation'],
|
632 |
-
'silhouette_score': best_config['silhouette_score']
|
633 |
}
|
634 |
}
|
635 |
|
636 |
return recommendations
|
637 |
-
####
|
638 |
|
639 |
# Gradio Interface
|
640 |
def launch_interface(share=True):
|
@@ -696,7 +703,6 @@ def launch_interface(share=True):
|
|
696 |
],
|
697 |
outputs=[results_output, stats_output, plot_output]
|
698 |
)
|
699 |
-
|
700 |
####
|
701 |
with gr.Tab("Automated"):
|
702 |
auto_file_input = gr.File(label="Upload File (Optional)")
|
|
|
605 |
'silhouette_score': 0.2
|
606 |
}
|
607 |
|
608 |
+
# Convert relevant columns to numeric type
|
609 |
+
for metric in metric_weights.keys():
|
610 |
+
stats_df[metric] = pd.to_numeric(stats_df[metric], errors='coerce')
|
611 |
+
|
612 |
# Calculate weighted score for each configuration
|
613 |
+
stats_df['weighted_score'] = sum(
|
614 |
+
stats_df[metric].fillna(0) * weight
|
615 |
+
for metric, weight in metric_weights.items()
|
616 |
+
)
|
617 |
|
618 |
# Get the best configuration
|
619 |
best_config = stats_df.loc[stats_df['weighted_score'].idxmax()]
|
|
|
623 |
'best_model': f"{best_config['model_type']} - {best_config['model_name']}",
|
624 |
'best_settings': {
|
625 |
'split_strategy': best_config['split_strategy'],
|
626 |
+
'chunk_size': int(best_config['chunk_size']),
|
627 |
+
'overlap_size': int(best_config['overlap_size']),
|
628 |
'vector_store_type': best_config['vector_store_type'],
|
629 |
'search_type': best_config['search_type'],
|
630 |
+
'top_k': int(best_config['top_k']),
|
631 |
+
'optimize_vocab': bool(best_config['optimize_vocab']),
|
632 |
+
'use_query_optimization': bool(best_config['use_query_optimization']),
|
633 |
+
'use_reranking': bool(best_config['use_reranking'])
|
634 |
},
|
635 |
'performance_summary': {
|
636 |
+
'search_time': float(best_config['search_time']),
|
637 |
+
'result_diversity': float(best_config['result_diversity']),
|
638 |
+
'rank_correlation': float(best_config['rank_correlation']),
|
639 |
+
'silhouette_score': float(best_config['silhouette_score'])
|
640 |
}
|
641 |
}
|
642 |
|
643 |
return recommendations
|
644 |
+
####
|
645 |
|
646 |
# Gradio Interface
|
647 |
def launch_interface(share=True):
|
|
|
703 |
],
|
704 |
outputs=[results_output, stats_output, plot_output]
|
705 |
)
|
|
|
706 |
####
|
707 |
with gr.Tab("Automated"):
|
708 |
auto_file_input = gr.File(label="Upload File (Optional)")
|