Update app.py
app.py CHANGED
@@ -74,6 +74,8 @@ FILES_DIR = './files'
 # Model Management
 class ModelManager:
     def __init__(self):
+        self.rankings: Dict[str, float] = {}
+        self.model_stats: Dict[str, Dict[str, Any]] = {}
         self.models = {
             'HuggingFace': {
                 'e5-base-de': "danielheinz/e5-base-sts-en-de",
@@ -90,6 +92,28 @@ class ModelManager:
             }
         }

+
+    def update_model_ranking(self, model_id: str, score: float, feedback: Optional[str] = None):
+        """Update model ranking based on performance and optional feedback"""
+        current_score = self.rankings.get(model_id, 0.0)
+        # Weighted average of current score and new score
+        self.rankings[model_id] = 0.7 * current_score + 0.3 * score
+
+        if feedback:
+            if model_id not in self.model_stats:
+                self.model_stats[model_id] = {"feedback_count": 0, "feedback": []}
+            self.model_stats[model_id]["feedback_count"] += 1
+            self.model_stats[model_id]["feedback"].append(feedback)
+
+    def get_top_models(self, n: int = 5) -> List[Tuple[str, float]]:
+        """Get top n ranked models"""
+        return sorted(self.rankings.items(), key=lambda x: x[1], reverse=True)[:n]
+
+    def get_model_stats(self, model_id: str) -> Dict[str, Any]:
+        """Get statistics for a specific model"""
+        return self.model_stats.get(model_id, {})
+
+
     def add_model(self, provider, name, model_path):
         if provider not in self.models:
             self.models[provider] = {}
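The new ranking state turns ModelManager into a running scoreboard: update_model_ranking keeps an exponential moving average with 0.7 weight on history and 0.3 on the new score. A minimal illustration of how that plays out; the model IDs and scores below are made up:

    mm = ModelManager()
    mm.update_model_ranking("HuggingFace:e5-base-de", 0.9)  # 0.7*0.0  + 0.3*0.9 = 0.27
    mm.update_model_ranking("HuggingFace:e5-base-de", 0.9)  # 0.7*0.27 + 0.3*0.9 = 0.459
    mm.update_model_ranking("HuggingFace:some-other-model", 0.5, feedback="fast but shallow")
    mm.get_top_models(n=2)
    # [('HuggingFace:e5-base-de', 0.459), ('HuggingFace:some-other-model', 0.15)]
    mm.get_model_stats("HuggingFace:some-other-model")
    # {'feedback_count': 1, 'feedback': ['fast but shallow']}

Since rankings start at 0.0 and history carries 70% of the weight, a model needs several strong runs before its average approaches its true score.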
@@ -197,7 +221,7 @@ class FileHandler:
 def simple_tokenize(text):
     return text.split()

-def preprocess_text(text, lang='german', apply_preprocessing=True):
+def preprocess_text(text, lang='german', apply_preprocessing=False):
     if not apply_preprocessing:
         return text

@@ -225,7 +249,7 @@ def preprocess_text(text, lang='german', apply_preprocessing=True):

     return ' '.join(tokens)

-def phonetic_match(text, query, method='levenshtein_distance', apply_phonetic=True):
+def phonetic_match(text, query, method='levenshtein_distance', apply_phonetic=False):
     if not apply_phonetic:
         return 0
     if method == 'levenshtein_distance':
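With both defaults flipped to False, preprocessing and phonetic matching become strictly opt-in, so callers that relied on the old implicit behavior must now pass the flags. Illustrative calls; the commented results only restate the pass-through guarantees visible in these hunks:

    preprocess_text("Die Häuser sind groß")                            # returned unchanged
    preprocess_text("Die Häuser sind groß", apply_preprocessing=True)  # runs the German pipeline
    phonetic_match("Maier", "Meyer")                                   # 0 -- disabled by default
    phonetic_match("Maier", "Meyer", apply_phonetic=True)              # Levenshtein-based score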
@@ -390,7 +414,7 @@ def _create_vector_store(vector_store_type, chunks_tuple, embedding_model):


 # Main Processing Functions
-def process_files(file_path, model_type, model_name, split_strategy, chunk_size, overlap_size, custom_separators, lang='german', apply_preprocessing=True):
+def process_files(file_path, model_type, model_name, split_strategy, chunk_size, overlap_size, custom_separators, lang='german', apply_preprocessing=False, custom_tokenizer_file=None, custom_tokenizer_model=None, custom_tokenizer_vocab_size=10000, custom_tokenizer_special_tokens=None):
     if file_path:
         text = FileHandler.extract_text(file_path)
     else:
@@ -412,7 +436,7 @@ def process_files(file_path, model_type, model_name, split_strategy, chunk_size,

     return chunks, embedding_model, len(text.split())

-def search_embeddings(chunks, embedding_model, vector_store_type, search_type, query, top_k, expected_result=None, lang='german', apply_phonetic=True, phonetic_weight=0.3):
+def search_embeddings(chunks, embedding_model, vector_store_type, search_type, query, top_k, expected_result=None, lang='german', apply_phonetic=False, phonetic_weight=0.3):
     preprocessed_query = preprocess_text(query, lang) if apply_phonetic else query

     vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
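Taken together, the widened process_files signature and the flipped defaults change the call sites. A hedged sketch of the updated chain — the file path, query, model, and store names are placeholders, not values from the repo, and the custom_tokenizer_* arguments simply fall back to their defaults when omitted:

    chunks, embedding_model, num_tokens = process_files(
        "files/sample.pdf", "HuggingFace", "e5-base-de",
        split_strategy="recursive", chunk_size=500, overlap_size=50,
        custom_separators=None, lang="german", apply_preprocessing=False,
    )
    results_df, search_time, vector_store, raw = search_embeddings(
        chunks, embedding_model, "FAISS", "similarity",
        query="Vertragslaufzeit", top_k=5,
        apply_phonetic=False, phonetic_weight=0.3,
    )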
@@ -421,6 +445,7 @@ def search_embeddings(chunks, embedding_model, vector_store_type, search_type, q
     start_time = time.time()
     results = retriever.invoke(preprocessed_query)

+    #this should be optional
    def score_result(doc):
        base_score = vector_store.similarity_search_with_score(doc.page_content, k=1)[0][1]

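The new comment flags score_result as a candidate for gating behind a flag: the custom scorer re-queries the vector store for every hit and adds a phonetic term, which is wasted work when apply_phonetic is off. The actual blending line sits below this hunk, but judging from the phonetic_weight slider semantics it is presumably something like the following (an assumption, not the repo's code):

    def score_result(doc):
        base_score = vector_store.similarity_search_with_score(doc.page_content, k=1)[0][1]
        phonetic_score = phonetic_match(doc.page_content, query, apply_phonetic=apply_phonetic)
        # assumed combination -- a convex blend controlled by phonetic_weight
        return base_score * (1 - phonetic_weight) + phonetic_score * phonetic_weight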
@@ -452,68 +477,83 @@ def search_embeddings(chunks, embedding_model, vector_store_type, search_type, q

     return results_df, end_time - start_time, vector_store, results

-def calculate_statistics(results, search_time, vector_store, num_tokens, embedding_model, query,
-                         top_k, expected_result=None):
-    stats = {
-        "num_results": len(results),
-        "avg_content_length": np.mean([len(doc.page_content) for doc in results]) if results else 0,
-        "min_content_length": min([len(doc.page_content) for doc in results]) if results else 0,
-        "max_content_length": max([len(doc.page_content) for doc in results]) if results else 0,
-        "search_time": search_time,
-        "num_tokens": num_tokens,
-        "embedding_dimension": len(embedding_model.embed_query(query)),
-        "top_k": top_k,
-    }
-    # Safely get vector store size
-    try:
-        if hasattr(vector_store, '_index'):
-            stats["vector_store_size"] = vector_store._index.ntotal
-        elif hasattr(vector_store, '_collection'):
-            stats["vector_store_size"] = len(vector_store._collection.get())
-        else:
-            stats["vector_store_size"] = "N/A"
-    except:
-        stats["vector_store_size"] = "N/A"
-
-    # Safely get document count
-    try:
-        if hasattr(vector_store, 'docstore'):
-            stats["num_documents"] = len(vector_store.docstore._dict)
-        elif hasattr(vector_store, '_collection'):
-            stats["num_documents"] = len(vector_store._collection.get())
-        else:
-            stats["num_documents"] = len(results)
-    except:
-        stats["num_documents"] = len(results)
-
-    if expected_result:
-        stats["contains_expected"] = any(expected_result in doc.page_content for doc in results)
-        stats["expected_result_rank"] = next((i for i, doc in enumerate(results) if expected_result in doc.page_content), -1) + 1
-
-    if len(results) > 1000:
-        embeddings = [embedding_model.embed_query(doc.page_content) for doc in results]
-        pairwise_similarities = np.inner(embeddings, embeddings)
-        stats["result_diversity"] = 1 - np.mean(pairwise_similarities[np.triu_indices(len(embeddings), k=1)])
-    else:
-        stats["silhouette_score"] = "N/A"
-
+# Enhanced Result Analysis
+class ResultAnalyzer:
+    @staticmethod
+    def calculate_statistics(results, search_time, vector_store, num_tokens, embedding_model, query,
+                             top_k, expected_result=None, model_feedback=None):
+        stats = {
+            "num_results": len(results),
+            "avg_content_length": np.mean([len(doc.page_content) for doc in results]) if results else 0,
+            "min_content_length": min([len(doc.page_content) for doc in results]) if results else 0,
+            "max_content_length": max([len(doc.page_content) for doc in results]) if results else 0,
+            "search_time": search_time,
+            "num_tokens": num_tokens,
+            "embedding_dimension": len(embedding_model.embed_query(query)),
+            "top_k": top_k,
+        }
+
+        # Add vector store statistics
+        try:
+            if hasattr(vector_store, '_index'):
+                stats["vector_store_size"] = vector_store._index.ntotal
+            elif hasattr(vector_store, '_collection'):
+                stats["vector_store_size"] = len(vector_store._collection.get())
+        except:
+            stats["vector_store_size"] = "N/A"
+
+        # Add expected result statistics if provided
+        if expected_result:
+            stats["contains_expected"] = any(expected_result in doc.page_content for doc in results)
+            stats["expected_result_rank"] = next((i for i, doc in enumerate(results)
+                                                  if expected_result in doc.page_content), -1) + 1
+
+        # Calculate diversity metrics for larger result sets
+        if len(results) > 3:  # Changed from 1000 to make it more practical
+            embeddings = [embedding_model.embed_query(doc.page_content) for doc in results]
+            stats["result_diversity"] = ResultAnalyzer._calculate_diversity(embeddings)
+            stats["silhouette_score"] = ResultAnalyzer._calculate_silhouette(embeddings)
+        else:
+            stats["result_diversity"] = "N/A"
+            stats["silhouette_score"] = "N/A"
+
+        # Add ranking correlation
+        query_embedding = embedding_model.embed_query(query)
+        result_embeddings = [embedding_model.embed_query(doc.page_content) for doc in results]
+        similarities = [np.inner(query_embedding, emb) for emb in result_embeddings]
+        if len(similarities) > 1:
+            rank_correlation, _ = spearmanr(similarities, range(len(similarities)))
+            stats["rank_correlation"] = rank_correlation
+        else:
+            stats["rank_correlation"] = "N/A"
+
+        # Add model feedback if provided
+        if model_feedback:
+            stats["model_feedback"] = model_feedback
+
+        return stats
+
+    @staticmethod
+    def _calculate_diversity(embeddings: List[np.ndarray]) -> float:
+        """Calculate diversity score for embeddings"""
+        embeddings_array = np.array(embeddings)
+        pairwise_similarities = np.inner(embeddings_array, embeddings_array)
+        return 1 - np.mean(pairwise_similarities[np.triu_indices(len(embeddings), k=1)])
+
+    @staticmethod
+    def _calculate_silhouette(embeddings: List[np.ndarray]) -> float:
+        """Calculate silhouette score for embeddings"""
+        if len(embeddings) < 3:
+            return 0.0
+        try:
+            return silhouette_score(embeddings, range(len(embeddings)))
+        except:
+            return 0.0
+
 # Visualization
 def visualize_results(results_df, stats_df):
     # Add model column if not present
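_calculate_diversity is one minus the mean pairwise inner product over the strict upper triangle; for unit-norm embeddings that is one minus the average cosine similarity, so duplicate results score 0 and mutually orthogonal results score 1. A small self-contained check:

    import numpy as np

    embs = [np.array([1.0, 0.0]), np.array([0.0, 1.0]), np.array([1.0, 0.0])]
    sims = np.inner(embs, embs)                    # 3x3 similarity matrix
    upper = sims[np.triu_indices(len(embs), k=1)]  # pairs: [0.0, 1.0, 0.0]
    diversity = 1 - np.mean(upper)                 # 1 - 1/3 ≈ 0.667

One caveat: _calculate_silhouette labels every embedding as its own cluster via range(len(embeddings)), and scikit-learn's silhouette_score requires 2 <= n_labels <= n_samples - 1, so the call always raises and the except branch returns 0.0 for every result set.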
@@ -688,7 +728,7 @@ def compare_embeddings(file, query, embedding_models, custom_embedding_model, sp

     result_embeddings = [doc.metadata.get('embedding', None) for doc in results_raw]

-    stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, top_k, expected_result)
+    stats = ResultAnalyzer.calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, top_k, expected_result)
     stats["model"] = f"{model_type} - {model_name}"
     stats["model_type"] = model_type
     stats["model_name"] = model_name
@@ -783,7 +823,7 @@ def automated_testing(file, query, test_params, expected_result=None):
     reranker = pipeline("text-classification", model="cross-encoder/ms-marco-MiniLM-L-12-v2")
     results_raw = rerank_results(results_raw, query, reranker)

-    stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, params['top_k'], expected_result)
+    stats = ResultAnalyzer.calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, params['top_k'], expected_result)
     stats["model"] = f"{params['model_type']} - {params['model_name']}"
     stats["model_type"] = params['model_type']
     stats["model_name"] = params['model_name']
@@ -989,28 +1029,55 @@ def launch_interface(share=True):
     search_type_input = gr.Radio(choices=["similarity", "mmr", "custom"], label="Search Type", value="similarity")
     lang_input = gr.Dropdown(choices=["german", "english", "french"], label="Language", value="german")

-    with gr.Tab("
+    with gr.Tab("Expert"):
-        apply_preprocessing_input = gr.Checkbox(label="Apply Text Preprocessing", value=True)
+        apply_preprocessing_input = gr.Checkbox(label="Apply Text Preprocessing", value=False)
         optimize_vocab_input = gr.Checkbox(label="Optimize Vocabulary", value=False)
-        apply_phonetic_input = gr.Checkbox(label="Apply Phonetic Matching", value=True)
+        apply_phonetic_input = gr.Checkbox(label="Apply Phonetic Matching", value=False)
         phonetic_weight_input = gr.Slider(0, 1, step=0.1, value=0.3, label="Phonetic Matching Weight")
         custom_tokenizer_file_input = gr.File(label="Custom Tokenizer File (Optional)")
         custom_tokenizer_model_input = gr.Textbox(label="Custom Tokenizer Model (e.g., WordLevel, BPE, Unigram)")
         custom_tokenizer_vocab_size_input = gr.Textbox(label="Custom Tokenizer Vocab Size", value="10000")
         custom_tokenizer_special_tokens_input = gr.Textbox(label="Custom Tokenizer Special Tokens (comma-separated)")
         use_query_optimization_input = gr.Checkbox(label="Use Query Optimization", value=False)
-        query_optimization_model_input = gr.Textbox(label="Query Optimization Model
+        query_optimization_model_input = gr.Textbox(label="Query Optimization Model (google/flan-t5-base) ", value="")
         use_reranking_input = gr.Checkbox(label="Use Reranking", value=False)

     with gr.Tab("Automation"):
+
+        with gr.Row():
+            auto_file_input = gr.File(label="Upload File (Optional)")
+            auto_query_input = gr.Textbox(label="Search Query")
+
+        with gr.Row():
+            auto_expected_result_input = gr.Textbox(
+                label="Expected Result (Optional)",
+                placeholder="Enter expected text if you want to evaluate accuracy"
+            )
+            model_feedback_input = gr.Textbox(
+                label="Model Feedback (Optional)",
+                placeholder="Enter any feedback about model performance"
+            )
+
+        with gr.Row():
+            with gr.Column():
+                # Default model selection
+                default_models_input = gr.CheckboxGroup(
+                    choices=[f"{type}:{name}"
+                             for type, names in DEFAULT_MODELS.items()
+                             for name in names],
+                    label="Default Models",
+                    value=[f"HuggingFace:{DEFAULT_MODELS['HuggingFace'][0]}"]
+                )
+
+            with gr.Column():
+                # Custom model input
+                custom_models_input = gr.TextArea(
+                    label="Custom Models (Optional)",
+                    placeholder="Enter one model per line in format: type:name",
+                    lines=3
+                )
+
         auto_split_strategies = gr.CheckboxGroup(
             choices=["token", "recursive"],
             label="Split Strategies to Test"
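Both model pickers emit "type:name" strings — the checkbox group builds them from DEFAULT_MODELS and the textarea takes one per line — while run_automated_tests (added at the bottom of this commit) expects a list of {'type': ..., 'name': ...} dicts. A small glue function along these lines is presumably needed; parse_model_specs is a hypothetical name, not something defined in app.py:

    def parse_model_specs(selected, custom_text=""):
        # merge checked defaults with the free-text lines
        specs = list(selected) + [ln.strip() for ln in custom_text.splitlines() if ln.strip()]
        configs = []
        for spec in specs:
            provider, _, name = spec.partition(":")
            if provider and name:
                configs.append({"type": provider, "name": name})
        return configs

    parse_model_specs(["HuggingFace:e5-base-de"], "MyOrg:my-custom-embedder")
    # [{'type': 'HuggingFace', 'name': 'e5-base-de'},
    #  {'type': 'MyOrg', 'name': 'my-custom-embedder'}]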
@@ -1030,6 +1097,36 @@ def launch_interface(share=True):
     auto_use_query_optimization = gr.Checkbox(label="Test Query Optimization", value=True)
     auto_use_reranking = gr.Checkbox(label="Test Reranking", value=True)

+
+        auto_results_output = gr.Dataframe(label="Automated Test Results", interactive=False)
+        auto_stats_output = gr.Dataframe(label="Automated Test Statistics", interactive=False)
+        recommendations_output = gr.JSON(label="Recommendations")
+
+        auto_submit_button = gr.Button("Run Automated Tests")
+        auto_submit_button.click(
+            fn=lambda *args: run_automated_tests(*args),
+            inputs=[
+                auto_file_input, auto_query_input, auto_expected_result_input, auto_model_types, auto_model_names,
+                auto_split_strategies, auto_chunk_sizes, auto_overlap_sizes,
+                auto_vector_store_types, auto_search_types, auto_top_k,
+                auto_optimize_vocab, auto_use_query_optimization, auto_use_reranking
+            ],
+            outputs=[auto_results_output, auto_stats_output, recommendations_output]
+        )
+    ###
+
+    with gr.Tab("Results"):
+        with gr.Row():
+            results_output = gr.DataFrame(label="Results")
+            stats_output = gr.DataFrame(label="Statistics")
+
+        with gr.Row():
+            plot_output = gr.Plot(label="Visualizations")
+            model_rankings_output = gr.JSON(label="Model Rankings")
+
+        with gr.Row():
+            recommendations_output = gr.JSON(label="Recommendations")
+
     with gr.Tab("LLM Suggestions"):
         llm_file_input = gr.File(label="Upload File for LLM Suggestions")
         llm_num_chunks = gr.Slider(1, 10, step=1, value=5, label="Number of Sample Chunks")
@@ -1072,22 +1169,6 @@ def launch_interface(share=True):
         outputs=[results_output, stats_output, plot_output, best_settings_output]
     )

-    auto_results_output = gr.Dataframe(label="Automated Test Results", interactive=False)
-    auto_stats_output = gr.Dataframe(label="Automated Test Statistics", interactive=False)
-    recommendations_output = gr.JSON(label="Recommendations")
-
-    auto_submit_button = gr.Button("Run Automated Tests")
-    auto_submit_button.click(
-        fn=lambda *args: run_automated_tests_and_analyze(*args),
-        inputs=[
-            auto_file_input, auto_query_input, auto_expected_result_input, auto_model_types, auto_model_names,
-            auto_split_strategies, auto_chunk_sizes, auto_overlap_sizes,
-            auto_vector_store_types, auto_search_types, auto_top_k,
-            auto_optimize_vocab, auto_use_query_optimization, auto_use_reranking
-        ],
-        outputs=[auto_results_output, auto_stats_output, recommendations_output]
-    )
-    ###


     use_case_md = """
@@ -1491,33 +1572,132 @@ if __name__ == "__main__":
|
|
1491 |
|
1492 |
iface.launch(share=share)
|
1493 |
|
1494 |
-
|
1495 |
-
|
1496 |
-
|
1497 |
-
|
1498 |
-
|
1499 |
-
|
1500 |
-
|
1501 |
-
|
1502 |
-
|
1503 |
-
|
1504 |
-
|
1505 |
-
|
1506 |
-
|
1507 |
-
|
1508 |
-
|
1509 |
-
|
1510 |
-
|
1511 |
-
'
|
1512 |
-
'
|
1513 |
-
|
1514 |
-
|
1515 |
-
|
1516 |
-
|
1517 |
-
|
1518 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1519 |
|
1520 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1521 |
|
1522 |
if __name__ == "__main__":
|
1523 |
launch_interface()
|
|
|
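run_automated_tests takes the grid axes as lists and crosses them with sklearn's ParameterGrid, so the run count multiplies quickly. A minimal invocation sketch — the file name, query, and axis values are placeholders, and every key the function body reads must be present:

    test_params = {
        "split_strategy": ["token", "recursive"],
        "chunk_size": [250, 500],
        "overlap_size": [50],
        "lang": ["german"],
        "apply_preprocessing": [False],
        "optimize_vocab": [False],
        "use_query_optimization": [False],
        "query_optimization_model": ["google/flan-t5-base"],
        "vector_store_type": ["FAISS"],
        "search_type": ["similarity"],
        "top_k": [5],
        "apply_phonetic": [False],
        "phonetic_weight": [0.3],
        "use_reranking": [False],
    }
    # 2 split strategies x 2 chunk sizes x 1 of everything else = 4 runs per model
    results_df, stats_df = run_automated_tests(
        "files/sample.pdf", "Vertragslaufzeit",
        model_configs=[{"type": "HuggingFace", "name": "e5-base-de"}],
        test_params=test_params,
    )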
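calculate_model_ranking_score is a linear blend over whichever metrics are present and numeric; string placeholders like "N/A" are skipped. A worked example with hypothetical stats:

    stats = {
        "search_time": 0.5,         # -0.2 * 0.5 = -0.10
        "result_diversity": 0.6,    #  0.2 * 0.6 =  0.12
        "rank_correlation": 0.8,    #  0.3 * 0.8 =  0.24
        "contains_expected": True,  #  0.3 * 1.0 =  0.30
        "expected_result_rank": 2,  # -0.2 * (1/2) = -0.10
    }
    calculate_model_ranking_score(stats)  # -0.10 + 0.12 + 0.24 + 0.30 - 0.10 = 0.46

One sign looks suspect: expected_result_rank is first converted to 1/rank (higher is better) and then multiplied by a negative weight, so a model that ranks the expected result first is penalized more than one that ranks it tenth. Either the inversion or the negative weight should probably go.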