Update app.py
app.py CHANGED
@@ -510,6 +510,10 @@ import numpy as np
 from transformers import TextClassificationPipeline
 from typing import List, Union, Any
 
+import numpy as np
+from transformers import pipeline, TextClassificationPipeline
+from typing import List, Any, Union
+
 def rerank_results(
     results: List[Any],
     query: str,
@@ -522,23 +526,24 @@ def rerank_results(
         results: List of documents/results to rerank
         query: Search query string
         reranker: Either a HuggingFace TextClassificationPipeline or a custom reranker
-                  with a rerank() method
+                  with a rerank() method.
 
     Returns:
         List of reranked results
     """
     if not results:
         return results
-
+
     if not hasattr(reranker, 'rerank'):
         # For TextClassificationPipeline
         try:
+            # Create pairs of query and document content
             pairs = [[query, doc.page_content] for doc in results]
 
-            #
+            # Get predictions from the reranker pipeline
             predictions = reranker(pairs)
 
-            # Extract scores
+            # Extract scores with proper fallback options
             scores = []
             for pred in predictions:
                 if isinstance(pred, dict):
@@ -549,21 +554,23 @@ def rerank_results(
                     score = float(pred)
                 scores.append(score)
 
-            # Sort
+            # Sort the results based on scores in descending order
             reranked_idx = np.argsort(scores)[::-1]
+
+            # Return reranked results based on the sorted indices
             return [results[i] for i in reranked_idx]
 
         except Exception as e:
             print(f"Warning: Reranking failed with error: {str(e)}")
             return results
     else:
-        # For
+        # For custom rerankers with a dedicated rerank method
         try:
             return reranker.rerank(query, [doc.page_content for doc in results])
         except Exception as e:
             print(f"Warning: Custom reranking failed with error: {str(e)}")
             return results
-
+
 # Main Comparison Function
 def compare_embeddings(file, query, embedding_models, custom_embedding_model, split_strategy, chunk_size, overlap_size, custom_separators, vector_store_type, search_type, top_k, expected_result=None, lang='german', apply_preprocessing=True, optimize_vocab=False, apply_phonetic=True, phonetic_weight=0.3, custom_tokenizer_file=None, custom_tokenizer_model=None, custom_tokenizer_vocab_size=10000, custom_tokenizer_special_tokens=None, use_query_optimization=False, query_optimization_model="google/flan-t5-base", use_reranking=False):
     all_results = []
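Note: the pipeline branch above expects predictions shaped like the usual text-classification output, i.e. one dict with 'label' and 'score' per input, which is what the isinstance(pred, dict) check handles. A minimal sketch of how such a reranker could be exercised; the cross-encoder checkpoint name and the text/text_pair input format are assumptions for illustration, not taken from app.py:

from transformers import pipeline

# Assumed model name; any sequence-classification cross-encoder should behave similarly.
reranker = pipeline("text-classification", model="cross-encoder/ms-marco-MiniLM-L-6-v2")

query = "Hauptstadt von Deutschland"
texts = ["Berlin ist die Hauptstadt von Deutschland", "Ein Rezept für Apfelkuchen"]

# Score each (query, document) pair; each prediction is a dict with 'label' and 'score'.
preds = reranker([{"text": query, "text_pair": t} for t in texts])
scores = [p["score"] for p in preds]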
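The else branch only requires an object exposing a rerank(query, texts) method. A self-contained sketch using rerank_results as defined above; SimpleDoc and KeywordOverlapReranker are hypothetical stand-ins (the documents are only assumed to carry a page_content attribute, LangChain-style) and do not exist in app.py:

from dataclasses import dataclass
from typing import List

@dataclass
class SimpleDoc:
    # Hypothetical stand-in for a document object with a page_content attribute
    page_content: str

class KeywordOverlapReranker:
    # Hypothetical custom reranker exposing the rerank() method the function checks for
    def rerank(self, query: str, texts: List[str]) -> List[str]:
        terms = set(query.lower().split())
        # Order texts by how many query terms they share, highest overlap first
        return sorted(texts, key=lambda t: len(terms & set(t.lower().split())), reverse=True)

docs = [SimpleDoc("Ein Rezept für Apfelkuchen"), SimpleDoc("Berlin ist die Hauptstadt von Deutschland")]
reranked = rerank_results(docs, "Hauptstadt von Deutschland", KeywordOverlapReranker())
# reranked holds the raw texts ordered by overlap, with the Berlin sentence first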
|