Sarthak committed on May 26

Commit

eb5363b

1 Parent(s): b82c1c9

feat: added MTEB evaluation scripts

Files changed (29) hide show

MTEB_evaluate.py +268 -275
README.md +50 -0
analyze_mteb_results.py +311 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/AmazonCounterfactualClassification.json +1 -1
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/AmazonReviewsClassification.json +73 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/AppsRetrieval.json +159 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/ArguAna.json +158 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/AskUbuntuDupQuestions.json +26 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/BIOSSES.json +26 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/Banking77Classification.json +0 -73
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/BiorxivClusteringS2S.json +32 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/COIRCodeSearchNetRetrieval.json +8 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CQADupstackProgrammersRetrieval.json +0 -158
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CodeFeedbackMT.json +158 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CodeSearchNetCCRetrieval.json +8 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CodeTransOceanContest.json +159 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CodeTransOceanDL.json +158 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CosQA.json +159 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/STSBenchmark.json +0 -26
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/SprintDuplicateQuestions.json +0 -58
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/StackExchangeClustering.json +0 -47
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/StackOverflowQA.json +158 -0
mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/SyntheticText2SQL.json +159 -0
mteb_results/mteb_parsed_results.json +0 -3
mteb_results/mteb_raw_results.json +0 -7
mteb_results/mteb_report.txt +0 -21
mteb_results/mteb_summary.json +0 -20
pyproject.toml +1 -0
uv.lock +2 -0

MTEB_evaluate.py CHANGED Viewed

@@ -1,349 +1,342 @@
 #!/usr/bin/env python
 """
-MTEB Evaluation Script for Distilled Model - Code-Focused Tasks.
-This script evaluates the distilled gte-Qwen2-7B-instruct model using MTEB
-(Massive Text Embedding Benchmark) with a focus on tasks relevant for code:
-- Classification: Tests ability to distinguish between different categories (e.g., programming languages)
-- Clustering: Tests ability to group similar code by functionality
-- STS: Tests semantic similarity understanding between code snippets
-- Retrieval: Tests code search and duplicate detection capabilities
 Features:
-- Incremental evaluation: Skips tasks that already have results in mteb_results/
-- Combines existing and new results automatically
-- Saves results in multiple formats for analysis
-Usage:
-    python MTEB_evaluate.py
-Configuration:
-    - Set EVAL_ALL_TASKS = False to use only CODE_SPECIFIC_TASKS
-    - Modify CODE_SPECIFIC_TASKS for granular task selection
 """
 import json
 import logging
 import sys
 import time
 from pathlib import Path
-import mteb
-from model2vec import StaticModel
-from mteb import ModelMeta
-from evaluation import (
-	CustomMTEB,
-	get_tasks,
-	make_leaderboard,
-	parse_mteb_results,
-	summarize_results,
-)
 # =============================================================================
-# CONFIGURATION CONSTANTS
 # =============================================================================
-# Model Configuration
-MODEL_PATH = "."  # Path to the distilled model directory
-MODEL_NAME = "gte-Qwen2-7B-instruct-M2V-Distilled"  # Name for the model in results
-# Evaluation Configuration
-OUTPUT_DIR = "mteb_results"  # Directory to save evaluation results
-EVAL_ALL_TASKS = True
-# Specific tasks most relevant for code evaluation (focused selection)
-CODE_SPECIFIC_TASKS = [
-	# Classification - Programming language/category classification
-	"Banking77Classification",  # Fine-grained classification (77 classes)
-	# Clustering - Code grouping by functionality
-	"StackExchangeClustering.v2",  # Technical Q&A clustering (most relevant)
-	# STS - Code similarity understanding
-	"STSBenchmark",  # Standard semantic similarity benchmark
-	# Retrieval - Code search capabilities
-	"CQADupstackProgrammersRetrieval",  # Programming Q&A retrieval
-	# PairClassification - Duplicate/similar code detection
-	"SprintDuplicateQuestions",  # Duplicate question detection
-]
-# Evaluation settings
-EVAL_SPLITS = ["test"]  # Dataset splits to evaluate on
-VERBOSITY = 2  # MTEB verbosity level
-# =============================================================================
 # Configure logging
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
-def check_existing_results(output_path: Path, tasks: list) -> list:
-	"""Check for existing task results and filter out completed tasks."""
-	remaining_tasks = []
-	completed_tasks = []
-	for task in tasks:
-		task_name = task.metadata.name
-		# MTEB saves results as {model_name}__{task_name}.json
-		result_file = output_path / MODEL_NAME / f"{task_name}.json"
-		if result_file.exists():
-			completed_tasks.append(task_name)
-			logger.info(f"Skipping {task_name} - results already exist")
-		else:
-			remaining_tasks.append(task)
-	if completed_tasks:
-		logger.info(f"Found existing results for {len(completed_tasks)} tasks: {completed_tasks}")
-	return remaining_tasks
-def load_existing_parsed_results(output_path: Path) -> dict:
-	"""Load existing parsed results if they exist."""
-	parsed_results_file = output_path / "mteb_parsed_results.json"
-	if parsed_results_file.exists():
-		try:
-			with parsed_results_file.open("r") as f:
-				return json.load(f)
-		except (json.JSONDecodeError, OSError) as e:
-			logger.warning(f"Could not load existing parsed results: {e}")
-	return {}
-def load_and_display_existing_results(output_path: Path) -> None:
-	"""Load and display existing MTEB results."""
-	summary_file = output_path / "mteb_summary.json"
-	if summary_file.exists():
-		with summary_file.open("r") as f:
-			summary = json.load(f)
-		logger.info("=" * 80)
-		logger.info("EXISTING MTEB EVALUATION RESULTS:")
-		logger.info("=" * 80)
-		stats = summary.get("summary_stats")
-		if stats:
-			logger.info(f"Total Datasets: {stats.get('total_datasets', 'N/A')}")
-			logger.info(f"Average Score: {stats.get('average_score', 0):.4f}")
-			logger.info(f"Median Score: {stats.get('median_score', 0):.4f}")
-		logger.info("=" * 80)
-	else:
-		logger.info("No existing summary found. Individual task results may still exist.")
-def run_mteb_evaluation() -> None:
-	"""Run MTEB evaluation using the evaluation package."""
-	output_path = Path(OUTPUT_DIR)
-	output_path.mkdir(parents=True, exist_ok=True)
-	logger.info(f"Loading model from {MODEL_PATH}")
-	model = StaticModel.from_pretrained(MODEL_PATH)
-	logger.info("Model loaded successfully")
-	# Set up model metadata for MTEB
-	model.mteb_model_meta = ModelMeta(  # type: ignore[attr-defined]
-		name=MODEL_NAME, revision="distilled", release_date=None, languages=["eng"]
-	)
-	# Get specific code-relevant tasks (focused selection)
-	logger.info("Getting focused code-relevant MTEB tasks")
-	logger.info(f"Selected specific tasks: {CODE_SPECIFIC_TASKS}")
-	if EVAL_ALL_TASKS:
-		all_tasks = get_tasks()
-	else:
-		all_tasks = [mteb.get_task(task_name, languages=["eng"]) for task_name in CODE_SPECIFIC_TASKS]
-	logger.info(f"Found {len(all_tasks)} total tasks")
-	# Check for existing results and filter out completed tasks
-	tasks = check_existing_results(output_path, all_tasks)
-	logger.info(f"Will evaluate {len(tasks)} remaining tasks")
-	if not tasks:
-		logger.info("No new tasks to evaluate - all tasks already completed!")
-		# Load and display existing results
-		logger.info("Loading existing results...")
 		try:
-			load_and_display_existing_results(output_path)
-		except (json.JSONDecodeError, OSError, KeyError) as e:
-			logger.warning(f"Could not load existing results: {e}")
-		return
-	# Define the CustomMTEB object with the specified tasks
-	evaluation = CustomMTEB(tasks=tasks)
-	# Run the evaluation
-	logger.info("Starting MTEB evaluation...")
-	start_time = time.time()
-	results = evaluation.run(model, eval_splits=EVAL_SPLITS, output_folder=str(output_path), verbosity=VERBOSITY)
-	end_time = time.time()
-	evaluation_time = end_time - start_time
-	logger.info(f"Evaluation completed in {evaluation_time:.2f} seconds")
-	# Parse the results and summarize them
-	logger.info("Parsing and summarizing results...")
-	parsed_results = parse_mteb_results(mteb_results=results, model_name=MODEL_NAME)
-	# Load existing results if any and combine them
-	existing_results = load_existing_parsed_results(output_path)
-	if existing_results:
-		logger.info("Combining with existing results...")
-		# Convert to dict for merging
-		parsed_dict = dict(parsed_results) if hasattr(parsed_results, "items") else {}
-		# Simple merge - existing results take precedence to avoid overwriting
-		for key, value in existing_results.items():
-			if key not in parsed_dict:
-				parsed_dict[key] = value
-		parsed_results = parsed_dict
-	task_scores = summarize_results(parsed_results)
-	# Save results in different formats
-	save_results(output_path, results, parsed_results, task_scores, evaluation_time)
-	# Print the results in a leaderboard format
-	logger.info("MTEB Evaluation Results:")
-	logger.info("=" * 80)
-	leaderboard = make_leaderboard(task_scores)  # type: ignore[arg-type]
-	logger.info(leaderboard.to_string(index=False))
-	logger.info("=" * 80)
-	logger.info(f"Evaluation completed successfully. Results saved to {OUTPUT_DIR}")
-def save_results(
-	output_path: Path, raw_results: list, parsed_results: dict, task_scores: dict, evaluation_time: float
-) -> None:
-	"""Save evaluation results in multiple formats."""
-	# Save raw results
-	raw_results_file = output_path / "mteb_raw_results.json"
-	with raw_results_file.open("w") as f:
-		json.dump(raw_results, f, indent=2, default=str)
-	logger.info(f"Raw results saved to {raw_results_file}")
-	# Save parsed results
-	parsed_results_file = output_path / "mteb_parsed_results.json"
-	with parsed_results_file.open("w") as f:
-		json.dump(parsed_results, f, indent=2, default=str)
-	logger.info(f"Parsed results saved to {parsed_results_file}")
-	# Generate summary statistics
-	summary_stats = generate_summary_stats(task_scores)
-	# Save task scores summary
-	summary = {
-		"model_name": MODEL_NAME,
-		"evaluation_time_seconds": evaluation_time,
-		"task_scores": task_scores,
-		"summary_stats": summary_stats,
-	}
-	summary_file = output_path / "mteb_summary.json"
-	with summary_file.open("w") as f:
-		json.dump(summary, f, indent=2, default=str)
-	logger.info(f"Summary saved to {summary_file}")
-	# Save human-readable report
-	report_file = output_path / "mteb_report.txt"
-	generate_report(output_path, task_scores, summary_stats, evaluation_time)
-	logger.info(f"Report saved to {report_file}")
-def generate_summary_stats(task_scores: dict) -> dict:
-	"""Generate summary statistics from task scores."""
-	if not task_scores:
-		return {}
-	# Extract all individual dataset scores
-	all_scores = []
-	for model_data in task_scores.values():
-		if isinstance(model_data, dict) and "dataset_scores" in model_data:
-			dataset_scores = model_data["dataset_scores"]
-			if isinstance(dataset_scores, dict):
-				all_scores.extend(
-					[
-						float(score)
-						for score in dataset_scores.values()
-						if isinstance(score, int | float) and str(score).lower() != "nan"
-					]
-				)
-	if not all_scores:
-		return {}
-	import numpy as np
-	return {
-		"total_datasets": len(all_scores),
-		"average_score": float(np.mean(all_scores)),
-		"median_score": float(np.median(all_scores)),
-		"std_dev": float(np.std(all_scores)),
-		"min_score": float(np.min(all_scores)),
-		"max_score": float(np.max(all_scores)),
-	}
-def generate_report(output_path: Path, task_scores: dict, summary_stats: dict, evaluation_time: float) -> None:
-	"""Generate human-readable evaluation report."""
-	report_file = output_path / "mteb_report.txt"
-	with report_file.open("w") as f:
-		f.write("=" * 80 + "\n")
-		f.write("MTEB Evaluation Report\n")
-		f.write("=" * 80 + "\n\n")
-		f.write(f"Model: {MODEL_NAME}\n")
-		f.write(f"Model Path: {MODEL_PATH}\n")
-		f.write(f"Evaluation Time: {evaluation_time:.2f} seconds\n")
-		# Write summary stats
-		if summary_stats:
-			f.write(f"Total Datasets: {summary_stats['total_datasets']}\n\n")
-			f.write("Summary Statistics:\n")
-			f.write(f"  Average Score: {summary_stats['average_score']:.4f}\n")
-			f.write(f"  Median Score: {summary_stats['median_score']:.4f}\n")
-			f.write(f"  Standard Deviation: {summary_stats['std_dev']:.4f}\n")
-			f.write(f"  Score Range: {summary_stats['min_score']:.4f} - {summary_stats['max_score']:.4f}\n\n")
-		else:
-			f.write("Summary Statistics: No valid results found\n\n")
-		# Write leaderboard
-		f.write("Detailed Results:\n")
-		f.write("-" * 50 + "\n")
-		if task_scores:
-			leaderboard = make_leaderboard(task_scores)  # type: ignore[arg-type]
-			f.write(leaderboard.to_string(index=False))
-		else:
-			f.write("No results available\n")
-		f.write("\n\n" + "=" * 80 + "\n")
-def main() -> None:
-	"""Main evaluation function."""
-	logger.info(f"Starting MTEB evaluation for {MODEL_NAME}")
-	logger.info(f"Model path: {MODEL_PATH}")
-	logger.info(f"Output directory: {OUTPUT_DIR}")
-	logger.info("Running focused MTEB evaluation on code-relevant tasks:")
-	logger.info("  - Classification: Programming language classification")
-	logger.info("  - Clustering: Code clustering by functionality")
-	logger.info("  - STS: Semantic similarity between code snippets")
-	logger.info("  - Retrieval: Code search and retrieval")
-	try:
-		run_mteb_evaluation()
-		logger.info("Evaluation pipeline completed successfully!")
-	except Exception:
-		logger.exception("Evaluation failed")
-		sys.exit(1)
 if __name__ == "__main__":

 #!/usr/bin/env python
 """
+MTEB Evaluation Script with Subprocess Isolation (Code Information Retrieval Tasks).
+This script evaluates models using MTEB with subprocess isolation to prevent
+memory issues and process killing.
 Features:
+- Each task runs in a separate subprocess to isolate memory
+- 30-second timeout per task (see TASK_TIMEOUT)
+- No retries - if task fails or times out, move to next one
+- Memory monitoring and cleanup
+Note: Multi-threading is NOT used here because:
+1. Memory is the main bottleneck, not CPU
+2. Running multiple tasks simultaneously would increase memory pressure
+3. Many tasks are being killed (return code -9) due to OOM conditions
+4. Sequential processing with subprocess isolation is more stable
 """
+import contextlib
 import json
 import logging
+import subprocess
 import sys
+import tempfile
 import time
 from pathlib import Path
+import psutil
 # =============================================================================
+# CONFIGURATION
 # =============================================================================
+# Location the distilled model is loaded from; forwarded to the task-runner subprocess.
+MODEL_PATH = "."
+# Model name recorded in the MTEB model metadata; also the results sub-directory name.
+MODEL_NAME = "gte-Qwen2-7B-instruct-M2V-Distilled"
+# Root directory that receives per-task results and the final summary.
+OUTPUT_DIR = "mteb_results"
+TASK_TIMEOUT = 30  # 30 seconds timeout per task
+# NOTE(review): MAX_RETRIES is not read by any code visible in this file; the
+# "no retries" behaviour comes from main() invoking each task exactly once.
+MAX_RETRIES = 0  # No retries - move to next task if failed/timeout
+# Constants
+SIGKILL_RETURN_CODE = -9  # Process killed by SIGKILL (usually OOM)
 # Configure logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
+# =============================================================================
+# SINGLE TASK RUNNER SCRIPT
+# =============================================================================
+# Source of the helper script that run_task_subprocess() writes to a temporary
+# file and executes in a fresh interpreter, one MTEB task per process.
+# Command line: <model_path> <task_name> <output_dir> <model_name>.
+# On success it prints "RESULT_FILE:<path>" on stdout and exits 0; on any
+# exception it reports the error on stderr and exits 1.
+TASK_RUNNER_SCRIPT = """
+import sys
+import os
+import json
+import tempfile
+import traceback
+from pathlib import Path
+# Add current directory to path
+sys.path.insert(0, ".")
+try:
+    import mteb
+    from model2vec import StaticModel
+    from mteb import ModelMeta
+    from evaluation import CustomMTEB
+    def run_single_task():
+        # Get arguments
+        model_path = sys.argv[1]
+        task_name = sys.argv[2]
+        output_dir = sys.argv[3]
+        model_name = sys.argv[4]
+        # Load model
+        model = StaticModel.from_pretrained(model_path)
+        model.mteb_model_meta = ModelMeta(
+            name=model_name, revision="distilled", release_date=None, languages=["eng"]
+        )
+        # Get and run task
+        task = mteb.get_task(task_name, languages=["eng"])
+        evaluation = CustomMTEB(tasks=[task])
+        results = evaluation.run(
+            model,
+            eval_splits=["test"],
+            output_folder=output_dir,
+            verbosity=0
+        )
+        # Save results to temp file for parent process.
+        # default=str: the returned results may hold objects json cannot encode
+        # (presumably MTEB result objects - TODO confirm); stringify them the
+        # same way the parent's summary writer does instead of failing a task
+        # that actually finished.
+        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f:
+            json.dump({
+                "success": True,
+                "task_name": task_name,
+                "results": results
+            }, f, default=str)
+            temp_file = f.name
+        print(f"RESULT_FILE:{temp_file}")
+        return 0
+    if __name__ == "__main__":
+        sys.exit(run_single_task())
+except Exception as e:
+    # Report on stderr so the parent's error logging picks the failure up.
+    print(f"ERROR: {str(e)}", file=sys.stderr)
+    print(f"TRACEBACK: {traceback.format_exc()}", file=sys.stderr)
+    sys.exit(1)
+"""
+def get_available_tasks() -> list[str]:
+	"""Return the task names of MTEB's CoIR benchmark.
+
+	Returns:
+		One name per benchmark task, in benchmark order. An empty list is
+		returned (after logging the exception) when ``mteb`` cannot be imported
+		or the benchmark cannot be read.
+	"""
+	try:
+		import mteb
+		import mteb.benchmarks
+		# CoIR is the code information retrieval suite this script targets,
+		# not the main MTEB benchmark.
+		benchmark = mteb.benchmarks.CoIR
+		task_names = []
+		for task in benchmark.tasks:
+			# Entries may be plain names or task objects. For objects, str(task)
+			# is not guaranteed to be the name mteb.get_task() accepts, so prefer
+			# metadata.name when it is available.
+			name = getattr(getattr(task, "metadata", None), "name", None)
+			task_names.append(name if isinstance(name, str) else str(task))
+		return task_names
+	except Exception:
+		logger.exception("Failed to get tasks")
+		return []
+def check_existing_results(output_path: Path, task_names: list[str]) -> list[str]:
+	"""Filter out tasks whose result file is already on disk.
+
+	A task counts as done when ``<output_path>/<MODEL_NAME>/distilled/<task>.json``
+	exists; each such task is logged and dropped. The remaining names are
+	returned in their original order.
+	"""
+	results_dir = output_path / MODEL_NAME / "distilled"
+	pending = []
+	for task_name in task_names:
+		if (results_dir / f"{task_name}.json").exists():
+			logger.info(f"Skipping {task_name} - results already exist")
+			continue
+		pending.append(task_name)
+	return pending
+def run_task_subprocess(task_name: str, output_dir: str) -> tuple[bool, str, float]:
+	"""Run one MTEB task in a child interpreter, bounded by TASK_TIMEOUT.
+
+	TASK_RUNNER_SCRIPT is written to a temporary file and executed with the
+	current interpreter. A zero exit status counts as success.
+
+	Args:
+		task_name: Name of the task to evaluate.
+		output_dir: Directory handed to the child as its output folder.
+
+	Returns:
+		``(success, task_name, duration_seconds)``. On timeout the duration is
+		exactly TASK_TIMEOUT, which main() relies on to tell timeouts apart from
+		other failures; if the subprocess could not be run at all it is 0.0.
+	"""
+	# Create temporary script file
+	with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
+		f.write(TASK_RUNNER_SCRIPT)
+		script_path = f.name
+	try:
+		logger.info(f"Running task: {task_name}")
+		start_time = time.time()
+		# Run subprocess with timeout
+		# subprocess security: We control all inputs (script path and known arguments)
+		cmd = [sys.executable, script_path, MODEL_PATH, task_name, output_dir, MODEL_NAME]
+		process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)  # noqa: S603
+		try:
+			stdout, stderr = process.communicate(timeout=TASK_TIMEOUT)
+		except subprocess.TimeoutExpired:
+			logger.warning(f"⏱ Task {task_name} timed out after {TASK_TIMEOUT}s")
+			process.kill()
+			# communicate() after kill() reaps the child and drains/closes both pipes.
+			process.communicate()
+			return False, task_name, TASK_TIMEOUT
+		duration = time.time() - start_time
+		if process.returncode == 0:
+			# The child announces its temp result file on stdout. The payload is
+			# only validated here, and the file is removed even when it cannot be
+			# parsed so temp files do not pile up.
+			for line in stdout.splitlines():
+				if not line.startswith("RESULT_FILE:"):
+					continue
+				result_file = Path(line.split(":", 1)[1].strip())
+				try:
+					with result_file.open() as f:
+						json.load(f)
+				except (json.JSONDecodeError, OSError):
+					logger.exception("Failed to read result file")
+				finally:
+					with contextlib.suppress(OSError):
+						result_file.unlink()
+			logger.info(f"✓ Completed {task_name} in {duration:.2f}s")
+			return True, task_name, duration
+		if process.returncode == SIGKILL_RETURN_CODE:
+			logger.error(f"✗ Task {task_name} killed (OOM) - return code {process.returncode}")
+		else:
+			logger.error(f"✗ Task {task_name} failed with return code {process.returncode}")
+		if stderr:
+			logger.error(f"Error output: {stderr}")
+		if stdout:
+			# The runner script may report its error and traceback on stdout.
+			logger.error(f"Standard output: {stdout}")
+		return False, task_name, duration
+	except Exception:
+		logger.exception(f"✗ Failed to run task {task_name}")
+		return False, task_name, 0.0
+	finally:
+		# Clean up script file
+		with contextlib.suppress(Exception):
+			Path(script_path).unlink()
+def collect_results(output_path: Path) -> dict:
+	"""Load every per-task result JSON under ``<output_path>/<MODEL_NAME>/distilled``.
+
+	Returns a dict keyed by file stem. ``model_meta.json`` is ignored, files
+	that cannot be read or parsed are logged as warnings and skipped, and a
+	missing results directory yields an empty dict.
+	"""
+	results_dir = output_path / MODEL_NAME / "distilled"
+	if not results_dir.exists():
+		return {}
+	collected = {}
+	candidates = (path for path in results_dir.glob("*.json") if path.name != "model_meta.json")
+	for result_file in candidates:
 		try:
+			with result_file.open() as f:
+				collected[result_file.stem] = json.load(f)
+		except (json.JSONDecodeError, OSError) as e:
+			logger.warning(f"Could not load {result_file}: {e}")
+	return collected
+def save_summary(output_path: Path, results: dict, stats: dict) -> None:
+	"""Write ``mteb_summary.json`` into ``output_path``.
+
+	The file holds the model name, the current timestamp, the configured task
+	timeout, the run statistics and the collected task results. Values json
+	cannot encode are stringified.
+	"""
+	summary_file = output_path / "mteb_summary.json"
+	payload = {
+		"model_name": MODEL_NAME,
+		"timestamp": time.time(),
+		"task_timeout": TASK_TIMEOUT,
+		"stats": stats,
+		"task_results": results,
+	}
+	with summary_file.open("w") as handle:
+		json.dump(payload, handle, indent=2, default=str)
+	logger.info(f"Summary saved to {summary_file}")
+def main() -> None:
+	"""Evaluate every outstanding benchmark task, one subprocess at a time.
+
+	Tasks that already have a result file are skipped. Each remaining task is
+	run once and classified as successful, timed out or failed; a summary is
+	logged and written to ``mteb_summary.json``.
+	"""
+	logger.info(f"Starting MTEB evaluation for {MODEL_NAME}")
+	logger.info(f"Task timeout: {TASK_TIMEOUT}s (no retries)")
+	logger.info("Memory isolation: Each task runs in separate subprocess")
+	# Log system info
+	total_memory_gb = psutil.virtual_memory().total / (1024**3)
+	logger.info(f"System memory: {total_memory_gb:.1f} GB total")
+	output_path = Path(OUTPUT_DIR)
+	output_path.mkdir(parents=True, exist_ok=True)
+	# Get tasks
+	all_tasks = get_available_tasks()
+	if not all_tasks:
+		logger.error("No tasks found!")
+		return
+	logger.info(f"Found {len(all_tasks)} tasks")
+	# Check existing results
+	remaining_tasks = check_existing_results(output_path, all_tasks)
+	task_count = len(remaining_tasks)
+	logger.info(f"Will evaluate {task_count} remaining tasks")
+	if task_count == 0:
+		logger.info("All tasks already completed!")
+		return
+	# Process tasks sequentially (no retries)
+	start_time = time.time()
+	successful_tasks = []
+	failed_tasks = []
+	timed_out_tasks = []
+	for position, task_name in enumerate(remaining_tasks, start=1):
+		logger.info(f"[{position}/{task_count}] Processing: {task_name}")
+		success, name, duration = run_task_subprocess(task_name, str(output_path))
+		if success:
+			successful_tasks.append((name, duration))
+		elif duration == TASK_TIMEOUT:
+			# run_task_subprocess reports exactly TASK_TIMEOUT when it gave up waiting.
+			timed_out_tasks.append(name)
+		else:
+			# OOM kills land here too; they are logged inside run_task_subprocess.
+			failed_tasks.append(name)
+		# Progress update
+		progress = (position / task_count) * 100
+		logger.info(f"Progress: {position}/{task_count} ({progress:.1f}%)")
+		# Brief pause between tasks
+		time.sleep(1)
+	total_time = time.time() - start_time
+	# Log final summary
+	separator = "=" * 80
+	logger.info(separator)
+	logger.info("EVALUATION SUMMARY")
+	logger.info(separator)
+	logger.info(f"Total tasks: {task_count}")
+	logger.info(f"Successful: {len(successful_tasks)}")
+	logger.info(f"Failed: {len(failed_tasks)}")
+	logger.info(f"Timed out: {len(timed_out_tasks)}")
+	logger.info(f"Total time: {total_time:.2f}s")
+	avg_time = 0
+	if successful_tasks:
+		avg_time = sum(elapsed for _, elapsed in successful_tasks) / len(successful_tasks)
+		logger.info(f"Average successful task time: {avg_time:.2f}s")
+	if failed_tasks:
+		logger.warning(f"Failed tasks: {failed_tasks}")
+	if timed_out_tasks:
+		logger.warning(f"Timed out tasks: {timed_out_tasks}")
+	logger.info(separator)
+	# Collect and save results
+	all_results = collect_results(output_path)
+	stats = {
+		"total_tasks": task_count,
+		"successful": len(successful_tasks),
+		"failed": len(failed_tasks),
+		"timed_out": len(timed_out_tasks),
+		"total_time": total_time,
+		"avg_time": avg_time,
+		"successful_task_details": successful_tasks,
+		"failed_tasks": failed_tasks,
+		"timed_out_tasks": timed_out_tasks,
+	}
+	save_summary(output_path, all_results, stats)
+	logger.info("Evaluation completed!")
 if __name__ == "__main__":

README.md CHANGED Viewed

@@ -134,6 +134,56 @@ Detailed evaluation results, including similarity plots and performance metrics,
 - `trained_code_classifier/` - Directory containing trained classification model
 - `mteb_results/` - Directory containing MTEB evaluation results
 ## Acknowledgments
 This project is built upon the following technologies:

 - `trained_code_classifier/` - Directory containing trained classification model
 - `mteb_results/` - Directory containing MTEB evaluation results
+## MTEB Benchmark Results (Partial)
+**Overall Average Score: 0.1962**
+| Category | Task | Score |
+|----------|------|-------|
+| **Classification** | **Average** | **0.4164** |
+| | AmazonCounterfactualClassification | 0.5690 |
+| | AmazonReviewsClassification | 0.2637 |
+| | | |
+| **Clustering** | **Average** | **0.0775** |
+| | BiorxivClusteringS2S | 0.0775 |
+| | | |
+| **Reranking** | **Average** | **0.4643** |
+| | AskUbuntuDupQuestions | 0.4643 |
+| | | |
+| **Retrieval** | **Average** | **0.1509** |
+| | ArguAna | 0.1509 |
+| | | |
+| **CodeRetrieval** | **Average** | **0.1034** |
+| | AppsRetrieval | 0.0008 |
+| | COIRCodeSearchNetRetrieval | Failed |
+| | CodeFeedbackMT | 0.1594 |
+| | CodeSearchNetCCRetrieval | Failed |
+| | CodeTransOceanContest | 0.0951 |
+| | CodeTransOceanDL | 0.2780 |
+| | CosQA | 0.0097 |
+| | StackOverflowQA | 0.1762 |
+| | SyntheticText2SQL | 0.0049 |
+| | | |
+| **STS** | **Average** | **0.3016** |
+| | BIOSSES | 0.3016 |
+| | | |
+### Summary Statistics
+- **Total Tasks**: 15
+- **Successful Tasks**: 13
+- **Failed Tasks**: 2
+- **Overall Average**: 0.1962
+### Category Averages
+- **Classification**: 0.4164 (2 tasks)
+- **Clustering**: 0.0775 (1 task)
+- **Reranking**: 0.4643 (1 task)
+- **Retrieval**: 0.1509 (1 task)
+- **CodeRetrieval**: 0.1034 (7 tasks)
+- **STS**: 0.3016 (1 task)
 ## Acknowledgments
 This project is built upon the following technologies:

analyze_mteb_results.py ADDED Viewed

	@@ -0,0 +1,311 @@

+#!/usr/bin/env python
+"""
+MTEB Results Analysis Script.
+This script analyzes MTEB benchmark results from the results directory,
+categorizes tasks, calculates averages, and updates the README.md with
+a comprehensive results table.
+"""
+import json
+import re
+from pathlib import Path
+# Task category mappings based on MTEB benchmark structure
+# Maps an exact task name to the category it is reported under. Tasks missing
+# from this table are categorized by name-pattern inference in categorize_tasks().
+TASK_CATEGORIES = {
+	# Classification tasks
+	"AmazonCounterfactualClassification": "Classification",
+	"AmazonReviewsClassification": "Classification",
+	"Banking77Classification": "Classification",
+	"EmotionClassification": "Classification",
+	"ImdbClassification": "Classification",
+	"MassiveIntentClassification": "Classification",
+	"MassiveScenarioClassification": "Classification",
+	"MTOPDomainClassification": "Classification",
+	"MTOPIntentClassification": "Classification",
+	"ToxicConversationsClassification": "Classification",
+	"TweetSentimentExtractionClassification": "Classification",
+	# Clustering tasks
+	"ArxivClusteringP2P": "Clustering",
+	"ArxivClusteringS2S": "Clustering",
+	"BiorxivClusteringP2P": "Clustering",
+	"BiorxivClusteringS2S": "Clustering",
+	"MedrxivClusteringP2P": "Clustering",
+	"MedrxivClusteringS2S": "Clustering",
+	"RedditClustering": "Clustering",
+	"RedditClusteringP2P": "Clustering",
+	"StackExchangeClustering": "Clustering",
+	"StackExchangeClusteringP2P": "Clustering",
+	"TwentyNewsgroupsClustering": "Clustering",
+	# Pair Classification tasks
+	"SprintDuplicateQuestions": "PairClassification",
+	"TwitterSemEval2015": "PairClassification",
+	"TwitterURLCorpus": "PairClassification",
+	# Reranking tasks
+	"AskUbuntuDupQuestions": "Reranking",
+	"MindSmallReranking": "Reranking",
+	"SciDocsRR": "Reranking",
+	"StackOverflowDupQuestions": "Reranking",
+	# Retrieval tasks
+	"ArguAna": "Retrieval",
+	"ClimateFEVER": "Retrieval",
+	"CQADupstackRetrieval": "Retrieval",
+	"DBPedia": "Retrieval",
+	"FEVER": "Retrieval",
+	"FiQA2018": "Retrieval",
+	"HotpotQA": "Retrieval",
+	"MSMARCO": "Retrieval",
+	"NFCorpus": "Retrieval",
+	"NQ": "Retrieval",
+	"QuoraRetrieval": "Retrieval",
+	"SCIDOCS": "Retrieval",
+	"SciFact": "Retrieval",
+	"Touche2020": "Retrieval",
+	"TRECCOVID": "Retrieval",
+	# Code retrieval tasks
+	"CodeSearchNetCCRetrieval": "CodeRetrieval",
+	"COIRCodeSearchNetRetrieval": "CodeRetrieval",
+	"StackOverflowQA": "CodeRetrieval",
+	"AppsRetrieval": "CodeRetrieval",
+	"CodeTransOceanContest": "CodeRetrieval",
+	"CodeTransOceanDL": "CodeRetrieval",
+	"CodeFeedbackMT": "CodeRetrieval",
+	"SyntheticText2SQL": "CodeRetrieval",
+	"CosQA": "CodeRetrieval",
+	# STS (Semantic Textual Similarity) tasks
+	"BIOSSES": "STS",
+	"SICK-R": "STS",
+	"STS12": "STS",
+	"STS13": "STS",
+	"STS14": "STS",
+	"STS15": "STS",
+	"STS16": "STS",
+	"STS17": "STS",
+	"STS22": "STS",
+	"STSBenchmark": "STS",
+	# NOTE(review): SummEval looks like a summarization task rather than STS -
+	# confirm that reporting it under STS is intended.
+	"SummEval": "STS",
+}
+def load_mteb_results(results_dir: Path) -> dict[str, dict]:
+	"""Load every task result JSON found directly in ``results_dir``.
+
+	Args:
+		results_dir: Directory holding one ``<task>.json`` file per task.
+
+	Returns:
+		Mapping of task name to the parsed JSON object. The name is the file's
+		``task_name`` field when present, otherwise the file stem.
+		``model_meta.json`` is ignored. Loading is best-effort: files that
+		cannot be read, are not valid JSON, or do not hold a JSON object are
+		skipped.
+	"""
+	results = {}
+	# Sorted so that, if two files claim the same task name, the winner does
+	# not depend on directory listing order.
+	for json_file in sorted(results_dir.glob("*.json")):
+		if json_file.name == "model_meta.json":
+			continue
+		try:
+			with json_file.open() as f:
+				data = json.load(f)
+		except (ValueError, OSError):
+			# ValueError covers json.JSONDecodeError and undecodable bytes.
+			continue
+		if not isinstance(data, dict):
+			# A result must be a JSON object; anything else has no scores to read.
+			continue
+		results[data.get("task_name", json_file.stem)] = data
+	return results
+def extract_main_score(result_data: dict) -> float:
+	"""Return ``main_score`` of the first entry of the "test" split.
+
+	Falls back to 0.0 when the expected ``scores -> test -> [0] -> main_score``
+	path is missing or has the wrong shape; callers treat a score of 0 as a
+	failed task.
+	"""
+	try:
+		first_test_entry = result_data["scores"]["test"][0]
+		main_score = first_test_entry["main_score"]
+	except (KeyError, IndexError, TypeError):
+		return 0.0
+	return main_score
+def categorize_tasks(results: dict[str, dict]) -> dict[str, list[tuple[str, float]]]:
+	"""Group task results by category and attach each task's main score.
+
+	Args:
+		results: Mapping of task name to its parsed result JSON.
+
+	Returns:
+		Mapping of category to a name-sorted list of ``(task_name, score)``
+		pairs. The category comes from TASK_CATEGORIES when the task is listed
+		there, otherwise from patterns in the task name ("Other" if none match).
+	"""
+	categories: dict[str, list[tuple[str, float]]] = {}
+	for task_name, result_data in results.items():
+		# Get category from mapping, or try to infer from task name
+		category = TASK_CATEGORIES.get(task_name)
+		if not category:
+			# Try to infer category from task name patterns
+			if "Classification" in task_name:
+				category = "Classification"
+			elif "Clustering" in task_name:
+				category = "Clustering"
+			elif "Code" in task_name or "SQL" in task_name:
+				# Checked before the generic retrieval pattern: code tasks are
+				# usually named "...Retrieval" as well and would otherwise never
+				# reach this branch.
+				category = "CodeRetrieval"
+			elif "Retrieval" in task_name or "QA" in task_name:
+				category = "Retrieval"
+			elif "STS" in task_name or "SICK" in task_name or "BIOSSES" in task_name:
+				category = "STS"
+			else:
+				category = "Other"
+		score = extract_main_score(result_data)
+		categories.setdefault(category, []).append((task_name, score))
+	# Sort tasks within each category
+	for category_tasks in categories.values():
+		category_tasks.sort(key=lambda x: x[0])
+	return categories
+def calculate_averages(categories: dict[str, list[tuple[str, float]]]) -> dict[str, float]:
+	"""Compute the mean score per category over its successful tasks.
+
+	Only scores greater than zero count (zero marks a failed task). A category
+	with no such score gets an average of 0.0.
+	"""
+	averages = {}
+	for category, tasks in categories.items():
+		positive = [entry[1] for entry in tasks if entry[1] > 0]
+		averages[category] = sum(positive) / len(positive) if positive else 0.0
+	return averages
+def generate_results_table(categories: dict[str, list[tuple[str, float]]], averages: dict[str, float]) -> str:
+	"""Render the results as a markdown section.
+
+	Args:
+		categories: Mapping of category to ``(task_name, score)`` pairs; a score
+			of zero or less is rendered as "Failed" and left out of averages.
+		averages: Mapping of category to its average score; must contain every
+			non-empty category that appears in the fixed category order below.
+
+	Returns:
+		Markdown text: heading, overall average, a category/task/score table,
+		summary statistics and per-category averages, joined with newlines.
+	"""
+	# Calculate overall average
+	all_scores = [score for tasks in categories.values() for _, score in tasks if score > 0]
+	overall_avg = sum(all_scores) / len(all_scores) if all_scores else 0.0
+	total_tasks = sum(len(tasks) for tasks in categories.values())
+	# Create table
+	table_lines = [
+		"## MTEB Benchmark Results",
+		"",
+		f"**Overall Average Score: {overall_avg:.4f}**",
+		"",
+		"| Category | Task | Score |",
+		"|----------|------|-------|",
+	]
+	# Sort categories for consistent ordering
+	category_order = [
+		"Classification",
+		"Clustering",
+		"PairClassification",
+		"Reranking",
+		"Retrieval",
+		"CodeRetrieval",
+		"STS",
+		"Other",
+	]
+	for category in category_order:
+		tasks = categories.get(category)
+		if not tasks:
+			continue
+		# Add category average row
+		avg_score = averages[category]
+		table_lines.append(f"| **{category}** | **Average** | **{avg_score:.4f}** |")
+		# Add individual tasks
+		for task_name, score in tasks:
+			if score > 0:  # Only show successful tasks
+				table_lines.append(f"| | {task_name} | {score:.4f} |")
+			else:
+				table_lines.append(f"| | {task_name} | Failed |")
+		table_lines.append("| | | |")  # Empty row for spacing
+	# Add summary statistics
+	table_lines.extend(
+		[
+			"",
+			"### Summary Statistics",
+			"",
+			f"- **Total Tasks**: {total_tasks}",
+			f"- **Successful Tasks**: {len(all_scores)}",
+			f"- **Failed Tasks**: {total_tasks - len(all_scores)}",
+			f"- **Overall Average**: {overall_avg:.4f}",
+			"",
+			"### Category Averages",
+			"",
+		]
+	)
+	for category in category_order:
+		if category in averages and categories.get(category):
+			avg = averages[category]
+			task_count = len([s for _, s in categories[category] if s > 0])
+			# Pluralize so a single scored task does not read "1 tasks".
+			noun = "task" if task_count == 1 else "tasks"
+			table_lines.append(f"- **{category}**: {avg:.4f} ({task_count} {noun})")
+	return "\n".join(table_lines)
+def update_readme(results_table: str, readme_path: Path = Path("README.md")) -> None:
+	"""Insert or replace the MTEB results section of a README file.
+
+	An existing "## MTEB Benchmark Results" section (up to the next level-1 or
+	level-2 heading, or end of file) is replaced. Otherwise the table is placed
+	before the first "## Acknowledgments" heading, else before the first
+	"## License" heading, else appended at the end.
+
+	Args:
+		results_table: Markdown text to place in the README, inserted verbatim.
+		readme_path: File to update; nothing happens if it does not exist.
+	"""
+	if not readme_path.exists():
+		return
+	# Read current README
+	with readme_path.open() as f:
+		content = f.read()
+	# Find the insertion point or replace existing MTEB results
+	mteb_pattern = r"## MTEB Benchmark Results.*?(?=\n## |\n# |\Z)"
+	if re.search(mteb_pattern, content, re.DOTALL):
+		# Replace existing MTEB results section. A callable replacement is used
+		# because re.sub would otherwise interpret backslashes in the table as
+		# escapes or group references.
+		new_content = re.sub(mteb_pattern, lambda _match: results_table, content, flags=re.DOTALL)
+	# Find a good insertion point (before Acknowledgments section or at the end)
+	elif "## Acknowledgments" in content:
+		# count=1: insert the table once, before the first occurrence only.
+		new_content = content.replace("## Acknowledgments", f"{results_table}\n\n## Acknowledgments", 1)
+	elif "## License" in content:
+		new_content = content.replace("## License", f"{results_table}\n\n## License", 1)
+	else:
+		# Add at the end
+		new_content = f"{content}\n\n{results_table}"
+	# Write updated README
+	with readme_path.open("w") as f:
+		f.write(new_content)
+def main() -> None:
+	"""Analyze the stored MTEB results, update README.md and print a summary.
+
+	Returns without doing anything when the results directory is missing or
+	contains no loadable results.
+	"""
+	results_dir = Path("mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled")
+	if not results_dir.exists():
+		return
+	results = load_mteb_results(results_dir)
+	if not results:
+		return
+	categories = categorize_tasks(results)
+	averages = calculate_averages(categories)
+	results_table = generate_results_table(categories, averages)
+	update_readme(results_table)
+	# Print summary to console. The values were previously computed and thrown
+	# away, so the script produced no console output at all.
+	total_tasks = sum(len(tasks) for tasks in categories.values())
+	all_scores = [score for tasks in categories.values() for _, score in tasks if score > 0]
+	print(f"Total tasks: {total_tasks}")  # noqa: T201
+	print(f"Successful tasks: {len(all_scores)}")  # noqa: T201
+	if all_scores:
+		print(f"Overall average: {sum(all_scores) / len(all_scores):.4f}")  # noqa: T201
+	for category, tasks in categories.items():
+		scored = len([s for _, s in tasks if s > 0])
+		print(f"{category}: {averages.get(category, 0.0):.4f} ({scored}/{len(tasks)} scored)")  # noqa: T201
+if __name__ == "__main__":
+	main()

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/AmazonCounterfactualClassification.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "dataset_revision": "e8379541af4e31359cca9fbcf4b00f2671dba205",
-  "evaluation_time": 8.737873554229736,
   "kg_co2_emissions": null,
   "mteb_version": "1.14.15",
   "scores": {

 {
   "dataset_revision": "e8379541af4e31359cca9fbcf4b00f2671dba205",
+  "evaluation_time": 7.698482990264893,
   "kg_co2_emissions": null,
   "mteb_version": "1.14.15",
   "scores": {

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/AmazonReviewsClassification.json ADDED Viewed

	@@ -0,0 +1,73 @@

+{
+  "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d",
+  "evaluation_time": 5.071816444396973,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "accuracy": 0.26374,
+        "f1": 0.25472288926645315,
+        "f1_weighted": 0.25472288926645315,
+        "hf_subset": "en",
+        "languages": [
+          "eng-Latn"
+        ],
+        "main_score": 0.26374,
+        "scores_per_experiment": [
+          {
+            "accuracy": 0.29,
+            "f1": 0.2830487996396496,
+            "f1_weighted": 0.2830487996396495
+          },
+          {
+            "accuracy": 0.276,
+            "f1": 0.26621916451801775,
+            "f1_weighted": 0.2662191645180177
+          },
+          {
+            "accuracy": 0.2682,
+            "f1": 0.24934092172665734,
+            "f1_weighted": 0.24934092172665728
+          },
+          {
+            "accuracy": 0.297,
+            "f1": 0.29141160920496506,
+            "f1_weighted": 0.29141160920496506
+          },
+          {
+            "accuracy": 0.268,
+            "f1": 0.2528895121087961,
+            "f1_weighted": 0.2528895121087961
+          },
+          {
+            "accuracy": 0.2548,
+            "f1": 0.25158219767608686,
+            "f1_weighted": 0.2515821976760869
+          },
+          {
+            "accuracy": 0.2192,
+            "f1": 0.21535453372408658,
+            "f1_weighted": 0.21535453372408656
+          },
+          {
+            "accuracy": 0.264,
+            "f1": 0.2493331111938578,
+            "f1_weighted": 0.24933311119385781
+          },
+          {
+            "accuracy": 0.2694,
+            "f1": 0.2569449221084947,
+            "f1_weighted": 0.2569449221084947
+          },
+          {
+            "accuracy": 0.2308,
+            "f1": 0.23110412076392003,
+            "f1_weighted": 0.23110412076392003
+          }
+        ]
+      }
+    ]
+  },
+  "task_name": "AmazonReviewsClassification"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/AppsRetrieval.json ADDED Viewed

	@@ -0,0 +1,159 @@

+{
+  "dataset_revision": "f22508f96b7a36c2415181ed8bb76f76e04ae2d5",
+  "evaluation_time": 7.666281223297119,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn",
+          "python-Code"
+        ],
+        "main_score": 0.00085,
+        "map_at_1": 0.00053,
+        "map_at_10": 0.00071,
+        "map_at_100": 0.00084,
+        "map_at_1000": 0.00102,
+        "map_at_20": 0.00078,
+        "map_at_3": 0.00062,
+        "map_at_5": 0.00062,
+        "mrr_at_1": 0.0005312084993359894,
+        "mrr_at_10": 0.0007082779991146524,
+        "mrr_at_100": 0.0008420281651420131,
+        "mrr_at_1000": 0.0010194100996475369,
+        "mrr_at_20": 0.000778427190426253,
+        "mrr_at_3": 0.000619743249225321,
+        "mrr_at_5": 0.000619743249225321,
+        "nauc_map_at_1000_diff1": 0.17541432452987074,
+        "nauc_map_at_1000_max": -0.4154388332336005,
+        "nauc_map_at_1000_std": -0.3819043407981619,
+        "nauc_map_at_100_diff1": 0.20679746840207688,
+        "nauc_map_at_100_max": -0.4833278286603358,
+        "nauc_map_at_100_std": -0.4458942676390952,
+        "nauc_map_at_10_diff1": 0.24647952852769084,
+        "nauc_map_at_10_max": -0.5373695050821257,
+        "nauc_map_at_10_std": -0.5236953500263536,
+        "nauc_map_at_1_diff1": 0.29581300303762115,
+        "nauc_map_at_1_max": -0.5373695050821257,
+        "nauc_map_at_1_std": -0.5373695050821257,
+        "nauc_map_at_20_diff1": 0.21533097655723032,
+        "nauc_map_at_20_max": -0.5291249767848446,
+        "nauc_map_at_20_std": -0.4922264006934063,
+        "nauc_map_at_3_diff1": 0.26762244617480385,
+        "nauc_map_at_3_max": -0.5320020257576935,
+        "nauc_map_at_3_std": -0.542736984406558,
+        "nauc_map_at_5_diff1": 0.26762244617480385,
+        "nauc_map_at_5_max": -0.5320020257576935,
+        "nauc_map_at_5_std": -0.542736984406558,
+        "nauc_mrr_at_1000_diff1": 0.17541443100790596,
+        "nauc_mrr_at_1000_max": -0.4154385500206541,
+        "nauc_mrr_at_1000_std": -0.38190393644049175,
+        "nauc_mrr_at_100_diff1": 0.20679746840207688,
+        "nauc_mrr_at_100_max": -0.4833278286603358,
+        "nauc_mrr_at_100_std": -0.4458942676390952,
+        "nauc_mrr_at_10_diff1": 0.24647952852769084,
+        "nauc_mrr_at_10_max": -0.5373695050821257,
+        "nauc_mrr_at_10_std": -0.5236953500263536,
+        "nauc_mrr_at_1_diff1": 0.29581300303762115,
+        "nauc_mrr_at_1_max": -0.5373695050821257,
+        "nauc_mrr_at_1_std": -0.5373695050821257,
+        "nauc_mrr_at_20_diff1": 0.21533097655723032,
+        "nauc_mrr_at_20_max": -0.5291249767848446,
+        "nauc_mrr_at_20_std": -0.4922264006934063,
+        "nauc_mrr_at_3_diff1": 0.26762244617480385,
+        "nauc_mrr_at_3_max": -0.5320020257576935,
+        "nauc_mrr_at_3_std": -0.542736984406558,
+        "nauc_mrr_at_5_diff1": 0.26762244617480385,
+        "nauc_mrr_at_5_max": -0.5320020257576935,
+        "nauc_mrr_at_5_std": -0.542736984406558,
+        "nauc_ndcg_at_1000_diff1": 0.03711794407808404,
+        "nauc_ndcg_at_1000_max": -0.10620944898582887,
+        "nauc_ndcg_at_1000_std": -0.07214854599247035,
+        "nauc_ndcg_at_100_diff1": 0.1478165352946149,
+        "nauc_ndcg_at_100_max": -0.3266890379270042,
+        "nauc_ndcg_at_100_std": -0.24237793463929755,
+        "nauc_ndcg_at_10_diff1": 0.22133616828561992,
+        "nauc_ndcg_at_10_max": -0.5398539007431512,
+        "nauc_ndcg_at_10_std": -0.5106250645273135,
+        "nauc_ndcg_at_1_diff1": 0.29581300303762115,
+        "nauc_ndcg_at_1_max": -0.5373695050821257,
+        "nauc_ndcg_at_1_std": -0.5373695050821257,
+        "nauc_ndcg_at_20_diff1": 0.15496100992740594,
+        "nauc_ndcg_at_20_max": -0.5151090921512047,
+        "nauc_ndcg_at_20_std": -0.42331797746940425,
+        "nauc_ndcg_at_3_diff1": 0.25634622342967694,
+        "nauc_ndcg_at_3_max": -0.5298550340279207,
+        "nauc_ndcg_at_3_std": -0.5448839761363309,
+        "nauc_ndcg_at_5_diff1": 0.25634622342967694,
+        "nauc_ndcg_at_5_max": -0.5298550340279207,
+        "nauc_ndcg_at_5_std": -0.5448839761363309,
+        "nauc_precision_at_1000_diff1": 0.020777159599148352,
+        "nauc_precision_at_1000_max": -0.06655316040754289,
+        "nauc_precision_at_1000_std": -0.035219149425472995,
+        "nauc_precision_at_100_diff1": 0.11476528495195117,
+        "nauc_precision_at_100_max": -0.1810698361522713,
+        "nauc_precision_at_100_std": -0.0631149349365322,
+        "nauc_precision_at_10_diff1": 0.17741266421378812,
+        "nauc_precision_at_10_max": -0.5448839761363309,
+        "nauc_precision_at_10_std": -0.48609773784945,
+        "nauc_precision_at_1_diff1": 0.29581300303762115,
+        "nauc_precision_at_1_max": -0.5373695050821257,
+        "nauc_precision_at_1_std": -0.5373695050821257,
+        "nauc_precision_at_20_diff1": 0.0803859336130361,
+        "nauc_precision_at_20_max": -0.4938580214848722,
+        "nauc_precision_at_20_std": -0.32580885579158325,
+        "nauc_precision_at_3_diff1": 0.23003503702438047,
+        "nauc_precision_at_3_max": -0.5248453866584504,
+        "nauc_precision_at_3_std": -0.549893623505801,
+        "nauc_precision_at_5_diff1": 0.2300350370243805,
+        "nauc_precision_at_5_max": -0.5248453866584505,
+        "nauc_precision_at_5_std": -0.5498936235058011,
+        "nauc_recall_at_1000_diff1": 0.020777159599148387,
+        "nauc_recall_at_1000_max": -0.06655316040754268,
+        "nauc_recall_at_1000_std": -0.03521914942547286,
+        "nauc_recall_at_100_diff1": 0.11476528495195135,
+        "nauc_recall_at_100_max": -0.18106983615227104,
+        "nauc_recall_at_100_std": -0.06311493493653193,
+        "nauc_recall_at_10_diff1": 0.17741266421378812,
+        "nauc_recall_at_10_max": -0.5448839761363309,
+        "nauc_recall_at_10_std": -0.48609773784945015,
+        "nauc_recall_at_1_diff1": 0.29581300303762115,
+        "nauc_recall_at_1_max": -0.5373695050821257,
+        "nauc_recall_at_1_std": -0.5373695050821257,
+        "nauc_recall_at_20_diff1": 0.08038593361303595,
+        "nauc_recall_at_20_max": -0.4938580214848721,
+        "nauc_recall_at_20_std": -0.3258088557915833,
+        "nauc_recall_at_3_diff1": 0.23003503702438058,
+        "nauc_recall_at_3_max": -0.5248453866584504,
+        "nauc_recall_at_3_std": -0.549893623505801,
+        "nauc_recall_at_5_diff1": 0.23003503702438058,
+        "nauc_recall_at_5_max": -0.5248453866584504,
+        "nauc_recall_at_5_std": -0.549893623505801,
+        "ndcg_at_1": 0.00053,
+        "ndcg_at_10": 0.00085,
+        "ndcg_at_100": 0.00164,
+        "ndcg_at_1000": 0.01024,
+        "ndcg_at_20": 0.00112,
+        "ndcg_at_3": 0.00066,
+        "ndcg_at_5": 0.00066,
+        "precision_at_1": 0.00053,
+        "precision_at_10": 0.00013,
+        "precision_at_100": 5e-05,
+        "precision_at_1000": 8e-05,
+        "precision_at_20": 0.00012,
+        "precision_at_3": 0.00027,
+        "precision_at_5": 0.00016,
+        "recall_at_1": 0.00053,
+        "recall_at_10": 0.00133,
+        "recall_at_100": 0.00531,
+        "recall_at_1000": 0.08234,
+        "recall_at_20": 0.00239,
+        "recall_at_3": 0.0008,
+        "recall_at_5": 0.0008
+      }
+    ]
+  },
+  "task_name": "AppsRetrieval"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/ArguAna.json ADDED Viewed

	@@ -0,0 +1,158 @@

+{
+  "dataset_revision": "c22ab2a51041ffd869aaddef7af8d8215647e41a",
+  "evaluation_time": 2.6541521549224854,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ],
+        "main_score": 0.1509,
+        "map_at_1": 0.0761,
+        "map_at_10": 0.12154,
+        "map_at_100": 0.12944,
+        "map_at_1000": 0.13039,
+        "map_at_20": 0.12583,
+        "map_at_3": 0.10218,
+        "map_at_5": 0.11381,
+        "mrr_at_1": 0.07681365576102418,
+        "mrr_at_10": 0.12187591727065411,
+        "mrr_at_100": 0.1297644188004288,
+        "mrr_at_1000": 0.13071256214837015,
+        "mrr_at_20": 0.12612116393992717,
+        "mrr_at_3": 0.10229966808914177,
+        "mrr_at_5": 0.11389284020862968,
+        "nauc_map_at_1000_diff1": 0.16146733687467354,
+        "nauc_map_at_1000_max": 0.06481036694891384,
+        "nauc_map_at_1000_std": 0.06943873238380074,
+        "nauc_map_at_100_diff1": 0.16139928093588565,
+        "nauc_map_at_100_max": 0.0644505695492588,
+        "nauc_map_at_100_std": 0.06974933502492409,
+        "nauc_map_at_10_diff1": 0.16658684660598214,
+        "nauc_map_at_10_max": 0.06224588862021987,
+        "nauc_map_at_10_std": 0.06880011677576231,
+        "nauc_map_at_1_diff1": 0.19579759176541026,
+        "nauc_map_at_1_max": 0.02603892226990134,
+        "nauc_map_at_1_std": 0.04620265141724082,
+        "nauc_map_at_20_diff1": 0.16382252282054222,
+        "nauc_map_at_20_max": 0.06529226434404913,
+        "nauc_map_at_20_std": 0.06849826441400649,
+        "nauc_map_at_3_diff1": 0.16402956096741358,
+        "nauc_map_at_3_max": 0.0419122332975646,
+        "nauc_map_at_3_std": 0.05925639235658917,
+        "nauc_map_at_5_diff1": 0.16894147524916653,
+        "nauc_map_at_5_max": 0.052414170749768195,
+        "nauc_map_at_5_std": 0.07005093386964208,
+        "nauc_mrr_at_1000_diff1": 0.15689213701351912,
+        "nauc_mrr_at_1000_max": 0.06318320049791439,
+        "nauc_mrr_at_1000_std": 0.06796437033671639,
+        "nauc_mrr_at_100_diff1": 0.1568336618890221,
+        "nauc_mrr_at_100_max": 0.06282844172684152,
+        "nauc_mrr_at_100_std": 0.06827651320612166,
+        "nauc_mrr_at_10_diff1": 0.1613861747091082,
+        "nauc_mrr_at_10_max": 0.06048932175951958,
+        "nauc_mrr_at_10_std": 0.06744723486463321,
+        "nauc_mrr_at_1_diff1": 0.1886592359414356,
+        "nauc_mrr_at_1_max": 0.025143721566481553,
+        "nauc_mrr_at_1_std": 0.04192879681303956,
+        "nauc_mrr_at_20_diff1": 0.15895555488261146,
+        "nauc_mrr_at_20_max": 0.06337596031238824,
+        "nauc_mrr_at_20_std": 0.06705300695703223,
+        "nauc_mrr_at_3_diff1": 0.15808017173425612,
+        "nauc_mrr_at_3_max": 0.03873273590791373,
+        "nauc_mrr_at_3_std": 0.05873440646581739,
+        "nauc_mrr_at_5_diff1": 0.1623674451736993,
+        "nauc_mrr_at_5_max": 0.048599887137470155,
+        "nauc_mrr_at_5_std": 0.06946191051556377,
+        "nauc_ndcg_at_1000_diff1": 0.14057148135021394,
+        "nauc_ndcg_at_1000_max": 0.09398561431514359,
+        "nauc_ndcg_at_1000_std": 0.06871748094502036,
+        "nauc_ndcg_at_100_diff1": 0.14160219742898073,
+        "nauc_ndcg_at_100_max": 0.08793842988004247,
+        "nauc_ndcg_at_100_std": 0.08107847041025427,
+        "nauc_ndcg_at_10_diff1": 0.16112940300466752,
+        "nauc_ndcg_at_10_max": 0.08282286934634887,
+        "nauc_ndcg_at_10_std": 0.07481333025577913,
+        "nauc_ndcg_at_1_diff1": 0.19579759176541026,
+        "nauc_ndcg_at_1_max": 0.02603892226990134,
+        "nauc_ndcg_at_1_std": 0.04620265141724082,
+        "nauc_ndcg_at_20_diff1": 0.15445394581844324,
+        "nauc_ndcg_at_20_max": 0.09290741055177616,
+        "nauc_ndcg_at_20_std": 0.0739310085946421,
+        "nauc_ndcg_at_3_diff1": 0.1574151504354397,
+        "nauc_ndcg_at_3_max": 0.04636430630581481,
+        "nauc_ndcg_at_3_std": 0.06191664189704533,
+        "nauc_ndcg_at_5_diff1": 0.1658753822856203,
+        "nauc_ndcg_at_5_max": 0.06313482448309465,
+        "nauc_ndcg_at_5_std": 0.07904072628627579,
+        "nauc_precision_at_1000_diff1": 0.0609588525314078,
+        "nauc_precision_at_1000_max": 0.1870041318251064,
+        "nauc_precision_at_1000_std": 0.019658161418599534,
+        "nauc_precision_at_100_diff1": 0.09473113411767209,
+        "nauc_precision_at_100_max": 0.1309396613797298,
+        "nauc_precision_at_100_std": 0.10623324275765494,
+        "nauc_precision_at_10_diff1": 0.15121181172955667,
+        "nauc_precision_at_10_max": 0.12477733598184097,
+        "nauc_precision_at_10_std": 0.08475912589528253,
+        "nauc_precision_at_1_diff1": 0.19579759176541026,
+        "nauc_precision_at_1_max": 0.02603892226990134,
+        "nauc_precision_at_1_std": 0.04620265141724082,
+        "nauc_precision_at_20_diff1": 0.1370251724378167,
+        "nauc_precision_at_20_max": 0.14912154538482067,
+        "nauc_precision_at_20_std": 0.08184312031151385,
+        "nauc_precision_at_3_diff1": 0.14253682467701162,
+        "nauc_precision_at_3_max": 0.05671718495423438,
+        "nauc_precision_at_3_std": 0.06788353997677292,
+        "nauc_precision_at_5_diff1": 0.16082986625463053,
+        "nauc_precision_at_5_max": 0.08573137277943063,
+        "nauc_precision_at_5_std": 0.09793524405071982,
+        "nauc_recall_at_1000_diff1": 0.06095885253140698,
+        "nauc_recall_at_1000_max": 0.1870041318251063,
+        "nauc_recall_at_1000_std": 0.019658161418598927,
+        "nauc_recall_at_100_diff1": 0.09473113411767223,
+        "nauc_recall_at_100_max": 0.1309396613797295,
+        "nauc_recall_at_100_std": 0.10623324275765476,
+        "nauc_recall_at_10_diff1": 0.1512118117295564,
+        "nauc_recall_at_10_max": 0.12477733598184074,
+        "nauc_recall_at_10_std": 0.08475912589528235,
+        "nauc_recall_at_1_diff1": 0.19579759176541026,
+        "nauc_recall_at_1_max": 0.02603892226990134,
+        "nauc_recall_at_1_std": 0.04620265141724082,
+        "nauc_recall_at_20_diff1": 0.13702517243781645,
+        "nauc_recall_at_20_max": 0.14912154538482042,
+        "nauc_recall_at_20_std": 0.08184312031151388,
+        "nauc_recall_at_3_diff1": 0.14253682467701162,
+        "nauc_recall_at_3_max": 0.056717184954234404,
+        "nauc_recall_at_3_std": 0.06788353997677302,
+        "nauc_recall_at_5_diff1": 0.16082986625463033,
+        "nauc_recall_at_5_max": 0.08573137277943055,
+        "nauc_recall_at_5_std": 0.0979352440507195,
+        "ndcg_at_1": 0.0761,
+        "ndcg_at_10": 0.1509,
+        "ndcg_at_100": 0.19506,
+        "ndcg_at_1000": 0.22612,
+        "ndcg_at_20": 0.16665,
+        "ndcg_at_3": 0.11065,
+        "ndcg_at_5": 0.13182,
+        "precision_at_1": 0.0761,
+        "precision_at_10": 0.02468,
+        "precision_at_100": 0.00467,
+        "precision_at_1000": 0.00072,
+        "precision_at_20": 0.01547,
+        "precision_at_3": 0.04505,
+        "precision_at_5": 0.03741,
+        "recall_at_1": 0.0761,
+        "recall_at_10": 0.2468,
+        "recall_at_100": 0.46728,
+        "recall_at_1000": 0.72475,
+        "recall_at_20": 0.30939,
+        "recall_at_3": 0.13514,
+        "recall_at_5": 0.18706
+      }
+    ]
+  },
+  "task_name": "ArguAna"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/AskUbuntuDupQuestions.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "dataset_revision": "2000358ca161889fa9c082cb41daa8dcfb161a54",
+  "evaluation_time": 0.4332466125488281,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ],
+        "main_score": 0.46429757499703045,
+        "map": 0.46429757499703045,
+        "mrr": 0.601056368992657,
+        "nAUC_map_diff1": 0.16940332025233937,
+        "nAUC_map_max": 0.15925499774951668,
+        "nAUC_map_std": 0.05294826509824163,
+        "nAUC_mrr_diff1": 0.19481488519394907,
+        "nAUC_mrr_max": 0.21250668851129054,
+        "nAUC_mrr_std": 0.022766508692728404
+      }
+    ]
+  },
+  "task_name": "AskUbuntuDupQuestions"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/BIOSSES.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "dataset_revision": "d3fb88f8f02e40887cd149695127462bbcf29b4a",
+  "evaluation_time": 0.0452265739440918,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "cosine_pearson": 0.2956207137346794,
+        "cosine_spearman": 0.30161530624430144,
+        "euclidean_pearson": 0.2995531537590785,
+        "euclidean_spearman": 0.30161530624430144,
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ],
+        "main_score": 0.30161530624430144,
+        "manhattan_pearson": 0.33453615581396934,
+        "manhattan_spearman": 0.3532610613411196,
+        "pearson": 0.2956207137346794,
+        "spearman": 0.30161530624430144
+      }
+    ]
+  },
+  "task_name": "BIOSSES"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/Banking77Classification.json DELETED Viewed

@@ -1,73 +0,0 @@
-{
-  "dataset_revision": "0fd18e25b25c072e09e0d92ab615fda904d66300",
-  "evaluation_time": 6.451777696609497,
-  "kg_co2_emissions": null,
-  "mteb_version": "1.14.15",
-  "scores": {
-    "test": [
-      {
-        "accuracy": 0.4396103896103896,
-        "f1": 0.4142711532114576,
-        "f1_weighted": 0.4142711532114576,
-        "hf_subset": "default",
-        "languages": [
-          "eng-Latn"
-        ],
-        "main_score": 0.4396103896103896,
-        "scores_per_experiment": [
-          {
-            "accuracy": 0.4279220779220779,
-            "f1": 0.4030476288783657,
-            "f1_weighted": 0.4030476288783656
-          },
-          {
-            "accuracy": 0.4211038961038961,
-            "f1": 0.39776168133611584,
-            "f1_weighted": 0.39776168133611584
-          },
-          {
-            "accuracy": 0.45064935064935063,
-            "f1": 0.42872843564828145,
-            "f1_weighted": 0.42872843564828145
-          },
-          {
-            "accuracy": 0.4448051948051948,
-            "f1": 0.420756828398419,
-            "f1_weighted": 0.42075682839841905
-          },
-          {
-            "accuracy": 0.44675324675324674,
-            "f1": 0.42100682221185654,
-            "f1_weighted": 0.42100682221185654
-          },
-          {
-            "accuracy": 0.45324675324675323,
-            "f1": 0.4392342490231314,
-            "f1_weighted": 0.4392342490231314
-          },
-          {
-            "accuracy": 0.437012987012987,
-            "f1": 0.4056017558988273,
-            "f1_weighted": 0.40560175589882724
-          },
-          {
-            "accuracy": 0.42337662337662335,
-            "f1": 0.39123709562594644,
-            "f1_weighted": 0.39123709562594655
-          },
-          {
-            "accuracy": 0.44512987012987015,
-            "f1": 0.41578171494860966,
-            "f1_weighted": 0.41578171494860966
-          },
-          {
-            "accuracy": 0.4461038961038961,
-            "f1": 0.4195553201450221,
-            "f1_weighted": 0.419555320145022
-          }
-        ]
-      }
-    ]
-  },
-  "task_name": "Banking77Classification"
-}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/BiorxivClusteringS2S.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "dataset_revision": "258694dd0231531bc1fd9de6ceb52a0853c6d908",
+  "evaluation_time": 6.352599620819092,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ],
+        "main_score": 0.07745778878625219,
+        "v_measure": 0.07745778878625219,
+        "v_measure_std": 0.006515604585361752,
+        "v_measures": [
+          0.07151497621642194,
+          0.07152886858477273,
+          0.07533936305694591,
+          0.07390923787342664,
+          0.07147679207450276,
+          0.07213600223586297,
+          0.08611746483041241,
+          0.08170353591216682,
+          0.08028322075745065,
+          0.09056842632055917
+        ]
+      }
+    ]
+  },
+  "task_name": "BiorxivClusteringS2S"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/COIRCodeSearchNetRetrieval.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "dataset_revision": "4adc7bc41202b5c13543c9c886a25f340634dab3",
+  "evaluation_time": 0.001447916030883789,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {},
+  "task_name": "COIRCodeSearchNetRetrieval"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CQADupstackProgrammersRetrieval.json DELETED Viewed

@@ -1,158 +0,0 @@
-{
-  "dataset_revision": "6184bc1440d2dbc7612be22b50686b8826d22b32",
-  "evaluation_time": 99.69791841506958,
-  "kg_co2_emissions": null,
-  "mteb_version": "1.14.15",
-  "scores": {
-    "test": [
-      {
-        "hf_subset": "default",
-        "languages": [
-          "eng-Latn"
-        ],
-        "main_score": 0.0501,
-        "map_at_1": 0.02467,
-        "map_at_10": 0.03898,
-        "map_at_100": 0.04261,
-        "map_at_1000": 0.04333,
-        "map_at_20": 0.04068,
-        "map_at_3": 0.03388,
-        "map_at_5": 0.03693,
-        "mrr_at_1": 0.030821917808219176,
-        "mrr_at_10": 0.04904462926723201,
-        "mrr_at_100": 0.05339942610218758,
-        "mrr_at_1000": 0.05413492750157237,
-        "mrr_at_20": 0.05126402659708249,
-        "mrr_at_3": 0.04280821917808219,
-        "mrr_at_5": 0.04634703196347032,
-        "nauc_map_at_1000_diff1": 0.03644747951501248,
-        "nauc_map_at_1000_max": 0.2240572170754659,
-        "nauc_map_at_1000_std": -0.17708810912472517,
-        "nauc_map_at_100_diff1": 0.03759221625144172,
-        "nauc_map_at_100_max": 0.22324901446317413,
-        "nauc_map_at_100_std": -0.17630470695891512,
-        "nauc_map_at_10_diff1": 0.03906418656483989,
-        "nauc_map_at_10_max": 0.22061594321968936,
-        "nauc_map_at_10_std": -0.17777470317814356,
-        "nauc_map_at_1_diff1": 0.1731091343679673,
-        "nauc_map_at_1_max": 0.33459947679728974,
-        "nauc_map_at_1_std": -0.23115450977179597,
-        "nauc_map_at_20_diff1": 0.03795725531499195,
-        "nauc_map_at_20_max": 0.22396003211648763,
-        "nauc_map_at_20_std": -0.17867373725662639,
-        "nauc_map_at_3_diff1": 0.06042780588964212,
-        "nauc_map_at_3_max": 0.2486807528974488,
-        "nauc_map_at_3_std": -0.18512855007450404,
-        "nauc_map_at_5_diff1": 0.04407217741234605,
-        "nauc_map_at_5_max": 0.22647048266105405,
-        "nauc_map_at_5_std": -0.18107585673560017,
-        "nauc_mrr_at_1000_diff1": 0.033601872249839834,
-        "nauc_mrr_at_1000_max": 0.2523936325136619,
-        "nauc_mrr_at_1000_std": -0.19078164353963076,
-        "nauc_mrr_at_100_diff1": 0.03435870935950355,
-        "nauc_mrr_at_100_max": 0.2523932973431928,
-        "nauc_mrr_at_100_std": -0.1900913512193067,
-        "nauc_mrr_at_10_diff1": 0.03361519179733555,
-        "nauc_mrr_at_10_max": 0.25392922716866984,
-        "nauc_mrr_at_10_std": -0.1935061134919541,
-        "nauc_mrr_at_1_diff1": 0.1772995319079407,
-        "nauc_mrr_at_1_max": 0.35182174117717013,
-        "nauc_mrr_at_1_std": -0.24426280067522707,
-        "nauc_mrr_at_20_diff1": 0.03479828151019169,
-        "nauc_mrr_at_20_max": 0.25624951214228564,
-        "nauc_mrr_at_20_std": -0.19212268093923462,
-        "nauc_mrr_at_3_diff1": 0.06173430027850725,
-        "nauc_mrr_at_3_max": 0.26889485727748363,
-        "nauc_mrr_at_3_std": -0.19153801111553947,
-        "nauc_mrr_at_5_diff1": 0.036743759763164886,
-        "nauc_mrr_at_5_max": 0.253857849052297,
-        "nauc_mrr_at_5_std": -0.19604549670316734,
-        "nauc_ndcg_at_1000_diff1": -0.010372586628261796,
-        "nauc_ndcg_at_1000_max": 0.20925878430027478,
-        "nauc_ndcg_at_1000_std": -0.1717044268161809,
-        "nauc_ndcg_at_100_diff1": 0.0023309149151885546,
-        "nauc_ndcg_at_100_max": 0.20125970115134734,
-        "nauc_ndcg_at_100_std": -0.15865628929382014,
-        "nauc_ndcg_at_10_diff1": 0.0026192804576363727,
-        "nauc_ndcg_at_10_max": 0.19884193622357532,
-        "nauc_ndcg_at_10_std": -0.16919003671988075,
-        "nauc_ndcg_at_1_diff1": 0.1772995319079407,
-        "nauc_ndcg_at_1_max": 0.35182174117717013,
-        "nauc_ndcg_at_1_std": -0.24426280067522707,
-        "nauc_ndcg_at_20_diff1": 0.0031543394811079034,
-        "nauc_ndcg_at_20_max": 0.20925361343315524,
-        "nauc_ndcg_at_20_std": -0.17106125631597793,
-        "nauc_ndcg_at_3_diff1": 0.03670154146101528,
-        "nauc_ndcg_at_3_max": 0.23212930749840155,
-        "nauc_ndcg_at_3_std": -0.1728371812831961,
-        "nauc_ndcg_at_5_diff1": 0.0107566708693031,
-        "nauc_ndcg_at_5_max": 0.20474332948099355,
-        "nauc_ndcg_at_5_std": -0.1734952739301359,
-        "nauc_precision_at_1000_diff1": -0.07195606207962846,
-        "nauc_precision_at_1000_max": 0.2542912736794115,
-        "nauc_precision_at_1000_std": -0.1881459402790264,
-        "nauc_precision_at_100_diff1": -0.04518222914182943,
-        "nauc_precision_at_100_max": 0.22138981394024387,
-        "nauc_precision_at_100_std": -0.13384472263037697,
-        "nauc_precision_at_10_diff1": -0.052513811685878764,
-        "nauc_precision_at_10_max": 0.18962064467698705,
-        "nauc_precision_at_10_std": -0.14827004787357115,
-        "nauc_precision_at_1_diff1": 0.1772995319079407,
-        "nauc_precision_at_1_max": 0.35182174117717013,
-        "nauc_precision_at_1_std": -0.24426280067522707,
-        "nauc_precision_at_20_diff1": -0.040789324913047875,
-        "nauc_precision_at_20_max": 0.22086458009752882,
-        "nauc_precision_at_20_std": -0.14430508663959002,
-        "nauc_precision_at_3_diff1": -0.013044619440245884,
-        "nauc_precision_at_3_max": 0.21285488271783465,
-        "nauc_precision_at_3_std": -0.1483164417030193,
-        "nauc_precision_at_5_diff1": -0.05113181393685194,
-        "nauc_precision_at_5_max": 0.1756649379589832,
-        "nauc_precision_at_5_std": -0.15632134056178232,
-        "nauc_recall_at_1000_diff1": -0.047075752528689695,
-        "nauc_recall_at_1000_max": 0.16414155669676642,
-        "nauc_recall_at_1000_std": -0.1513320281746568,
-        "nauc_recall_at_100_diff1": -0.023004658252697183,
-        "nauc_recall_at_100_max": 0.14861973646512244,
-        "nauc_recall_at_100_std": -0.12240747671934184,
-        "nauc_recall_at_10_diff1": -0.051375323084735164,
-        "nauc_recall_at_10_max": 0.1384336247044034,
-        "nauc_recall_at_10_std": -0.14737738059263306,
-        "nauc_recall_at_1_diff1": 0.1731091343679673,
-        "nauc_recall_at_1_max": 0.33459947679728974,
-        "nauc_recall_at_1_std": -0.23115450977179597,
-        "nauc_recall_at_20_diff1": -0.03578815918976938,
-        "nauc_recall_at_20_max": 0.16386688869593355,
-        "nauc_recall_at_20_std": -0.1528456365862212,
-        "nauc_recall_at_3_diff1": -0.021696811828998432,
-        "nauc_recall_at_3_max": 0.1864107664448688,
-        "nauc_recall_at_3_std": -0.14586036842324565,
-        "nauc_recall_at_5_diff1": -0.0538517948884412,
-        "nauc_recall_at_5_max": 0.1453135254521713,
-        "nauc_recall_at_5_std": -0.1531619473747777,
-        "ndcg_at_1": 0.03082,
-        "ndcg_at_10": 0.0501,
-        "ndcg_at_100": 0.07072,
-        "ndcg_at_1000": 0.09327,
-        "ndcg_at_20": 0.05662,
-        "ndcg_at_3": 0.03989,
-        "ndcg_at_5": 0.04484,
-        "precision_at_1": 0.03082,
-        "precision_at_10": 0.00993,
-        "precision_at_100": 0.00241,
-        "precision_at_1000": 0.00052,
-        "precision_at_20": 0.00685,
-        "precision_at_3": 0.02017,
-        "precision_at_5": 0.0153,
-        "recall_at_1": 0.02467,
-        "recall_at_10": 0.07499,
-        "recall_at_100": 0.16969,
-        "recall_at_1000": 0.33718,
-        "recall_at_20": 0.09901,
-        "recall_at_3": 0.04648,
-        "recall_at_5": 0.05869
-      }
-    ]
-  },
-  "task_name": "CQADupstackProgrammersRetrieval"
-}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CodeFeedbackMT.json ADDED Viewed

	@@ -0,0 +1,158 @@

+{
+  "dataset_revision": "b0f12fa0c0dd67f59c95a5c33d02aeeb4c398c5f",
+  "evaluation_time": 86.56418371200562,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ],
+        "main_score": 0.15938,
+        "map_at_1": 0.12171,
+        "map_at_10": 0.14602,
+        "map_at_100": 0.14933,
+        "map_at_1000": 0.14984,
+        "map_at_20": 0.14772,
+        "map_at_3": 0.13875,
+        "map_at_5": 0.14277,
+        "mrr_at_1": 0.12171424267530316,
+        "mrr_at_10": 0.14602286924159336,
+        "mrr_at_100": 0.14933480468311353,
+        "mrr_at_1000": 0.14984386023850896,
+        "mrr_at_20": 0.14771560552079413,
+        "mrr_at_3": 0.1387487133137506,
+        "mrr_at_5": 0.14277070623383797,
+        "nauc_map_at_1000_diff1": 0.49472962132811066,
+        "nauc_map_at_1000_max": 0.1288096788830561,
+        "nauc_map_at_1000_std": 0.09402130699097373,
+        "nauc_map_at_100_diff1": 0.49521525169736585,
+        "nauc_map_at_100_max": 0.12889826157883777,
+        "nauc_map_at_100_std": 0.09387665760881421,
+        "nauc_map_at_10_diff1": 0.5007792220812604,
+        "nauc_map_at_10_max": 0.13096854092843976,
+        "nauc_map_at_10_std": 0.09297542921420311,
+        "nauc_map_at_1_diff1": 0.576490513877843,
+        "nauc_map_at_1_max": 0.155866059169816,
+        "nauc_map_at_1_std": 0.09440442396510458,
+        "nauc_map_at_20_diff1": 0.4980762076056278,
+        "nauc_map_at_20_max": 0.12991844827572516,
+        "nauc_map_at_20_std": 0.09346830652976015,
+        "nauc_map_at_3_diff1": 0.5220433951797554,
+        "nauc_map_at_3_max": 0.1391271534357672,
+        "nauc_map_at_3_std": 0.09400942293158544,
+        "nauc_map_at_5_diff1": 0.5107250849461592,
+        "nauc_map_at_5_max": 0.13530094789210456,
+        "nauc_map_at_5_std": 0.09342049003345741,
+        "nauc_mrr_at_1000_diff1": 0.49472962954673433,
+        "nauc_mrr_at_1000_max": 0.12880968585736355,
+        "nauc_mrr_at_1000_std": 0.0940213068870858,
+        "nauc_mrr_at_100_diff1": 0.49521525169736585,
+        "nauc_mrr_at_100_max": 0.12889826157883777,
+        "nauc_mrr_at_100_std": 0.09387665760881421,
+        "nauc_mrr_at_10_diff1": 0.5007792220812604,
+        "nauc_mrr_at_10_max": 0.13096854092843976,
+        "nauc_mrr_at_10_std": 0.09297542921420311,
+        "nauc_mrr_at_1_diff1": 0.576490513877843,
+        "nauc_mrr_at_1_max": 0.155866059169816,
+        "nauc_mrr_at_1_std": 0.09440442396510458,
+        "nauc_mrr_at_20_diff1": 0.4980762076056278,
+        "nauc_mrr_at_20_max": 0.12991844827572516,
+        "nauc_mrr_at_20_std": 0.09346830652976015,
+        "nauc_mrr_at_3_diff1": 0.5220433951797554,
+        "nauc_mrr_at_3_max": 0.1391271534357672,
+        "nauc_mrr_at_3_std": 0.09400942293158544,
+        "nauc_mrr_at_5_diff1": 0.5107250849461592,
+        "nauc_mrr_at_5_max": 0.13530094789210456,
+        "nauc_mrr_at_5_std": 0.09342049003345741,
+        "nauc_ndcg_at_1000_diff1": 0.42556848285754595,
+        "nauc_ndcg_at_1000_max": 0.1074330906576106,
+        "nauc_ndcg_at_1000_std": 0.09931415214354576,
+        "nauc_ndcg_at_100_diff1": 0.4389633172139021,
+        "nauc_ndcg_at_100_max": 0.10912358012253182,
+        "nauc_ndcg_at_100_std": 0.09591996585185938,
+        "nauc_ndcg_at_10_diff1": 0.4656271351032459,
+        "nauc_ndcg_at_10_max": 0.11811051132398084,
+        "nauc_ndcg_at_10_std": 0.09195643910816585,
+        "nauc_ndcg_at_1_diff1": 0.576490513877843,
+        "nauc_ndcg_at_1_max": 0.155866059169816,
+        "nauc_ndcg_at_1_std": 0.09440442396510458,
+        "nauc_ndcg_at_20_diff1": 0.45697106335736143,
+        "nauc_ndcg_at_20_max": 0.115023380566875,
+        "nauc_ndcg_at_20_std": 0.09369132873791501,
+        "nauc_ndcg_at_3_diff1": 0.5061759461194467,
+        "nauc_ndcg_at_3_max": 0.13434966943537516,
+        "nauc_ndcg_at_3_std": 0.09382725647213368,
+        "nauc_ndcg_at_5_diff1": 0.48712512841939637,
+        "nauc_ndcg_at_5_max": 0.12776188612692832,
+        "nauc_ndcg_at_5_std": 0.09280417774911971,
+        "nauc_precision_at_1000_diff1": 0.22171401911333807,
+        "nauc_precision_at_1000_max": 0.05180228755438657,
+        "nauc_precision_at_1000_std": 0.121478173960711,
+        "nauc_precision_at_100_diff1": 0.2930513840339096,
+        "nauc_precision_at_100_max": 0.058457996208423325,
+        "nauc_precision_at_100_std": 0.10329586184541412,
+        "nauc_precision_at_10_diff1": 0.37748222270492887,
+        "nauc_precision_at_10_max": 0.08516307019678841,
+        "nauc_precision_at_10_std": 0.08936548083478481,
+        "nauc_precision_at_1_diff1": 0.576490513877843,
+        "nauc_precision_at_1_max": 0.155866059169816,
+        "nauc_precision_at_1_std": 0.09440442396510458,
+        "nauc_precision_at_20_diff1": 0.35370118406718887,
+        "nauc_precision_at_20_max": 0.07720501737285508,
+        "nauc_precision_at_20_std": 0.09512670518828382,
+        "nauc_precision_at_3_diff1": 0.4648455680777127,
+        "nauc_precision_at_3_max": 0.12193379632419739,
+        "nauc_precision_at_3_std": 0.09333400762182767,
+        "nauc_precision_at_5_diff1": 0.42689240448557475,
+        "nauc_precision_at_5_max": 0.10840841308271118,
+        "nauc_precision_at_5_std": 0.09114478125877269,
+        "nauc_recall_at_1000_diff1": 0.22171401911333835,
+        "nauc_recall_at_1000_max": 0.05180228755438666,
+        "nauc_recall_at_1000_std": 0.12147817396071107,
+        "nauc_recall_at_100_diff1": 0.2930513840339097,
+        "nauc_recall_at_100_max": 0.05845799620842323,
+        "nauc_recall_at_100_std": 0.103295861845414,
+        "nauc_recall_at_10_diff1": 0.37748222270492904,
+        "nauc_recall_at_10_max": 0.08516307019678845,
+        "nauc_recall_at_10_std": 0.0893654808347849,
+        "nauc_recall_at_1_diff1": 0.576490513877843,
+        "nauc_recall_at_1_max": 0.155866059169816,
+        "nauc_recall_at_1_std": 0.09440442396510458,
+        "nauc_recall_at_20_diff1": 0.353701184067189,
+        "nauc_recall_at_20_max": 0.07720501737285505,
+        "nauc_recall_at_20_std": 0.09512670518828369,
+        "nauc_recall_at_3_diff1": 0.46484556807771255,
+        "nauc_recall_at_3_max": 0.12193379632419747,
+        "nauc_recall_at_3_std": 0.09333400762182767,
+        "nauc_recall_at_5_diff1": 0.4268924044855751,
+        "nauc_recall_at_5_max": 0.10840841308271101,
+        "nauc_recall_at_5_std": 0.09114478125877268,
+        "ndcg_at_1": 0.12171,
+        "ndcg_at_10": 0.15938,
+        "ndcg_at_100": 0.17773,
+        "ndcg_at_1000": 0.19422,
+        "ndcg_at_20": 0.16545,
+        "ndcg_at_3": 0.14423,
+        "ndcg_at_5": 0.1515,
+        "precision_at_1": 0.12171,
+        "precision_at_10": 0.02022,
+        "precision_at_100": 0.00293,
+        "precision_at_1000": 0.00043,
+        "precision_at_20": 0.0113,
+        "precision_at_3": 0.05335,
+        "precision_at_5": 0.03555,
+        "recall_at_1": 0.12171,
+        "recall_at_10": 0.20215,
+        "recall_at_100": 0.29329,
+        "recall_at_1000": 0.42977,
+        "recall_at_20": 0.22595,
+        "recall_at_3": 0.16005,
+        "recall_at_5": 0.17775
+      }
+    ]
+  },
+  "task_name": "CodeFeedbackMT"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CodeSearchNetCCRetrieval.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "dataset_revision": "6e1effa2c03723c5fde48ee912b5ee08d4f211e8",
+  "evaluation_time": 0.0003421306610107422,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {},
+  "task_name": "CodeSearchNetCCRetrieval"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CodeTransOceanContest.json ADDED Viewed

	@@ -0,0 +1,159 @@

+{
+  "dataset_revision": "20da4eb20a4b17300c0986ee148c90867a7f2a4d",
+  "evaluation_time": 0.8471865653991699,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "hf_subset": "default",
+        "languages": [
+          "python-Code",
+          "c++-Code"
+        ],
+        "main_score": 0.09511,
+        "map_at_1": 0.06787,
+        "map_at_10": 0.08418,
+        "map_at_100": 0.08966,
+        "map_at_1000": 0.09203,
+        "map_at_20": 0.08539,
+        "map_at_3": 0.07994,
+        "map_at_5": 0.08107,
+        "mrr_at_1": 0.06787330316742081,
+        "mrr_at_10": 0.08418085182791066,
+        "mrr_at_100": 0.08966102694364947,
+        "mrr_at_1000": 0.09203289825977753,
+        "mrr_at_20": 0.08539287509875745,
+        "mrr_at_3": 0.0799396681749623,
+        "mrr_at_5": 0.08107088989441931,
+        "nauc_map_at_1000_diff1": 0.4002206493865976,
+        "nauc_map_at_1000_max": 0.05565781891778103,
+        "nauc_map_at_1000_std": -0.004874041232219024,
+        "nauc_map_at_100_diff1": 0.39935983795466523,
+        "nauc_map_at_100_max": 0.05300217863316174,
+        "nauc_map_at_100_std": -0.003035576698055301,
+        "nauc_map_at_10_diff1": 0.41045827644014476,
+        "nauc_map_at_10_max": 0.06092815179963274,
+        "nauc_map_at_10_std": -0.005952146225472054,
+        "nauc_map_at_1_diff1": 0.5123648006404755,
+        "nauc_map_at_1_max": 0.06018186942687983,
+        "nauc_map_at_1_std": 0.003062256335957277,
+        "nauc_map_at_20_diff1": 0.41026580715260286,
+        "nauc_map_at_20_max": 0.05961500732393842,
+        "nauc_map_at_20_std": -0.008812971012975637,
+        "nauc_map_at_3_diff1": 0.43328416235104994,
+        "nauc_map_at_3_max": 0.061612024091789064,
+        "nauc_map_at_3_std": 0.0009811686045742218,
+        "nauc_map_at_5_diff1": 0.42638117055302016,
+        "nauc_map_at_5_max": 0.062409134446330596,
+        "nauc_map_at_5_std": -3.141760809509476e-05,
+        "nauc_mrr_at_1000_diff1": 0.4002206493865976,
+        "nauc_mrr_at_1000_max": 0.05565781891778103,
+        "nauc_mrr_at_1000_std": -0.004874041232219024,
+        "nauc_mrr_at_100_diff1": 0.39935983795466523,
+        "nauc_mrr_at_100_max": 0.05300217863316174,
+        "nauc_mrr_at_100_std": -0.003035576698055301,
+        "nauc_mrr_at_10_diff1": 0.41045827644014476,
+        "nauc_mrr_at_10_max": 0.06092815179963274,
+        "nauc_mrr_at_10_std": -0.005952146225472054,
+        "nauc_mrr_at_1_diff1": 0.5123648006404755,
+        "nauc_mrr_at_1_max": 0.06018186942687983,
+        "nauc_mrr_at_1_std": 0.003062256335957277,
+        "nauc_mrr_at_20_diff1": 0.41026580715260286,
+        "nauc_mrr_at_20_max": 0.05961500732393842,
+        "nauc_mrr_at_20_std": -0.008812971012975637,
+        "nauc_mrr_at_3_diff1": 0.43328416235104994,
+        "nauc_mrr_at_3_max": 0.061612024091789064,
+        "nauc_mrr_at_3_std": 0.0009811686045742218,
+        "nauc_mrr_at_5_diff1": 0.42638117055302016,
+        "nauc_mrr_at_5_max": 0.062409134446330596,
+        "nauc_mrr_at_5_std": -3.141760809509476e-05,
+        "nauc_ndcg_at_1000_diff1": 0.3401849332107565,
+        "nauc_ndcg_at_1000_max": 0.05887650595047429,
+        "nauc_ndcg_at_1000_std": 0.004274830251501765,
+        "nauc_ndcg_at_100_diff1": 0.3017142674492828,
+        "nauc_ndcg_at_100_max": 0.01657746093566299,
+        "nauc_ndcg_at_100_std": 0.020445323924594527,
+        "nauc_ndcg_at_10_diff1": 0.3606925243087163,
+        "nauc_ndcg_at_10_max": 0.05993698215407892,
+        "nauc_ndcg_at_10_std": -0.012383471019315629,
+        "nauc_ndcg_at_1_diff1": 0.5123648006404755,
+        "nauc_ndcg_at_1_max": 0.06018186942687983,
+        "nauc_ndcg_at_1_std": 0.003062256335957277,
+        "nauc_ndcg_at_20_diff1": 0.3627658572653584,
+        "nauc_ndcg_at_20_max": 0.05503924863968874,
+        "nauc_ndcg_at_20_std": -0.022353744095367632,
+        "nauc_ndcg_at_3_diff1": 0.40774589816759704,
+        "nauc_ndcg_at_3_max": 0.06078295183380332,
+        "nauc_ndcg_at_3_std": 0.002631991326812176,
+        "nauc_ndcg_at_5_diff1": 0.39699453568762005,
+        "nauc_ndcg_at_5_max": 0.06208096521525048,
+        "nauc_ndcg_at_5_std": 0.0009741567889838872,
+        "nauc_precision_at_1000_diff1": -0.332421505946305,
+        "nauc_precision_at_1000_max": 1.0,
+        "nauc_precision_at_1000_std": 0.9564489112227755,
+        "nauc_precision_at_100_diff1": 0.12129385857557387,
+        "nauc_precision_at_100_max": -0.0634555570739123,
+        "nauc_precision_at_100_std": 0.08437119311025783,
+        "nauc_precision_at_10_diff1": 0.2477538993229102,
+        "nauc_precision_at_10_max": 0.058120653790512844,
+        "nauc_precision_at_10_std": -0.028666404671314694,
+        "nauc_precision_at_1_diff1": 0.5123648006404755,
+        "nauc_precision_at_1_max": 0.06018186942687983,
+        "nauc_precision_at_1_std": 0.003062256335957277,
+        "nauc_precision_at_20_diff1": 0.2655368456031618,
+        "nauc_precision_at_20_max": 0.04343249021784076,
+        "nauc_precision_at_20_std": -0.05672812486089926,
+        "nauc_precision_at_3_diff1": 0.3471986913496342,
+        "nauc_precision_at_3_max": 0.05854588807862574,
+        "nauc_precision_at_3_std": 0.007034303620076266,
+        "nauc_precision_at_5_diff1": 0.3279845328741994,
+        "nauc_precision_at_5_max": 0.06114433941272132,
+        "nauc_precision_at_5_std": 0.003670428141042012,
+        "nauc_recall_at_1000_diff1": -0.33242150594628805,
+        "nauc_recall_at_1000_max": 1.0,
+        "nauc_recall_at_1000_std": 0.9564489112227793,
+        "nauc_recall_at_100_diff1": 0.1212938585755736,
+        "nauc_recall_at_100_max": -0.06345555707391222,
+        "nauc_recall_at_100_std": 0.08437119311025772,
+        "nauc_recall_at_10_diff1": 0.24775389932291023,
+        "nauc_recall_at_10_max": 0.05812065379051293,
+        "nauc_recall_at_10_std": -0.02866640467131462,
+        "nauc_recall_at_1_diff1": 0.5123648006404755,
+        "nauc_recall_at_1_max": 0.06018186942687983,
+        "nauc_recall_at_1_std": 0.003062256335957277,
+        "nauc_recall_at_20_diff1": 0.2655368456031617,
+        "nauc_recall_at_20_max": 0.04343249021784082,
+        "nauc_recall_at_20_std": -0.05672812486089907,
+        "nauc_recall_at_3_diff1": 0.34719869134963405,
+        "nauc_recall_at_3_max": 0.058545888078625666,
+        "nauc_recall_at_3_std": 0.007034303620076171,
+        "nauc_recall_at_5_diff1": 0.3279845328741993,
+        "nauc_recall_at_5_max": 0.06114433941272142,
+        "nauc_recall_at_5_std": 0.0036704281410421266,
+        "ndcg_at_1": 0.06787,
+        "ndcg_at_10": 0.09511,
+        "ndcg_at_100": 0.13674,
+        "ndcg_at_1000": 0.21258,
+        "ndcg_at_20": 0.09964,
+        "ndcg_at_3": 0.0849,
+        "ndcg_at_5": 0.08684,
+        "precision_at_1": 0.06787,
+        "precision_at_10": 0.01312,
+        "precision_at_100": 0.00362,
+        "precision_at_1000": 0.00099,
+        "precision_at_20": 0.00747,
+        "precision_at_3": 0.03318,
+        "precision_at_5": 0.02081,
+        "recall_at_1": 0.06787,
+        "recall_at_10": 0.13122,
+        "recall_at_100": 0.36199,
+        "recall_at_1000": 0.98643,
+        "recall_at_20": 0.14932,
+        "recall_at_3": 0.09955,
+        "recall_at_5": 0.10407
+      }
+    ]
+  },
+  "task_name": "CodeTransOceanContest"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CodeTransOceanDL.json ADDED Viewed

	@@ -0,0 +1,158 @@

+{
+  "dataset_revision": "281562cb8a1265ab5c0824bfa6ddcd9b0a15618f",
+  "evaluation_time": 0.3599967956542969,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "hf_subset": "default",
+        "languages": [
+          "python-Code"
+        ],
+        "main_score": 0.27797,
+        "map_at_1": 0.06667,
+        "map_at_10": 0.16857,
+        "map_at_100": 0.18958,
+        "map_at_1000": 0.18973,
+        "map_at_20": 0.18736,
+        "map_at_3": 0.08704,
+        "map_at_5": 0.12065,
+        "mrr_at_1": 0.022222222222222223,
+        "mrr_at_10": 0.14406305114638449,
+        "mrr_at_100": 0.16441129312841948,
+        "mrr_at_1000": 0.1645598395211377,
+        "mrr_at_20": 0.16211789469464907,
+        "mrr_at_3": 0.057407407407407414,
+        "mrr_at_5": 0.09407407407407407,
+        "nauc_map_at_1000_diff1": 0.02784086538826234,
+        "nauc_map_at_1000_max": -0.287573307732991,
+        "nauc_map_at_1000_std": -0.034745406422382066,
+        "nauc_map_at_100_diff1": 0.027731715108106954,
+        "nauc_map_at_100_max": -0.28771336576146495,
+        "nauc_map_at_100_std": -0.03383814176825187,
+        "nauc_map_at_10_diff1": 0.03945986957137019,
+        "nauc_map_at_10_max": -0.25226936866501254,
+        "nauc_map_at_10_std": -0.03190349240293486,
+        "nauc_map_at_1_diff1": -0.0031079509882836883,
+        "nauc_map_at_1_max": -0.38109292549861384,
+        "nauc_map_at_1_std": 0.01117967981397013,
+        "nauc_map_at_20_diff1": 0.02588133080683189,
+        "nauc_map_at_20_max": -0.2867971359129569,
+        "nauc_map_at_20_std": -0.04076734332555616,
+        "nauc_map_at_3_diff1": -0.041422641218933104,
+        "nauc_map_at_3_max": -0.3935239742048571,
+        "nauc_map_at_3_std": -0.016444699737666612,
+        "nauc_map_at_5_diff1": 0.09205138060696524,
+        "nauc_map_at_5_max": -0.33277332112682373,
+        "nauc_map_at_5_std": -0.03392255678772473,
+        "nauc_mrr_at_1000_diff1": -0.28331752487610157,
+        "nauc_mrr_at_1000_max": -0.2111897323809926,
+        "nauc_mrr_at_1000_std": -0.16180758984470822,
+        "nauc_mrr_at_100_diff1": -0.2830509416012681,
+        "nauc_mrr_at_100_max": -0.21149355358382807,
+        "nauc_mrr_at_100_std": -0.16037144078976506,
+        "nauc_mrr_at_10_diff1": -0.24908906607383133,
+        "nauc_mrr_at_10_max": -0.16222471422585077,
+        "nauc_mrr_at_10_std": -0.13732552303818502,
+        "nauc_mrr_at_1_diff1": -0.5747249798765764,
+        "nauc_mrr_at_1_max": -0.4543198282801182,
+        "nauc_mrr_at_1_std": -0.35008049369466065,
+        "nauc_mrr_at_20_diff1": -0.2788790690994075,
+        "nauc_mrr_at_20_max": -0.21245333313324236,
+        "nauc_mrr_at_20_std": -0.16798426695097868,
+        "nauc_mrr_at_3_diff1": -0.2978755408478371,
+        "nauc_mrr_at_3_max": -0.3738950777316688,
+        "nauc_mrr_at_3_std": -0.16400629993717764,
+        "nauc_mrr_at_5_diff1": -0.2553974200779292,
+        "nauc_mrr_at_5_max": -0.2566333148288954,
+        "nauc_mrr_at_5_std": -0.1662715837606456,
+        "nauc_ndcg_at_1000_diff1": 0.018095130261789123,
+        "nauc_ndcg_at_1000_max": -0.25210611817093725,
+        "nauc_ndcg_at_1000_std": -0.045639669938462205,
+        "nauc_ndcg_at_100_diff1": 0.014294076823156266,
+        "nauc_ndcg_at_100_max": -0.2523941368276548,
+        "nauc_ndcg_at_100_std": -0.024740265353583573,
+        "nauc_ndcg_at_10_diff1": 0.028517877606712184,
+        "nauc_ndcg_at_10_max": -0.1379350447346928,
+        "nauc_ndcg_at_10_std": -0.04890416556969064,
+        "nauc_ndcg_at_1_diff1": -0.0031079509882836883,
+        "nauc_ndcg_at_1_max": -0.38109292549861384,
+        "nauc_ndcg_at_1_std": 0.01117967981397013,
+        "nauc_ndcg_at_20_diff1": -0.01798223055051044,
+        "nauc_ndcg_at_20_max": -0.21587479592623202,
+        "nauc_ndcg_at_20_std": -0.08674791082336787,
+        "nauc_ndcg_at_3_diff1": -0.055225744089572794,
+        "nauc_ndcg_at_3_max": -0.3980023359780902,
+        "nauc_ndcg_at_3_std": -0.026396552418542944,
+        "nauc_ndcg_at_5_diff1": 0.1484750076478242,
+        "nauc_ndcg_at_5_max": -0.3149749102906245,
+        "nauc_ndcg_at_5_std": -0.0507138930089742,
+        "nauc_precision_at_1000_diff1": 1.0,
+        "nauc_precision_at_1000_max": 1.0,
+        "nauc_precision_at_1000_std": 1.0,
+        "nauc_precision_at_100_diff1": -0.17098506069093805,
+        "nauc_precision_at_100_max": -0.22292250233425662,
+        "nauc_precision_at_100_std": 0.8978758169934575,
+        "nauc_precision_at_10_diff1": -0.017575447383332443,
+        "nauc_precision_at_10_max": 0.1212641892262422,
+        "nauc_precision_at_10_std": -0.08237519878626094,
+        "nauc_precision_at_1_diff1": -0.0031079509882836883,
+        "nauc_precision_at_1_max": -0.38109292549861384,
+        "nauc_precision_at_1_std": 0.01117967981397013,
+        "nauc_precision_at_20_diff1": -0.3151420415930391,
+        "nauc_precision_at_20_max": 0.12029359793394034,
+        "nauc_precision_at_20_std": -0.33358252911059727,
+        "nauc_precision_at_3_diff1": -0.08457460873068837,
+        "nauc_precision_at_3_max": -0.4075244582667053,
+        "nauc_precision_at_3_std": -0.04755669155513028,
+        "nauc_precision_at_5_diff1": 0.2490820314035684,
+        "nauc_precision_at_5_max": -0.2860130677842162,
+        "nauc_precision_at_5_std": -0.07902785749667773,
+        "nauc_recall_at_1000_diff1": NaN,
+        "nauc_recall_at_1000_max": NaN,
+        "nauc_recall_at_1000_std": NaN,
+        "nauc_recall_at_100_diff1": -0.1709850606909425,
+        "nauc_recall_at_100_max": -0.2229225023342648,
+        "nauc_recall_at_100_std": 0.8978758169934654,
+        "nauc_recall_at_10_diff1": -0.017575447383333363,
+        "nauc_recall_at_10_max": 0.12126418922624167,
+        "nauc_recall_at_10_std": -0.08237519878626104,
+        "nauc_recall_at_1_diff1": -0.0031079509882836883,
+        "nauc_recall_at_1_max": -0.38109292549861384,
+        "nauc_recall_at_1_std": 0.01117967981397013,
+        "nauc_recall_at_20_diff1": -0.3151420415930406,
+        "nauc_recall_at_20_max": 0.12029359793393885,
+        "nauc_recall_at_20_std": -0.33358252911059794,
+        "nauc_recall_at_3_diff1": -0.08457460873068838,
+        "nauc_recall_at_3_max": -0.4075244582667053,
+        "nauc_recall_at_3_std": -0.047556691555130225,
+        "nauc_recall_at_5_diff1": 0.24908203140356805,
+        "nauc_recall_at_5_max": -0.2860130677842164,
+        "nauc_recall_at_5_std": -0.07902785749667794,
+        "ndcg_at_1": 0.06667,
+        "ndcg_at_10": 0.27797,
+        "ndcg_at_100": 0.35629,
+        "ndcg_at_1000": 0.35936,
+        "ndcg_at_20": 0.33924,
+        "ndcg_at_3": 0.09722,
+        "ndcg_at_5": 0.15724,
+        "precision_at_1": 0.06667,
+        "precision_at_10": 0.06556,
+        "precision_at_100": 0.00978,
+        "precision_at_1000": 0.001,
+        "precision_at_20": 0.04417,
+        "precision_at_3": 0.04259,
+        "precision_at_5": 0.05444,
+        "recall_at_1": 0.06667,
+        "recall_at_10": 0.65556,
+        "recall_at_100": 0.97778,
+        "recall_at_1000": 1.0,
+        "recall_at_20": 0.88333,
+        "recall_at_3": 0.12778,
+        "recall_at_5": 0.27222
+      }
+    ]
+  },
+  "task_name": "CodeTransOceanDL"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/CosQA.json ADDED Viewed

	@@ -0,0 +1,159 @@

+{
+  "dataset_revision": "bc5efb7e9d437246ce393ed19d772e08e4a79535",
+  "evaluation_time": 20.75157332420349,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn",
+          "python-Code"
+        ],
+        "main_score": 0.00971,
+        "map_at_1": 0.004,
+        "map_at_10": 0.00722,
+        "map_at_100": 0.01022,
+        "map_at_1000": 0.01074,
+        "map_at_20": 0.00905,
+        "map_at_3": 0.005,
+        "map_at_5": 0.0065,
+        "mrr_at_1": 0.002,
+        "mrr_at_10": 0.004341269841269841,
+        "mrr_at_100": 0.007548299135165892,
+        "mrr_at_1000": 0.008049580536282804,
+        "mrr_at_20": 0.0062800911077226865,
+        "mrr_at_3": 0.003,
+        "mrr_at_5": 0.003,
+        "nauc_map_at_1000_diff1": -0.3294146795633615,
+        "nauc_map_at_1000_max": -0.24406048510714173,
+        "nauc_map_at_1000_std": -0.1753914769148377,
+        "nauc_map_at_100_diff1": -0.3401799456138604,
+        "nauc_map_at_100_max": -0.25047032754654014,
+        "nauc_map_at_100_std": -0.17739627719079182,
+        "nauc_map_at_10_diff1": -0.4378248126973643,
+        "nauc_map_at_10_max": -0.30162019359765524,
+        "nauc_map_at_10_std": -0.21082330602051547,
+        "nauc_map_at_1_diff1": -0.5747249798765764,
+        "nauc_map_at_1_max": -0.20056345586262406,
+        "nauc_map_at_1_std": -0.24282264555943125,
+        "nauc_map_at_20_diff1": -0.3606250545571463,
+        "nauc_map_at_20_max": -0.24877416436848493,
+        "nauc_map_at_20_std": -0.1867708065128874,
+        "nauc_map_at_3_diff1": -0.5596994902066005,
+        "nauc_map_at_3_max": -0.27539576066541455,
+        "nauc_map_at_3_std": -0.2772739468741615,
+        "nauc_map_at_5_diff1": -0.4422612536376958,
+        "nauc_map_at_5_max": -0.3124806505541681,
+        "nauc_map_at_5_std": -0.21923180120121363,
+        "nauc_mrr_at_1000_diff1": -0.301017062901502,
+        "nauc_mrr_at_1000_max": -0.30574852118541557,
+        "nauc_mrr_at_1000_std": -0.12721072093737698,
+        "nauc_mrr_at_100_diff1": -0.31271510738527736,
+        "nauc_mrr_at_100_max": -0.3177547103171918,
+        "nauc_mrr_at_100_std": -0.12828398711778335,
+        "nauc_mrr_at_10_diff1": -0.4292943091791753,
+        "nauc_mrr_at_10_max": -0.45746906437484464,
+        "nauc_mrr_at_10_std": -0.1527977322458919,
+        "nauc_mrr_at_1_diff1": -0.5747249798765764,
+        "nauc_mrr_at_1_max": -0.4995975315266972,
+        "nauc_mrr_at_1_std": -0.0705661389857795,
+        "nauc_mrr_at_20_diff1": -0.3407197064284155,
+        "nauc_mrr_at_20_max": -0.33440320216781877,
+        "nauc_mrr_at_20_std": -0.12890471230942302,
+        "nauc_mrr_at_3_diff1": -0.5496824970932833,
+        "nauc_mrr_at_3_max": -0.5246400143099902,
+        "nauc_mrr_at_3_std": -0.18540381003488063,
+        "nauc_mrr_at_5_diff1": -0.5496824970932833,
+        "nauc_mrr_at_5_max": -0.5246400143099902,
+        "nauc_mrr_at_5_std": -0.18540381003488063,
+        "nauc_ndcg_at_1000_diff1": -0.1726642525674944,
+        "nauc_ndcg_at_1000_max": -0.1519851274416735,
+        "nauc_ndcg_at_1000_std": -0.152970784901727,
+        "nauc_ndcg_at_100_diff1": -0.22648229459252223,
+        "nauc_ndcg_at_100_max": -0.20905633164487697,
+        "nauc_ndcg_at_100_std": -0.15127742985051915,
+        "nauc_ndcg_at_10_diff1": -0.3920183074503633,
+        "nauc_ndcg_at_10_max": -0.31340312237742524,
+        "nauc_ndcg_at_10_std": -0.18755048697604484,
+        "nauc_ndcg_at_1_diff1": -0.5747249798765764,
+        "nauc_ndcg_at_1_max": -0.20056345586262406,
+        "nauc_ndcg_at_1_std": -0.24282264555943125,
+        "nauc_ndcg_at_20_diff1": -0.24984338312909435,
+        "nauc_ndcg_at_20_max": -0.19884254695674725,
+        "nauc_ndcg_at_20_std": -0.13845214629934277,
+        "nauc_ndcg_at_3_diff1": -0.556708481180822,
+        "nauc_ndcg_at_3_max": -0.2902920538313011,
+        "nauc_ndcg_at_3_std": -0.28413190328326815,
+        "nauc_ndcg_at_5_diff1": -0.3900872909119857,
+        "nauc_ndcg_at_5_max": -0.338313007450872,
+        "nauc_ndcg_at_5_std": -0.19903625569609631,
+        "nauc_precision_at_1000_diff1": -0.11872279831421322,
+        "nauc_precision_at_1000_max": -0.10043235608226829,
+        "nauc_precision_at_1000_std": -0.1582434744663016,
+        "nauc_precision_at_100_diff1": -0.1709441093780988,
+        "nauc_precision_at_100_max": -0.18360728409607915,
+        "nauc_precision_at_100_std": -0.14930413784253577,
+        "nauc_precision_at_10_diff1": -0.3340488328414273,
+        "nauc_precision_at_10_max": -0.32435977700265317,
+        "nauc_precision_at_10_std": -0.155978892764511,
+        "nauc_precision_at_1_diff1": -0.5747249798765764,
+        "nauc_precision_at_1_max": -0.20056345586262406,
+        "nauc_precision_at_1_std": -0.24282264555943125,
+        "nauc_precision_at_20_diff1": -0.1562553357562748,
+        "nauc_precision_at_20_max": -0.1475961655730907,
+        "nauc_precision_at_20_std": -0.09494597165646255,
+        "nauc_precision_at_3_diff1": -0.5496824970932834,
+        "nauc_precision_at_3_max": -0.32528396386727493,
+        "nauc_precision_at_3_std": -0.3002414810839818,
+        "nauc_precision_at_5_diff1": -0.30024148108398174,
+        "nauc_precision_at_5_max": -0.3806904570253108,
+        "nauc_precision_at_5_std": -0.1629997316876844,
+        "nauc_recall_at_1000_diff1": -0.11872279831421319,
+        "nauc_recall_at_1000_max": -0.10043235608226833,
+        "nauc_recall_at_1000_std": -0.1582434744663014,
+        "nauc_recall_at_100_diff1": -0.17094410937809862,
+        "nauc_recall_at_100_max": -0.1836072840960791,
+        "nauc_recall_at_100_std": -0.14930413784253557,
+        "nauc_recall_at_10_diff1": -0.33404883284142745,
+        "nauc_recall_at_10_max": -0.3243597770026533,
+        "nauc_recall_at_10_std": -0.15597889276451127,
+        "nauc_recall_at_1_diff1": -0.5747249798765764,
+        "nauc_recall_at_1_max": -0.20056345586262406,
+        "nauc_recall_at_1_std": -0.24282264555943125,
+        "nauc_recall_at_20_diff1": -0.15625533575627482,
+        "nauc_recall_at_20_max": -0.14759616557309066,
+        "nauc_recall_at_20_std": -0.09494597165646257,
+        "nauc_recall_at_3_diff1": -0.5496824970932833,
+        "nauc_recall_at_3_max": -0.3252839638672748,
+        "nauc_recall_at_3_std": -0.3002414810839818,
+        "nauc_recall_at_5_diff1": -0.30024148108398174,
+        "nauc_recall_at_5_max": -0.38069045702531085,
+        "nauc_recall_at_5_std": -0.1629997316876844,
+        "ndcg_at_1": 0.004,
+        "ndcg_at_10": 0.00971,
+        "ndcg_at_100": 0.02508,
+        "ndcg_at_1000": 0.04516,
+        "ndcg_at_20": 0.01632,
+        "ndcg_at_3": 0.00526,
+        "ndcg_at_5": 0.00785,
+        "precision_at_1": 0.004,
+        "precision_at_10": 0.0018,
+        "precision_at_100": 0.00092,
+        "precision_at_1000": 0.00026,
+        "precision_at_20": 0.0022,
+        "precision_at_3": 0.002,
+        "precision_at_5": 0.0024,
+        "recall_at_1": 0.004,
+        "recall_at_10": 0.018,
+        "recall_at_100": 0.092,
+        "recall_at_1000": 0.264,
+        "recall_at_20": 0.044,
+        "recall_at_3": 0.006,
+        "recall_at_5": 0.012
+      }
+    ]
+  },
+  "task_name": "CosQA"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/STSBenchmark.json DELETED Viewed

@@ -1,26 +0,0 @@
-{
-  "dataset_revision": "b0fddb56ed78048fa8b90373c8a3cfc37b684831",
-  "evaluation_time": 0.12331175804138184,
-  "kg_co2_emissions": null,
-  "mteb_version": "1.14.15",
-  "scores": {
-    "test": [
-      {
-        "cosine_pearson": 0.34632056143460516,
-        "cosine_spearman": 0.42973159111999676,
-        "euclidean_pearson": 0.4043313982401531,
-        "euclidean_spearman": 0.42973159111999676,
-        "hf_subset": "default",
-        "languages": [
-          "eng-Latn"
-        ],
-        "main_score": 0.42973159111999676,
-        "manhattan_pearson": 0.511950240807258,
-        "manhattan_spearman": 0.5019330550880601,
-        "pearson": 0.34632056143460516,
-        "spearman": 0.42973159111999676
-      }
-    ]
-  },
-  "task_name": "STSBenchmark"
-}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/SprintDuplicateQuestions.json DELETED Viewed

@@ -1,58 +0,0 @@
-{
-  "dataset_revision": "d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46",
-  "evaluation_time": 1.9629368782043457,
-  "kg_co2_emissions": null,
-  "mteb_version": "1.14.15",
-  "scores": {
-    "test": [
-      {
-        "cosine_accuracy": 0.9926237623762376,
-        "cosine_accuracy_threshold": 0.9106360077857971,
-        "cosine_ap": 0.4700755863552174,
-        "cosine_f1": 0.4925187032418952,
-        "cosine_f1_threshold": 0.8986777067184448,
-        "cosine_precision": 0.6539735099337748,
-        "cosine_recall": 0.395,
-        "dot_accuracy": 0.9926237623762376,
-        "dot_accuracy_threshold": 0.9106361269950867,
-        "dot_ap": 0.47007548398718707,
-        "dot_f1": 0.4925187032418952,
-        "dot_f1_threshold": 0.8986777663230896,
-        "dot_precision": 0.6539735099337748,
-        "dot_recall": 0.395,
-        "euclidean_accuracy": 0.9926237623762376,
-        "euclidean_accuracy_threshold": 0.42276236414909363,
-        "euclidean_ap": 0.47007558217981027,
-        "euclidean_f1": 0.4925187032418952,
-        "euclidean_f1_threshold": 0.4501606225967407,
-        "euclidean_precision": 0.6539735099337748,
-        "euclidean_recall": 0.395,
-        "hf_subset": "default",
-        "languages": [
-          "eng-Latn"
-        ],
-        "main_score": 0.6386707007383838,
-        "manhattan_accuracy": 0.9939207920792079,
-        "manhattan_accuracy_threshold": 4.824772834777832,
-        "manhattan_ap": 0.6386707007383838,
-        "manhattan_f1": 0.6293103448275862,
-        "manhattan_f1_threshold": 5.194998741149902,
-        "manhattan_precision": 0.6822429906542056,
-        "manhattan_recall": 0.584,
-        "max_accuracy": 0.9939207920792079,
-        "max_ap": 0.6386707007383838,
-        "max_f1": 0.6293103448275862,
-        "max_precision": 0.6822429906542056,
-        "max_recall": 0.584,
-        "similarity_accuracy": 0.9926237623762376,
-        "similarity_accuracy_threshold": 0.9106360077857971,
-        "similarity_ap": 0.4700755863552174,
-        "similarity_f1": 0.4925187032418952,
-        "similarity_f1_threshold": 0.8986777067184448,
-        "similarity_precision": 0.6539735099337748,
-        "similarity_recall": 0.395
-      }
-    ]
-  },
-  "task_name": "SprintDuplicateQuestions"
-}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/StackExchangeClustering.json DELETED Viewed

@@ -1,47 +0,0 @@
-{
-  "dataset_revision": "6cbc1f7b2bc0622f2e39d2c77fa502909748c259",
-  "evaluation_time": 1075.5739603042603,
-  "kg_co2_emissions": null,
-  "mteb_version": "1.14.15",
-  "scores": {
-    "test": [
-      {
-        "hf_subset": "default",
-        "languages": [
-          "eng-Latn"
-        ],
-        "main_score": 0.2747977935355363,
-        "v_measure": 0.2747977935355363,
-        "v_measure_std": 0.04408138950391278,
-        "v_measures": [
-          0.2671568735697825,
-          0.35324106044655595,
-          0.2134334295678833,
-          0.26069561242914296,
-          0.2360037867112385,
-          0.18352010080864292,
-          0.21227539957559294,
-          0.22564157353303899,
-          0.31014309699664405,
-          0.2792317143409387,
-          0.30736400840236383,
-          0.33654065468328326,
-          0.3375811203083562,
-          0.23635769205347795,
-          0.2889733490218442,
-          0.2628972368553193,
-          0.2892573063858698,
-          0.3093369539018476,
-          0.2778955236652676,
-          0.29489160764728006,
-          0.3092126928451642,
-          0.22100223054084894,
-          0.23711645754707986,
-          0.3264131545037563,
-          0.2937622020471872
-        ]
-      }
-    ]
-  },
-  "task_name": "StackExchangeClustering"
-}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/StackOverflowQA.json ADDED Viewed

	@@ -0,0 +1,158 @@

+{
+  "dataset_revision": "db8f169f3894c14a00251061f957b2063eef2bd5",
+  "evaluation_time": 21.146663904190063,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ],
+        "main_score": 0.17615,
+        "map_at_1": 0.14142,
+        "map_at_10": 0.16367,
+        "map_at_100": 0.16807,
+        "map_at_1000": 0.16867,
+        "map_at_20": 0.16588,
+        "map_at_3": 0.1568,
+        "map_at_5": 0.16034,
+        "mrr_at_1": 0.14142427281845538,
+        "mrr_at_10": 0.1636685851204407,
+        "mrr_at_100": 0.16806598010525844,
+        "mrr_at_1000": 0.16867443260066448,
+        "mrr_at_20": 0.16587850269947257,
+        "mrr_at_3": 0.15680374456703444,
+        "mrr_at_5": 0.1603393513874958,
+        "nauc_map_at_1000_diff1": 0.5441830305562326,
+        "nauc_map_at_1000_max": 0.2166816774885428,
+        "nauc_map_at_1000_std": 0.14505555737829307,
+        "nauc_map_at_100_diff1": 0.5446431157527537,
+        "nauc_map_at_100_max": 0.21689938576550866,
+        "nauc_map_at_100_std": 0.14493387106545103,
+        "nauc_map_at_10_diff1": 0.5511736320008027,
+        "nauc_map_at_10_max": 0.21922402299128418,
+        "nauc_map_at_10_std": 0.14589505600247163,
+        "nauc_map_at_1_diff1": 0.6335084714813365,
+        "nauc_map_at_1_max": 0.2416674532732567,
+        "nauc_map_at_1_std": 0.15189301837631614,
+        "nauc_map_at_20_diff1": 0.5481004538160913,
+        "nauc_map_at_20_max": 0.21754392477744908,
+        "nauc_map_at_20_std": 0.14574143361267317,
+        "nauc_map_at_3_diff1": 0.569602881272386,
+        "nauc_map_at_3_max": 0.22657597605102178,
+        "nauc_map_at_3_std": 0.14362624083093203,
+        "nauc_map_at_5_diff1": 0.5601655545127238,
+        "nauc_map_at_5_max": 0.22021980923815318,
+        "nauc_map_at_5_std": 0.145190486252428,
+        "nauc_mrr_at_1000_diff1": 0.5441830305562326,
+        "nauc_mrr_at_1000_max": 0.2166816774885428,
+        "nauc_mrr_at_1000_std": 0.14505555737829307,
+        "nauc_mrr_at_100_diff1": 0.5446431157527537,
+        "nauc_mrr_at_100_max": 0.21689938576550866,
+        "nauc_mrr_at_100_std": 0.14493387106545103,
+        "nauc_mrr_at_10_diff1": 0.5511736320008027,
+        "nauc_mrr_at_10_max": 0.21922402299128418,
+        "nauc_mrr_at_10_std": 0.14589505600247163,
+        "nauc_mrr_at_1_diff1": 0.6335084714813365,
+        "nauc_mrr_at_1_max": 0.2416674532732567,
+        "nauc_mrr_at_1_std": 0.15189301837631614,
+        "nauc_mrr_at_20_diff1": 0.5481004538160913,
+        "nauc_mrr_at_20_max": 0.21754392477744908,
+        "nauc_mrr_at_20_std": 0.14574143361267317,
+        "nauc_mrr_at_3_diff1": 0.569602881272386,
+        "nauc_mrr_at_3_max": 0.22657597605102178,
+        "nauc_mrr_at_3_std": 0.14362624083093203,
+        "nauc_mrr_at_5_diff1": 0.5601655545127238,
+        "nauc_mrr_at_5_max": 0.22021980923815318,
+        "nauc_mrr_at_5_std": 0.145190486252428,
+        "nauc_ndcg_at_1000_diff1": 0.4728678699567455,
+        "nauc_ndcg_at_1000_max": 0.18937253079534216,
+        "nauc_ndcg_at_1000_std": 0.14596120873695492,
+        "nauc_ndcg_at_100_diff1": 0.4829489403420902,
+        "nauc_ndcg_at_100_max": 0.19711295138806267,
+        "nauc_ndcg_at_100_std": 0.14004483265553003,
+        "nauc_ndcg_at_10_diff1": 0.5147356366280121,
+        "nauc_ndcg_at_10_max": 0.20936478000130024,
+        "nauc_ndcg_at_10_std": 0.14480134602662714,
+        "nauc_ndcg_at_1_diff1": 0.6335084714813365,
+        "nauc_ndcg_at_1_max": 0.2416674532732567,
+        "nauc_ndcg_at_1_std": 0.15189301837631614,
+        "nauc_ndcg_at_20_diff1": 0.5045372953308567,
+        "nauc_ndcg_at_20_max": 0.20390468798029948,
+        "nauc_ndcg_at_20_std": 0.14429100965430774,
+        "nauc_ndcg_at_3_diff1": 0.5501813298382772,
+        "nauc_ndcg_at_3_max": 0.22229855178363508,
+        "nauc_ndcg_at_3_std": 0.1399986570615583,
+        "nauc_ndcg_at_5_diff1": 0.5343279242377332,
+        "nauc_ndcg_at_5_max": 0.21164562906788129,
+        "nauc_ndcg_at_5_std": 0.14278785553527687,
+        "nauc_precision_at_1000_diff1": 0.2504046219335285,
+        "nauc_precision_at_1000_max": 0.08591924265428995,
+        "nauc_precision_at_1000_std": 0.1677320203837767,
+        "nauc_precision_at_100_diff1": 0.31425670977915415,
+        "nauc_precision_at_100_max": 0.1387542114851391,
+        "nauc_precision_at_100_std": 0.1261904558936239,
+        "nauc_precision_at_10_diff1": 0.41968706662348626,
+        "nauc_precision_at_10_max": 0.18390157987927358,
+        "nauc_precision_at_10_std": 0.14312672622707642,
+        "nauc_precision_at_1_diff1": 0.6335084714813365,
+        "nauc_precision_at_1_max": 0.2416674532732567,
+        "nauc_precision_at_1_std": 0.15189301837631614,
+        "nauc_precision_at_20_diff1": 0.39118835707188254,
+        "nauc_precision_at_20_max": 0.16759815130477784,
+        "nauc_precision_at_20_std": 0.14154312425469426,
+        "nauc_precision_at_3_diff1": 0.4986851913309839,
+        "nauc_precision_at_3_max": 0.2110426423927967,
+        "nauc_precision_at_3_std": 0.13007101364000376,
+        "nauc_precision_at_5_diff1": 0.4672079991177685,
+        "nauc_precision_at_5_max": 0.18897950891809692,
+        "nauc_precision_at_5_std": 0.13674491342908243,
+        "nauc_recall_at_1000_diff1": 0.25040462193352936,
+        "nauc_recall_at_1000_max": 0.08591924265429102,
+        "nauc_recall_at_1000_std": 0.1677320203837774,
+        "nauc_recall_at_100_diff1": 0.3142567097791538,
+        "nauc_recall_at_100_max": 0.1387542114851391,
+        "nauc_recall_at_100_std": 0.12619045589362404,
+        "nauc_recall_at_10_diff1": 0.41968706662348615,
+        "nauc_recall_at_10_max": 0.18390157987927366,
+        "nauc_recall_at_10_std": 0.1431267262270766,
+        "nauc_recall_at_1_diff1": 0.6335084714813365,
+        "nauc_recall_at_1_max": 0.2416674532732567,
+        "nauc_recall_at_1_std": 0.15189301837631614,
+        "nauc_recall_at_20_diff1": 0.3911883570718826,
+        "nauc_recall_at_20_max": 0.16759815130477776,
+        "nauc_recall_at_20_std": 0.1415431242546944,
+        "nauc_recall_at_3_diff1": 0.49868519133098393,
+        "nauc_recall_at_3_max": 0.21104264239279674,
+        "nauc_recall_at_3_std": 0.13007101364000365,
+        "nauc_recall_at_5_diff1": 0.4672079991177685,
+        "nauc_recall_at_5_max": 0.18897950891809673,
+        "nauc_recall_at_5_std": 0.13674491342908224,
+        "ndcg_at_1": 0.14142,
+        "ndcg_at_10": 0.17615,
+        "ndcg_at_100": 0.20104,
+        "ndcg_at_1000": 0.22165,
+        "ndcg_at_20": 0.18433,
+        "ndcg_at_3": 0.16187,
+        "ndcg_at_5": 0.16825,
+        "precision_at_1": 0.14142,
+        "precision_at_10": 0.02161,
+        "precision_at_100": 0.00341,
+        "precision_at_1000": 0.00051,
+        "precision_at_20": 0.01244,
+        "precision_at_3": 0.05884,
+        "precision_at_5": 0.03842,
+        "recall_at_1": 0.14142,
+        "recall_at_10": 0.21615,
+        "recall_at_100": 0.34052,
+        "recall_at_1000": 0.51254,
+        "recall_at_20": 0.24875,
+        "recall_at_3": 0.17653,
+        "recall_at_5": 0.19208
+      }
+    ]
+  },
+  "task_name": "StackOverflowQA"
+}

mteb_results/gte-Qwen2-7B-instruct-M2V-Distilled/distilled/SyntheticText2SQL.json ADDED Viewed

	@@ -0,0 +1,159 @@

+{
+  "dataset_revision": "686b87296c3a0191b5d9415a00526c62db9fce09",
+  "evaluation_time": 92.1711049079895,
+  "kg_co2_emissions": null,
+  "mteb_version": "1.14.15",
+  "scores": {
+    "test": [
+      {
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn",
+          "sql-Code"
+        ],
+        "main_score": 0.00489,
+        "map_at_1": 0.00034,
+        "map_at_10": 0.00331,
+        "map_at_100": 0.00393,
+        "map_at_1000": 0.00414,
+        "map_at_20": 0.00359,
+        "map_at_3": 0.00251,
+        "map_at_5": 0.00291,
+        "mrr_at_1": 0.002221842420099128,
+        "mrr_at_10": 0.004530496754048283,
+        "mrr_at_100": 0.005170980682014603,
+        "mrr_at_1000": 0.005371938321099836,
+        "mrr_at_20": 0.004822607995513001,
+        "mrr_at_3": 0.0038454964963254143,
+        "mrr_at_5": 0.004195863954879508,
+        "nauc_map_at_1000_diff1": -0.27416046541710665,
+        "nauc_map_at_1000_max": -0.12307636241646212,
+        "nauc_map_at_1000_std": -0.3169779465421886,
+        "nauc_map_at_100_diff1": -0.2867723753018623,
+        "nauc_map_at_100_max": -0.11793114543437405,
+        "nauc_map_at_100_std": -0.32768340793729833,
+        "nauc_map_at_10_diff1": -0.3071810467939698,
+        "nauc_map_at_10_max": -0.09394849271438202,
+        "nauc_map_at_10_std": -0.3443159235101931,
+        "nauc_map_at_1_diff1": -0.045574562309770715,
+        "nauc_map_at_1_max": -0.415009003625047,
+        "nauc_map_at_1_std": -0.28503182744193584,
+        "nauc_map_at_20_diff1": -0.30073635348293454,
+        "nauc_map_at_20_max": -0.1035440934145476,
+        "nauc_map_at_20_std": -0.33728144942994526,
+        "nauc_map_at_3_diff1": -0.36276475560891563,
+        "nauc_map_at_3_max": -0.09000122816382457,
+        "nauc_map_at_3_std": -0.35808488719288767,
+        "nauc_map_at_5_diff1": -0.34649671639377566,
+        "nauc_map_at_5_max": -0.07741484623960085,
+        "nauc_map_at_5_std": -0.3454332041446047,
+        "nauc_mrr_at_1000_diff1": -0.32007654216936365,
+        "nauc_mrr_at_1000_max": -0.05306747639186935,
+        "nauc_mrr_at_1000_std": -0.33505538550557523,
+        "nauc_mrr_at_100_diff1": -0.33152877543566905,
+        "nauc_mrr_at_100_max": -0.04652715811851764,
+        "nauc_mrr_at_100_std": -0.3439648778335655,
+        "nauc_mrr_at_10_diff1": -0.35260191778612204,
+        "nauc_mrr_at_10_max": -0.018284442733176375,
+        "nauc_mrr_at_10_std": -0.3583806093519501,
+        "nauc_mrr_at_1_diff1": -0.49322913632443244,
+        "nauc_mrr_at_1_max": 0.18386885076318166,
+        "nauc_mrr_at_1_std": -0.36881544615998557,
+        "nauc_mrr_at_20_diff1": -0.34523116750414906,
+        "nauc_mrr_at_20_max": -0.030423846920737567,
+        "nauc_mrr_at_20_std": -0.3523413443042862,
+        "nauc_mrr_at_3_diff1": -0.3986937025555519,
+        "nauc_mrr_at_3_max": 0.00596597350896994,
+        "nauc_mrr_at_3_std": -0.37595027480484544,
+        "nauc_mrr_at_5_diff1": -0.37058906995780483,
+        "nauc_mrr_at_5_max": 0.0023804395413750843,
+        "nauc_mrr_at_5_std": -0.3649770343981212,
+        "nauc_ndcg_at_1000_diff1": -0.12191989446547287,
+        "nauc_ndcg_at_1000_max": -0.18069129976379253,
+        "nauc_ndcg_at_1000_std": -0.21737660540578904,
+        "nauc_ndcg_at_100_diff1": -0.21534614581420813,
+        "nauc_ndcg_at_100_max": -0.16549108196966383,
+        "nauc_ndcg_at_100_std": -0.2967519876094673,
+        "nauc_ndcg_at_10_diff1": -0.2766087694329189,
+        "nauc_ndcg_at_10_max": -0.10425653229278331,
+        "nauc_ndcg_at_10_std": -0.34614483144111813,
+        "nauc_ndcg_at_1_diff1": -0.045574562309770715,
+        "nauc_ndcg_at_1_max": -0.415009003625047,
+        "nauc_ndcg_at_1_std": -0.28503182744193584,
+        "nauc_ndcg_at_20_diff1": -0.26495356113264346,
+        "nauc_ndcg_at_20_max": -0.12302281530014428,
+        "nauc_ndcg_at_20_std": -0.33040207062914734,
+        "nauc_ndcg_at_3_diff1": -0.35550615579366496,
+        "nauc_ndcg_at_3_max": -0.09065063772541752,
+        "nauc_ndcg_at_3_std": -0.3666750120549603,
+        "nauc_ndcg_at_5_diff1": -0.3367147607777083,
+        "nauc_ndcg_at_5_max": -0.07594752160761341,
+        "nauc_ndcg_at_5_std": -0.349392770228869,
+        "nauc_precision_at_1000_diff1": -0.05796266193135331,
+        "nauc_precision_at_1000_max": -0.19596247289607774,
+        "nauc_precision_at_1000_std": -0.1726159439969235,
+        "nauc_precision_at_100_diff1": -0.1623283482675489,
+        "nauc_precision_at_100_max": -0.20045006262758877,
+        "nauc_precision_at_100_std": -0.2711122975734177,
+        "nauc_precision_at_10_diff1": -0.23692269420435214,
+        "nauc_precision_at_10_max": -0.11995105342526458,
+        "nauc_precision_at_10_std": -0.34723986176409266,
+        "nauc_precision_at_1_diff1": -0.045574562309770715,
+        "nauc_precision_at_1_max": -0.415009003625047,
+        "nauc_precision_at_1_std": -0.28503182744193584,
+        "nauc_precision_at_20_diff1": -0.22656401175983737,
+        "nauc_precision_at_20_max": -0.14607723359403244,
+        "nauc_precision_at_20_std": -0.3206744368813374,
+        "nauc_precision_at_3_diff1": -0.3421859065827053,
+        "nauc_precision_at_3_max": -0.09374847026615557,
+        "nauc_precision_at_3_std": -0.37955719702776525,
+        "nauc_precision_at_5_diff1": -0.3217864826892486,
+        "nauc_precision_at_5_max": -0.07574764495371311,
+        "nauc_precision_at_5_std": -0.35431940648491467,
+        "nauc_recall_at_1000_diff1": -0.057962661931353035,
+        "nauc_recall_at_1000_max": -0.19596247289607757,
+        "nauc_recall_at_1000_std": -0.17261594399692332,
+        "nauc_recall_at_100_diff1": -0.16232834826754888,
+        "nauc_recall_at_100_max": -0.20045006262758874,
+        "nauc_recall_at_100_std": -0.2711122975734177,
+        "nauc_recall_at_10_diff1": -0.2369226942043523,
+        "nauc_recall_at_10_max": -0.11995105342526483,
+        "nauc_recall_at_10_std": -0.34723986176409277,
+        "nauc_recall_at_1_diff1": -0.045574562309770715,
+        "nauc_recall_at_1_max": -0.415009003625047,
+        "nauc_recall_at_1_std": -0.28503182744193584,
+        "nauc_recall_at_20_diff1": -0.22656401175983737,
+        "nauc_recall_at_20_max": -0.14607723359403255,
+        "nauc_recall_at_20_std": -0.3206744368813374,
+        "nauc_recall_at_3_diff1": -0.3421859065827052,
+        "nauc_recall_at_3_max": -0.09374847026615546,
+        "nauc_recall_at_3_std": -0.37955719702776536,
+        "nauc_recall_at_5_diff1": -0.3217864826892487,
+        "nauc_recall_at_5_max": -0.07574764495371322,
+        "nauc_recall_at_5_std": -0.3543194064849148,
+        "ndcg_at_1": 0.00034,
+        "ndcg_at_10": 0.00489,
+        "ndcg_at_100": 0.00885,
+        "ndcg_at_1000": 0.01629,
+        "ndcg_at_20": 0.00592,
+        "ndcg_at_3": 0.00322,
+        "ndcg_at_5": 0.00394,
+        "precision_at_1": 0.00034,
+        "precision_at_10": 0.00099,
+        "precision_at_100": 0.00031,
+        "precision_at_1000": 9e-05,
+        "precision_at_20": 0.0007,
+        "precision_at_3": 0.00177,
+        "precision_at_5": 0.0014,
+        "recall_at_1": 0.00034,
+        "recall_at_10": 0.00991,
+        "recall_at_100": 0.03076,
+        "recall_at_1000": 0.09383,
+        "recall_at_20": 0.01401,
+        "recall_at_3": 0.0053,
+        "recall_at_5": 0.00701
+      }
+    ]
+  },
+  "task_name": "SyntheticText2SQL"
+}

mteb_results/mteb_parsed_results.json DELETED Viewed

@@ -1,3 +0,0 @@
-{
-  "gte-Qwen2-7B-instruct-M2V-Distilled": "ResultSet(datasets={'Banking77Classification': DatasetResult(scores=[0.4396103896103896], time=6.451777696609497), 'StackExchangeClustering': DatasetResult(scores=[0.2747977935355363], time=1075.5739603042603), 'STSBenchmark': DatasetResult(scores=[0.42973159111999676], time=0.12331175804138184), 'CQADupstackProgrammersRetrieval': DatasetResult(scores=[0.0501], time=99.69791841506958), 'SprintDuplicateQuestions': DatasetResult(scores=[0.6386707007383838], time=1.9629368782043457)})"
-}

mteb_results/mteb_raw_results.json DELETED Viewed

@@ -1,7 +0,0 @@
-[
-  "dataset_revision='0fd18e25b25c072e09e0d92ab615fda904d66300' task_name='Banking77Classification' mteb_version='1.14.15' scores={'test': [{'accuracy': 0.4396103896103896, 'f1': 0.4142711532114576, 'f1_weighted': 0.4142711532114576, 'scores_per_experiment': [{'accuracy': 0.4279220779220779, 'f1': 0.4030476288783657, 'f1_weighted': 0.4030476288783656}, {'accuracy': 0.4211038961038961, 'f1': 0.39776168133611584, 'f1_weighted': 0.39776168133611584}, {'accuracy': 0.45064935064935063, 'f1': 0.42872843564828145, 'f1_weighted': 0.42872843564828145}, {'accuracy': 0.4448051948051948, 'f1': 0.420756828398419, 'f1_weighted': 0.42075682839841905}, {'accuracy': 0.44675324675324674, 'f1': 0.42100682221185654, 'f1_weighted': 0.42100682221185654}, {'accuracy': 0.45324675324675323, 'f1': 0.4392342490231314, 'f1_weighted': 0.4392342490231314}, {'accuracy': 0.437012987012987, 'f1': 0.4056017558988273, 'f1_weighted': 0.40560175589882724}, {'accuracy': 0.42337662337662335, 'f1': 0.39123709562594644, 'f1_weighted': 0.39123709562594655}, {'accuracy': 0.44512987012987015, 'f1': 0.41578171494860966, 'f1_weighted': 0.41578171494860966}, {'accuracy': 0.4461038961038961, 'f1': 0.4195553201450221, 'f1_weighted': 0.419555320145022}], 'main_score': 0.4396103896103896, 'hf_subset': 'default', 'languages': ['eng-Latn']}]} evaluation_time=6.451777696609497 kg_co2_emissions=None",
-  "dataset_revision='6cbc1f7b2bc0622f2e39d2c77fa502909748c259' task_name='StackExchangeClustering' mteb_version='1.14.15' scores={'test': [{'v_measure': 0.2747977935355363, 'v_measure_std': 0.04408138950391278, 'v_measures': [0.2671568735697825, 0.35324106044655595, 0.2134334295678833, 0.26069561242914296, 0.2360037867112385, 0.18352010080864292, 0.21227539957559294, 0.22564157353303899, 0.31014309699664405, 0.2792317143409387, 0.30736400840236383, 0.33654065468328326, 0.3375811203083562, 0.23635769205347795, 0.2889733490218442, 0.2628972368553193, 0.2892573063858698, 0.3093369539018476, 0.2778955236652676, 0.29489160764728006, 0.3092126928451642, 0.22100223054084894, 0.23711645754707986, 0.3264131545037563, 0.2937622020471872], 'main_score': 0.2747977935355363, 'hf_subset': 'default', 'languages': ['eng-Latn']}]} evaluation_time=1075.5739603042603 kg_co2_emissions=None",
-  "dataset_revision='b0fddb56ed78048fa8b90373c8a3cfc37b684831' task_name='STSBenchmark' mteb_version='1.14.15' scores={'test': [{'pearson': 0.34632056143460516, 'spearman': 0.42973159111999676, 'cosine_pearson': 0.34632056143460516, 'cosine_spearman': 0.42973159111999676, 'manhattan_pearson': 0.511950240807258, 'manhattan_spearman': 0.5019330550880601, 'euclidean_pearson': 0.4043313982401531, 'euclidean_spearman': 0.42973159111999676, 'main_score': 0.42973159111999676, 'hf_subset': 'default', 'languages': ['eng-Latn']}]} evaluation_time=0.12331175804138184 kg_co2_emissions=None",
-  "dataset_revision='6184bc1440d2dbc7612be22b50686b8826d22b32' task_name='CQADupstackProgrammersRetrieval' mteb_version='1.14.15' scores={'test': [{'ndcg_at_1': 0.03082, 'ndcg_at_3': 0.03989, 'ndcg_at_5': 0.04484, 'ndcg_at_10': 0.0501, 'ndcg_at_20': 0.05662, 'ndcg_at_100': 0.07072, 'ndcg_at_1000': 0.09327, 'map_at_1': 0.02467, 'map_at_3': 0.03388, 'map_at_5': 0.03693, 'map_at_10': 0.03898, 'map_at_20': 0.04068, 'map_at_100': 0.04261, 'map_at_1000': 0.04333, 'recall_at_1': 0.02467, 'recall_at_3': 0.04648, 'recall_at_5': 0.05869, 'recall_at_10': 0.07499, 'recall_at_20': 0.09901, 'recall_at_100': 0.16969, 'recall_at_1000': 0.33718, 'precision_at_1': 0.03082, 'precision_at_3': 0.02017, 'precision_at_5': 0.0153, 'precision_at_10': 0.00993, 'precision_at_20': 0.00685, 'precision_at_100': 0.00241, 'precision_at_1000': 0.00052, 'mrr_at_1': 0.030821917808219176, 'mrr_at_3': 0.04280821917808219, 'mrr_at_5': 0.04634703196347032, 'mrr_at_10': 0.04904462926723201, 'mrr_at_20': 0.05126402659708249, 'mrr_at_100': 0.05339942610218758, 'mrr_at_1000': 0.05413492750157237, 'nauc_ndcg_at_1_max': 0.35182174117717013, 'nauc_ndcg_at_1_std': -0.24426280067522707, 'nauc_ndcg_at_1_diff1': 0.1772995319079407, 'nauc_ndcg_at_3_max': 0.23212930749840155, 'nauc_ndcg_at_3_std': -0.1728371812831961, 'nauc_ndcg_at_3_diff1': 0.03670154146101528, 'nauc_ndcg_at_5_max': 0.20474332948099355, 'nauc_ndcg_at_5_std': -0.1734952739301359, 'nauc_ndcg_at_5_diff1': 0.0107566708693031, 'nauc_ndcg_at_10_max': 0.19884193622357532, 'nauc_ndcg_at_10_std': -0.16919003671988075, 'nauc_ndcg_at_10_diff1': 0.0026192804576363727, 'nauc_ndcg_at_20_max': 0.20925361343315524, 'nauc_ndcg_at_20_std': -0.17106125631597793, 'nauc_ndcg_at_20_diff1': 0.0031543394811079034, 'nauc_ndcg_at_100_max': 0.20125970115134734, 'nauc_ndcg_at_100_std': -0.15865628929382014, 'nauc_ndcg_at_100_diff1': 0.0023309149151885546, 'nauc_ndcg_at_1000_max': 0.20925878430027478, 'nauc_ndcg_at_1000_std': -0.1717044268161809, 'nauc_ndcg_at_1000_diff1': 
-0.010372586628261796, 'nauc_map_at_1_max': 0.33459947679728974, 'nauc_map_at_1_std': -0.23115450977179597, 'nauc_map_at_1_diff1': 0.1731091343679673, 'nauc_map_at_3_max': 0.2486807528974488, 'nauc_map_at_3_std': -0.18512855007450404, 'nauc_map_at_3_diff1': 0.06042780588964212, 'nauc_map_at_5_max': 0.22647048266105405, 'nauc_map_at_5_std': -0.18107585673560017, 'nauc_map_at_5_diff1': 0.04407217741234605, 'nauc_map_at_10_max': 0.22061594321968936, 'nauc_map_at_10_std': -0.17777470317814356, 'nauc_map_at_10_diff1': 0.03906418656483989, 'nauc_map_at_20_max': 0.22396003211648763, 'nauc_map_at_20_std': -0.17867373725662639, 'nauc_map_at_20_diff1': 0.03795725531499195, 'nauc_map_at_100_max': 0.22324901446317413, 'nauc_map_at_100_std': -0.17630470695891512, 'nauc_map_at_100_diff1': 0.03759221625144172, 'nauc_map_at_1000_max': 0.2240572170754659, 'nauc_map_at_1000_std': -0.17708810912472517, 'nauc_map_at_1000_diff1': 0.03644747951501248, 'nauc_recall_at_1_max': 0.33459947679728974, 'nauc_recall_at_1_std': -0.23115450977179597, 'nauc_recall_at_1_diff1': 0.1731091343679673, 'nauc_recall_at_3_max': 0.1864107664448688, 'nauc_recall_at_3_std': -0.14586036842324565, 'nauc_recall_at_3_diff1': -0.021696811828998432, 'nauc_recall_at_5_max': 0.1453135254521713, 'nauc_recall_at_5_std': -0.1531619473747777, 'nauc_recall_at_5_diff1': -0.0538517948884412, 'nauc_recall_at_10_max': 0.1384336247044034, 'nauc_recall_at_10_std': -0.14737738059263306, 'nauc_recall_at_10_diff1': -0.051375323084735164, 'nauc_recall_at_20_max': 0.16386688869593355, 'nauc_recall_at_20_std': -0.1528456365862212, 'nauc_recall_at_20_diff1': -0.03578815918976938, 'nauc_recall_at_100_max': 0.14861973646512244, 'nauc_recall_at_100_std': -0.12240747671934184, 'nauc_recall_at_100_diff1': -0.023004658252697183, 'nauc_recall_at_1000_max': 0.16414155669676642, 'nauc_recall_at_1000_std': -0.1513320281746568, 'nauc_recall_at_1000_diff1': -0.047075752528689695, 'nauc_precision_at_1_max': 0.35182174117717013, 
'nauc_precision_at_1_std': -0.24426280067522707, 'nauc_precision_at_1_diff1': 0.1772995319079407, 'nauc_precision_at_3_max': 0.21285488271783465, 'nauc_precision_at_3_std': -0.1483164417030193, 'nauc_precision_at_3_diff1': -0.013044619440245884, 'nauc_precision_at_5_max': 0.1756649379589832, 'nauc_precision_at_5_std': -0.15632134056178232, 'nauc_precision_at_5_diff1': -0.05113181393685194, 'nauc_precision_at_10_max': 0.18962064467698705, 'nauc_precision_at_10_std': -0.14827004787357115, 'nauc_precision_at_10_diff1': -0.052513811685878764, 'nauc_precision_at_20_max': 0.22086458009752882, 'nauc_precision_at_20_std': -0.14430508663959002, 'nauc_precision_at_20_diff1': -0.040789324913047875, 'nauc_precision_at_100_max': 0.22138981394024387, 'nauc_precision_at_100_std': -0.13384472263037697, 'nauc_precision_at_100_diff1': -0.04518222914182943, 'nauc_precision_at_1000_max': 0.2542912736794115, 'nauc_precision_at_1000_std': -0.1881459402790264, 'nauc_precision_at_1000_diff1': -0.07195606207962846, 'nauc_mrr_at_1_max': 0.35182174117717013, 'nauc_mrr_at_1_std': -0.24426280067522707, 'nauc_mrr_at_1_diff1': 0.1772995319079407, 'nauc_mrr_at_3_max': 0.26889485727748363, 'nauc_mrr_at_3_std': -0.19153801111553947, 'nauc_mrr_at_3_diff1': 0.06173430027850725, 'nauc_mrr_at_5_max': 0.253857849052297, 'nauc_mrr_at_5_std': -0.19604549670316734, 'nauc_mrr_at_5_diff1': 0.036743759763164886, 'nauc_mrr_at_10_max': 0.25392922716866984, 'nauc_mrr_at_10_std': -0.1935061134919541, 'nauc_mrr_at_10_diff1': 0.03361519179733555, 'nauc_mrr_at_20_max': 0.25624951214228564, 'nauc_mrr_at_20_std': -0.19212268093923462, 'nauc_mrr_at_20_diff1': 0.03479828151019169, 'nauc_mrr_at_100_max': 0.2523932973431928, 'nauc_mrr_at_100_std': -0.1900913512193067, 'nauc_mrr_at_100_diff1': 0.03435870935950355, 'nauc_mrr_at_1000_max': 0.2523936325136619, 'nauc_mrr_at_1000_std': -0.19078164353963076, 'nauc_mrr_at_1000_diff1': 0.033601872249839834, 'main_score': 0.0501, 'hf_subset': 'default', 'languages': ['eng-Latn']}]} 
evaluation_time=99.69791841506958 kg_co2_emissions=None",
-  "dataset_revision='d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46' task_name='SprintDuplicateQuestions' mteb_version='1.14.15' scores={'test': [{'similarity_accuracy': 0.9926237623762376, 'similarity_accuracy_threshold': 0.9106360077857971, 'similarity_f1': 0.4925187032418952, 'similarity_f1_threshold': 0.8986777067184448, 'similarity_precision': 0.6539735099337748, 'similarity_recall': 0.395, 'similarity_ap': 0.4700755863552174, 'cosine_accuracy': 0.9926237623762376, 'cosine_accuracy_threshold': 0.9106360077857971, 'cosine_f1': 0.4925187032418952, 'cosine_f1_threshold': 0.8986777067184448, 'cosine_precision': 0.6539735099337748, 'cosine_recall': 0.395, 'cosine_ap': 0.4700755863552174, 'manhattan_accuracy': 0.9939207920792079, 'manhattan_accuracy_threshold': 4.824772834777832, 'manhattan_f1': 0.6293103448275862, 'manhattan_f1_threshold': 5.194998741149902, 'manhattan_precision': 0.6822429906542056, 'manhattan_recall': 0.584, 'manhattan_ap': 0.6386707007383838, 'euclidean_accuracy': 0.9926237623762376, 'euclidean_accuracy_threshold': 0.42276236414909363, 'euclidean_f1': 0.4925187032418952, 'euclidean_f1_threshold': 0.4501606225967407, 'euclidean_precision': 0.6539735099337748, 'euclidean_recall': 0.395, 'euclidean_ap': 0.47007558217981027, 'dot_accuracy': 0.9926237623762376, 'dot_accuracy_threshold': 0.9106361269950867, 'dot_f1': 0.4925187032418952, 'dot_f1_threshold': 0.8986777663230896, 'dot_precision': 0.6539735099337748, 'dot_recall': 0.395, 'dot_ap': 0.47007548398718707, 'max_accuracy': 0.9939207920792079, 'max_f1': 0.6293103448275862, 'max_precision': 0.6822429906542056, 'max_recall': 0.584, 'max_ap': 0.6386707007383838, 'main_score': 0.6386707007383838, 'hf_subset': 'default', 'languages': ['eng-Latn']}]} evaluation_time=1.9629368782043457 kg_co2_emissions=None"
-]

mteb_results/mteb_report.txt DELETED Viewed

@@ -1,21 +0,0 @@
-================================================================================
-MTEB Evaluation Report
-================================================================================
-Model: gte-Qwen2-7B-instruct-M2V-Distilled
-Model Path: .
-Evaluation Time: 1235.71 seconds
-Total Datasets: 1
-Summary Statistics:
-  Average Score: 0.0501
-  Median Score: 0.0501
-  Standard Deviation: 0.0000
-  Score Range: 0.0501 - 0.0501
-Detailed Results:
---------------------------------------------------
-                              Model Average (All) Average (MTEB) Classification Clustering PairClassification Reranking Retrieval STS Summarization PEARL WordSim
-gte-Qwen2-7B-instruct-M2V-Distilled           nan            nan            nan        nan                nan       nan      5.01 nan           nan   nan     nan
-================================================================================

mteb_results/mteb_summary.json DELETED Viewed

@@ -1,20 +0,0 @@
-{
-  "model_name": "gte-Qwen2-7B-instruct-M2V-Distilled",
-  "evaluation_time_seconds": 1235.7057559490204,
-  "task_scores": {
-    "gte-Qwen2-7B-instruct-M2V-Distilled": {
-      "task_means": "Classification           NaN\nClustering               NaN\nPairClassification       NaN\nReranking                NaN\nRetrieval             0.0501\nSTS                      NaN\nSummarization            NaN\nPEARL                    NaN\nWordSim                  NaN\ndtype: float64",
-      "dataset_scores": {
-        "CQADupstack": 0.0501
-      }
-    }
-  },
-  "summary_stats": {
-    "total_datasets": 1,
-    "average_score": 0.0501,
-    "median_score": 0.0501,
-    "std_dev": 0.0,
-    "min_score": 0.0501,
-    "max_score": 0.0501
-  }
-}

pyproject.toml CHANGED Viewed

@@ -12,6 +12,7 @@ dependencies = [
     "matplotlib>=3.10.3",
     "model2vec[train]>=0.5.0",
     "mteb>=1.14.15",
     "psutil>=7.0.0",
     "scikit-learn>=1.6.1",
     "sentence-transformers>=4.1.0",

     "matplotlib>=3.10.3",
     "model2vec[train]>=0.5.0",
     "mteb>=1.14.15",
+    "numpy>=1.26.4",
     "psutil>=7.0.0",
     "scikit-learn>=1.6.1",
     "sentence-transformers>=4.1.0",

uv.lock CHANGED Viewed

@@ -498,6 +498,7 @@ dependencies = [
     { name = "matplotlib" },
     { name = "model2vec", extra = ["train"] },
     { name = "mteb" },
     { name = "psutil" },
     { name = "scikit-learn" },
     { name = "sentence-transformers" },
@@ -519,6 +520,7 @@ requires-dist = [
     { name = "matplotlib", specifier = ">=3.10.3" },
     { name = "model2vec", extras = ["train"], specifier = ">=0.5.0" },
     { name = "mteb", specifier = ">=1.14.15" },
     { name = "psutil", specifier = ">=7.0.0" },
     { name = "scikit-learn", specifier = ">=1.6.1" },
     { name = "sentence-transformers", specifier = ">=4.1.0" },

     { name = "matplotlib" },
     { name = "model2vec", extra = ["train"] },
     { name = "mteb" },
+    { name = "numpy" },
     { name = "psutil" },
     { name = "scikit-learn" },
     { name = "sentence-transformers" },
     { name = "matplotlib", specifier = ">=3.10.3" },
     { name = "model2vec", extras = ["train"], specifier = ">=0.5.0" },
     { name = "mteb", specifier = ">=1.14.15" },
+    { name = "numpy", specifier = ">=1.26.4" },
     { name = "psutil", specifier = ">=7.0.0" },
     { name = "scikit-learn", specifier = ">=1.6.1" },
     { name = "sentence-transformers", specifier = ">=4.1.0" },