qMTEB / aggregate_data.py
varun4's picture
no breaking code
f70e4f4
raw
history blame
2.06 kB
import gradio as gr
import pandas as pd
import json
import os
TASKS_CLUSTERING = [
"ArxivClusteringP2P",
"ArxivClusteringS2S",
"BiorxivClusteringP2P",
"BiorxivClusteringS2S",
"MedrxivClusteringP2P",
"MedrxivClusteringS2S",
"RedditClustering",
"RedditClusteringP2P",
"StackExchangeClustering",
"StackExchangeClusteringP2P",
"TwentyNewsgroupsClustering",
]
TASKS_PAIR_CLASSIFICATION = [
"SprintDuplicateQuestions",
"TwitterSemEval2015",
"TwitterURLCorpus",
]
MODELS = [
"all-MiniLM-L6-v2"
]
def get_model_size(model_name):
return os.path.getsize(f"models/{model_name}/pytorch_model.bin") / (1024.0 * 1024.0)
def compute_model_score(model_name):
results_dir = "results"
model_dir = os.path.join(results_dir, model_name)
scores = []
# Get scores for clustering tasks
for task in TASKS_CLUSTERING:
task_file = os.path.join(model_dir, f"{task}.json")
with open(task_file, 'r') as f:
data = json.load(f)
v_measure = data['test']['v_measure']
scores.append(v_measure)
# Get scores for pair classification tasks
for task in TASKS_PAIR_CLASSIFICATION:
task_file = os.path.join(model_dir, f"{task}.json")
with open(task_file, 'r') as f:
data = json.load(f)
max_ap = data['test']['max']['ap']
scores.append(max_ap)
# Compute average score
average_score = sum(scores) / len(scores)
return average_score
DATA = {
"Model": MODELS,
"Model Size (MB)": [
get_model_size(model) for model in MODELS
],
"Score": [
5 # compute_model_score(model) for model in MODELS
],
"q8 Model Size (MB)": [
get_model_size(model + "-q8") for model in MODELS
],
"q8 Score": [
compute_model_score(model + "-q8") for model in MODELS
],
}
with open('data.json', 'w') as json_file:
json.dump(DATA, json_file)