Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import os | |
import os | |
import sys | |
ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) | |
sys.path.append(ROOT_DIR) | |
import pandas as pd | |
import numpy as np | |
from core.paper import Paper | |
def compare(ground_truth, automated_truth, key, verbose, url):
    """Check whether the automated label for ``key`` matches the manual one.

    Args:
        ground_truth: Dict-like object holding the manually assigned labels,
            indexed by criterion name.
        automated_truth: Dict-like object holding the automatically produced
            labels, indexed by criterion name.
        key: Criterion to compare (for example ``"license"``).
        verbose: When true, print one line for every mismatch.
        url: Repository URL; only used in the mismatch message.

    Returns:
        ``np.nan`` when ``key`` is absent from either input or when either
        label is missing according to ``pd.isna``; otherwise the result of
        comparing the two labels for equality.
    """
    if key not in ground_truth or key not in automated_truth:
        return np.nan

    expected = ground_truth[key]
    predicted = automated_truth[key]
    if pd.isna(expected) or pd.isna(predicted):
        return np.nan

    if key == "license":
        # Any manual license label other than "No" counts as "Yes". The
        # normalised value is kept in a local so the caller's ground-truth
        # record is not overwritten.
        expected = "No" if expected == "No" else "Yes"

    res = expected == predicted
    if verbose and not res:
        print(f"{key} acc. - {predicted} (GT:{expected}) ({url})")
    return res
# --- Run configuration ------------------------------------------------------
max_workers = 6  # not referenced in the visible part of this script
compare_to_gt = True  # not referenced in the visible part of this script
verbose = True  # print every mismatch between manual and automated labels
training = True  # True: keep papers with year < 2024; False: keep year >= 2024

# The results dump is tab-separated despite its .csv extension.
paper_dump = pd.read_csv("data/results.csv", sep="\t")
papers = [Paper.from_row(row) for _, row in paper_dump.iterrows()]

# One list of per-paper outcomes (True / False / NaN) for each criterion.
eval_readme = []
eval_training = []
eval_evaluating = []
eval_licensing = []
eval_weights = []
eval_dependencies = []
full_results = []  # not referenced in the visible part of this script

# (record key, summary label, outcome list), in comparison and report order.
criteria = (
    ("dependencies", "Dependencies", eval_dependencies),
    ("training", "Training", eval_training),
    ("evaluation", "Evaluating", eval_evaluating),
    ("weights", "Weights", eval_weights),
    ("readme", "README", eval_readme),
    ("license", "LICENSE", eval_licensing),
)


def _accuracy_label(outcomes):
    """Return the truncated percentage of matches in ``outcomes``, or "n/a".

    NaN entries are ignored. When there is nothing to average (empty list or
    only NaN entries) ``np.nanmean`` yields NaN, which ``int()`` cannot
    convert, so that case is reported as "n/a" instead of raising.
    """
    if all(pd.isna(outcome) for outcome in outcomes):
        return "n/a"
    return f"{int(100 * np.nanmean(outcomes))}%"


for paper in papers:
    # Only MIDL papers that have a main repository are evaluated.
    if paper.venue != "MIDL" or paper.main_repo_url is None:
        continue
    # Year split: the training run skips 2024+ papers, the other run skips
    # pre-2024 papers.
    if (int(paper.year) >= 2024) == training:
        continue

    for key, _, outcomes in criteria:
        outcomes.append(
            compare(
                paper.code_repro_manual,
                paper.code_repro_auto,
                key,
                verbose,
                paper.main_repo_url,
            )
        )

print("\nSummary:")
for _, label, outcomes in criteria:
    print(f"{label} acc. - {_accuracy_label(outcomes)}")