# Debug script: inspect the task definitions, the parsed eval results, and the
# leaderboard DataFrame built from them.
import json
import traceback

import pandas as pd

from src.about import Tasks
from src.display.utils import BENCHMARK_COLS
from src.leaderboard.read_evals import get_raw_eval_results
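# Show how each task maps to a benchmark, metric, and display column.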
print("Tasks definitions:") |
|
for task in Tasks: |
|
print(f"- {task.name}: benchmark={task.value.benchmark}, metric={task.value.metric}, col_name={task.value.col_name}") |
|
|
|
print("\nBenchmark columns:", BENCHMARK_COLS) |
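# Reproduce the leaderboard's DataFrame construction, printing intermediate state.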
try:
    # Parse the raw eval results from "eval-results", using the request files in "eval-queue".
    raw_results = get_raw_eval_results("eval-results", "eval-queue")
    print("\nRaw results:")
    for result in raw_results:
        print("\nResult:")
        print("- eval_name:", result.eval_name)
        print("- results:", result.results)
        data_dict = result.to_dict()
        print("- data_dict:", data_dict)

    # Convert each parsed result to a record and assemble the DataFrame.
    all_data_json = [v.to_dict() for v in raw_results]
    df = pd.DataFrame.from_records(all_data_json)
    print("\nDataFrame columns:", df.columns.tolist())
    print("\nDataFrame contents:")
    print(df)
except Exception as e:
    print("\nError:", str(e))
    traceback.print_exc()
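# Dump one raw results file so its contents can be compared with the parsed output above.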
print("\nRaw data from results file:") |
|
import json |
|
with open("eval-results/results_1.json") as f: |
|
print(json.dumps(json.load(f), indent=2)) |
|
|