Spaces:
Running
Running
File size: 2,923 Bytes
03f9084 a708f96 03f9084 a708f96 03f9084 a708f96 03f9084 a708f96 03f9084 a708f96 03f9084 a708f96 03f9084 a708f96 03f9084 a708f96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
from dataclasses import dataclass, make_dataclass
import pandas as pd
def fields(raw_class):
    """Return the values of the non-dunder attributes stored on *raw_class*.

    Walks ``raw_class.__dict__`` in its definition order and keeps a value
    only when its attribute name neither begins nor ends with a double
    underscore. Attributes inherited from base classes are not part of
    ``__dict__`` and are therefore not returned.
    """
    kept_values = []
    for attr_name, attr_value in raw_class.__dict__.items():
        # Skip anything that looks like interpreter/dataclass machinery.
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        kept_values.append(attr_value)
    return kept_values
# These classes hold the user-facing column names in one place,
# so that a rename does not have to be repeated throughout the code
# whenever a modification is needed.
# frozen=True makes instances immutable and therefore hashable. This matters
# because this module hands ColumnContent instances to make_dataclass as field
# defaults, and since Python 3.11 dataclasses reject unhashable defaults with
# "ValueError: mutable default ... use default_factory".
@dataclass(frozen=True)
class ColumnContent:
    """Display metadata for a single leaderboard column."""

    # Header text of the column (collected into COLS / COLS_LITE).
    name: str
    # Type tag of the column (collected into TYPES / TYPES_LITE), e.g. "markdown".
    type: str
    # Column is part of the "lite" selection (COLS_LITE / TYPES_LITE) when True.
    displayed_by_default: bool
    # Column is left out of every COLS* / TYPES* list when True.
    hidden: bool = False
    # NOTE(review): not read anywhere in this module; presumably tells the UI
    # that the user may not deselect this column -- confirm against the caller.
    never_hidden: bool = False
## Leaderboard columns
# Every entry is an [attribute_name, annotation, default] triple in the shape
# make_dataclass accepts; the default ColumnContent carries the display metadata.
# Resulting order: Model, Agentic Framework, the nine benchmark scores, Overall.
auto_eval_column_dict = [
    # Init
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    # Agentic Framework
    ["agentic_framework", ColumnContent, ColumnContent("Agentic Framework", "markdown", True, never_hidden=True)],
    # Benchmark scores: the attribute name is the lower-cased column label.
    *(
        [label.lower(), ColumnContent, ColumnContent(label, "markdown", True)]
        for label in ("NCR", "HTU", "TCU", "NED", "PVI", "KQA", "TII", "MTA", "BRI")
    ),
    # Overall
    ["overall", ColumnContent, ColumnContent("Overall ⬆️", "markdown", True, never_hidden=True)],
]

# Create AutoEvalColumn class
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict)
# Column selection
# Filter the column definitions once, then project names and types from the
# same lists so each COLS* list stays index-aligned with its TYPES* partner.
_visible_columns = [column for column in fields(AutoEvalColumn) if not column.hidden]
_default_columns = [column for column in _visible_columns if column.displayed_by_default]

# All non-hidden columns.
COLS = [column.name for column in _visible_columns]
TYPES = [column.type for column in _visible_columns]
# Non-hidden columns that are also displayed by default.
COLS_LITE = [column.name for column in _default_columns]
TYPES_LITE = [column.type for column in _default_columns]
# BENCHMARK_COLS = [t.value.col_name for t in Tasks]
# Maps a bucket label to the right-closed interval (lower, upper] it covers.
# NOTE(review): presumably model-size buckets -- the unit is not visible in
# this file; confirm against the code that consumes this mapping.
NUMERIC_INTERVALS = {
    label: pd.Interval(lower, upper, closed="right")
    for label, lower, upper in (
        ("?", -1, 0),
        ("~1.5", 0, 2),
        ("~3", 2, 4),
        ("~7", 4, 9),
        ("~13", 9, 20),
        ("~35", 20, 45),
        ("~60", 45, 70),
        ("70+", 70, 10000),
    )
}
|