File size: 2,923 Bytes
03f9084
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a708f96
03f9084
a708f96
 
03f9084
a708f96
 
03f9084
a708f96
 
 
03f9084
a708f96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03f9084
a708f96
 
 
 
03f9084
a708f96
 
 
 
03f9084
a708f96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from dataclasses import dataclass, make_dataclass

import pandas as pd


def fields(raw_class):
    """Return the values of the non-dunder attributes defined on *raw_class*.

    Only the class's own ``__dict__`` is inspected, so inherited attributes
    are not included. An attribute is skipped when its name starts with a
    double underscore or ends with one. Values are returned in the order
    ``__dict__`` yields them.
    """
    attributes = []
    for attr_name, attr_value in raw_class.__dict__.items():
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        attributes.append(attr_value)
    return attributes


# These classes hold the user-facing column names in one place, so that
# renaming a column does not require changes throughout the codebase.
@dataclass(frozen=True)
class ColumnContent:
    """Display metadata for a single leaderboard column.

    The class is frozen so that its instances are immutable and hashable.
    Instances are used as field defaults when the column dataclass is built
    with ``make_dataclass``; since Python 3.11 ``dataclasses`` rejects
    unhashable default values with ``ValueError``, which a plain
    ``@dataclass`` (``eq=True``, not frozen) would trigger.

    Attributes:
        name: Column header text shown to the user.
        type: Cell type identifier for the column (e.g. ``"markdown"``).
        displayed_by_default: Whether the column belongs to the default
            ("lite") column selection.
        hidden: When true, the column is left out of the column selections
            built in this module.
        never_hidden: Not read in this module; presumably marks columns the
            UI must always show -- confirm against the consumer.
    """

    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


## Leaderboard columns


# Leaderboard column registry. Each entry is an [attribute name, type, default]
# triple, the shape `make_dataclass` accepts for a field with a default value.
# The rows below are (attribute name, displayed header, never_hidden flag);
# every column uses the "markdown" cell type and is displayed by default.
auto_eval_column_dict = [
    [attr, ColumnContent, ColumnContent(header, "markdown", True, never_hidden=pinned)]
    for attr, header, pinned in (
        ("model", "Model", True),
        ("agentic_framework", "Agentic Framework", True),
        ("ncr", "NCR", False),
        ("htu", "HTU", False),
        ("tcu", "TCU", False),
        ("ned", "NED", False),
        ("pvi", "PVI", False),
        ("kqa", "KQA", False),
        ("tii", "TII", False),
        ("mta", "MTA", False),
        ("bri", "BRI", False),
        ("overall", "Overall ⬆️", True),
    )
]

# Build the AutoEvalColumn class: each column becomes a field whose default
# (and therefore class attribute) is its ColumnContent instance.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict)


# Column selection
# Headers (COLS) and cell types (TYPES) of every column not flagged `hidden`.
COLS, TYPES = (
    [getattr(column, attr) for column in fields(AutoEvalColumn) if not column.hidden]
    for attr in ("name", "type")
)
# The same pair, restricted to columns that are also `displayed_by_default`.
COLS_LITE, TYPES_LITE = (
    [
        getattr(column, attr)
        for column in fields(AutoEvalColumn)
        if column.displayed_by_default and not column.hidden
    ]
    for attr in ("name", "type")
)

# BENCHMARK_COLS = [t.value.col_name for t in Tasks]

# Labelled numeric buckets. Every bucket is a right-closed interval
# (lower, upper]: the lower bound is excluded and the upper bound included,
# so consecutive buckets cover (-1, 10000] without overlapping.
# NOTE(review): nothing in this file reads these buckets, and the quantity
# they classify is not visible here -- confirm against the callers.
NUMERIC_INTERVALS = {
    label: pd.Interval(lower, upper, closed="right")
    for label, lower, upper in (
        ("?", -1, 0),
        ("~1.5", 0, 2),
        ("~3", 2, 4),
        ("~7", 4, 9),
        ("~13", 9, 20),
        ("~35", 20, 45),
        ("~60", 45, 70),
        ("70+", 70, 10000),
    )
}