"""User-facing leaderboard column definitions and derived column lists."""

from dataclasses import dataclass, make_dataclass

import pandas as pd


def fields(raw_class: type) -> list:
    """Return the non-dunder attribute values stored on *raw_class*.

    Only the class's own ``__dict__`` is inspected (inherited attributes are
    not included). Any attribute whose name starts or ends with a double
    underscore is skipped. Values are returned in definition order.

    Note: this intentionally differs from ``dataclasses.fields``; it yields
    the attribute *values* (here, ``ColumnContent`` defaults), not ``Field``
    descriptors.
    """
    return [
        value
        for key, value in raw_class.__dict__.items()
        if not key.startswith("__") and not key.endswith("__")
    ]


# These classes hold the user-facing column names, so that a rename only has
# to be made here instead of everywhere the column is referenced.
#
# frozen=True is required: instances are used as dataclass field defaults
# below, and Python 3.11+ raises ``ValueError: mutable default ...`` for any
# default whose class is unhashable (which a plain eq-comparing dataclass is).
# Freezing also protects these defaults, which are shared class-level objects.
@dataclass(frozen=True)
class ColumnContent:
    name: str  # label shown to the user
    type: str  # display type string (every column below uses "markdown")
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


## Leaderboard columns
# (attribute name, displayed label, never_hidden) in display order.
_COLUMN_SPECS = (
    ("model", "Model", True),
    ("agentic_framework", "Agentic Framework", True),
    ("ncr", "NCR", False),
    ("htu", "HTU", False),
    ("tcu", "TCU", False),
    ("ned", "NED", False),
    ("pvi", "PVI", False),
    ("kqa", "KQA", False),
    ("tii", "TII", False),
    ("mta", "MTA", False),
    ("bri", "BRI", False),
    ("overall", "Overall ⬆️", True),
)

# Each entry is ``[field_name, field_type, default]`` as accepted by
# ``dataclasses.make_dataclass``.
auto_eval_column_dict = [
    [attr, ColumnContent, ColumnContent(label, "markdown", True, never_hidden=pinned)]
    for attr, label, pinned in _COLUMN_SPECS
]

# Create AutoEvalColumn class
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict)

# Column selection
_VISIBLE_COLUMNS = [c for c in fields(AutoEvalColumn) if not c.hidden]
_DEFAULT_COLUMNS = [c for c in _VISIBLE_COLUMNS if c.displayed_by_default]

COLS = [c.name for c in _VISIBLE_COLUMNS]
TYPES = [c.type for c in _VISIBLE_COLUMNS]
COLS_LITE = [c.name for c in _DEFAULT_COLUMNS]
TYPES_LITE = [c.type for c in _DEFAULT_COLUMNS]

# BENCHMARK_COLS = [t.value.col_name for t in Tasks]

# Labelled buckets; every interval is left-open / right-closed, i.e. (lo, hi].
# NOTE(review): the labels look like model-size buckets - confirm with callers.
NUMERIC_INTERVALS = {
    "?": pd.Interval(-1, 0, closed="right"),
    "~1.5": pd.Interval(0, 2, closed="right"),
    "~3": pd.Interval(2, 4, closed="right"),
    "~7": pd.Interval(4, 9, closed="right"),
    "~13": pd.Interval(9, 20, closed="right"),
    "~35": pd.Interval(20, 45, closed="right"),
    "~60": pd.Interval(45, 70, closed="right"),
    "70+": pd.Interval(70, 10000, closed="right"),
}