Spaces:
Running
Running
File size: 1,449 Bytes
460d762 12cea14 460d762 b323764 460d762 12cea14 460d762 b323764 460d762 12cea14 460d762 b323764 460d762 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
from src.utils_display import AutoEvalColumn, model_hyperlink
# Hard-coded reference entry for GPT-4, keyed by AutoEvalColumn column names.
# The scores are literals; the "tech report" revision and the arXiv link
# suggest they were copied from the GPT-4 technical report (TODO confirm).
gpt4_values = {
    # Identification columns.
    AutoEvalColumn.model.name: model_hyperlink(
        "https://arxiv.org/abs/2303.08774",
        "gpt4",
    ),
    AutoEvalColumn.revision.name: "tech report",
    AutoEvalColumn.precision.name: None,
    # Scores. The stored average is consistent with the mean of the four
    # benchmark values that follow it (84.25).
    AutoEvalColumn.average.name: 84.3,
    AutoEvalColumn.arc.name: 96.3,
    AutoEvalColumn.hellaswag.name: 95.3,
    AutoEvalColumn.mmlu.name: 86.4,
    AutoEvalColumn.truthfulqa.name: 59.0,
    # Remaining columns: a plain-text label and an empty model type.
    AutoEvalColumn.dummy.name: "GPT-4",
    AutoEvalColumn.model_type.name: "",
}
# Hard-coded reference entry for GPT-3.5, keyed by AutoEvalColumn column names.
# NOTE(review): the hyperlink targets the same arXiv URL as the GPT-4 entry
# (2303.08774); presumably that report is the source of these numbers - confirm.
gpt35_values = {
    # Identification columns.
    AutoEvalColumn.model.name: model_hyperlink(
        "https://arxiv.org/abs/2303.08774",
        "gpt3.5",
    ),
    AutoEvalColumn.revision.name: "tech report",
    AutoEvalColumn.precision.name: None,
    # Scores. The stored average is consistent with the mean of the four
    # benchmark values that follow it (71.925).
    AutoEvalColumn.average.name: 71.9,
    AutoEvalColumn.arc.name: 85.2,
    AutoEvalColumn.hellaswag.name: 85.5,
    AutoEvalColumn.mmlu.name: 70.0,
    AutoEvalColumn.truthfulqa.name: 47.0,
    # Remaining columns: a plain-text label and an empty model type.
    AutoEvalColumn.dummy.name: "GPT-3.5",
    AutoEvalColumn.model_type.name: "",
}
# Hard-coded "Baseline" entry, keyed by AutoEvalColumn column names.
# Unlike the GPT entries, the model cell is a raw HTML paragraph rather than a
# model_hyperlink(...) result, and the revision is "N/A".
baseline = {
    # Identification columns.
    AutoEvalColumn.model.name: "<p>Baseline</p>",
    AutoEvalColumn.revision.name: "N/A",
    AutoEvalColumn.precision.name: None,
    # Every score, including the average, is 25.0.
    # NOTE(review): presumably the chance level for 4-way multiple choice - confirm.
    AutoEvalColumn.average.name: 25.0,
    AutoEvalColumn.arc.name: 25.0,
    AutoEvalColumn.hellaswag.name: 25.0,
    AutoEvalColumn.mmlu.name: 25.0,
    AutoEvalColumn.truthfulqa.name: 25.0,
    # Remaining columns: a plain-text label and an empty model type.
    AutoEvalColumn.dummy.name: "baseline",
    AutoEvalColumn.model_type.name: "",
}
|