def step_element_match(step_to_check, step_element):
    """Return whether ``step_to_check`` is selected by one step-spec element.

    Supported element forms (spaces are ignored):
      * ``"N"``     - exactly step N
      * ``"%K"``    - every step divisible by K
      * ``"A-B"``   - every step in the inclusive range [A, B]
      * ``"A-B%K"`` - steps in [A, B] whose distance from A is a multiple of K

    Raises:
        ValueError: if the element cannot be parsed into integers.
    """
    step_element = step_element.strip().replace(" ", "")
    if "-" in step_element:
        start, end = step_element.split("-")
        stride = None
        if "%" in end:
            end, stride = end.split("%")
        return (int(start) <= step_to_check <= int(end) and
                (stride is None or (step_to_check - int(start)) % int(stride) == 0))
    if "%" in step_element:
        # The modulo form is written "%K": skip the leading "%" sign.
        return step_to_check % int(step_element[1:]) == 0
    return step_to_check == int(step_element)


def fetch_run_results_simple(repo_name, runs_to_fetch, steps_to_fetch, prefix, agg_score_columns, column_name,
                             seed_merge_method, oauth_token=None, prefix_file=None):
    """Download evaluation tensorboard events from a hub repo into one DataFrame.

    For every run in ``runs_to_fetch`` the folder ``<run>_e`` of ``repo_name`` is
    listed, the most recent event file of each step is kept, and the last value of
    every ``e/<task>/<metric>`` scalar (tags containing "stderr" are skipped)
    becomes a ``<task>/<metric>`` column. The result is also written to
    ``[<prefix_file>_]<repo basename>_metrics.csv`` in the working directory.

    Args:
        repo_name: hub repository id passed to ``huggingface_hub``.
        runs_to_fetch: run folder paths, without the ``_e`` suffix.
        steps_to_fetch: comma separated step spec, see ``step_element_match``.
        prefix: prefix stripped from the run folder to build ``runname``.
        agg_score_columns: columns averaged row-wise into ``agg_score``; names
            missing from the data are ignored.
        column_name: unused here; kept for interface compatibility.
        seed_merge_method: unused here; kept for interface compatibility.
        oauth_token: optional hub token.
        prefix_file: optional prefix for the CSV file name.

    Returns:
        The metrics DataFrame (one row per run and step, fresh 0..n-1 index), or
        ``None`` when ``runs_to_fetch`` is empty.

    Raises:
        ValueError: if no event file matches the requested runs and steps.
    """
    if not runs_to_fetch:
        return

    # Parse the step spec once instead of once per candidate file.
    step_elements = steps_to_fetch.split(",")

    def fetch_run_files(run_to_fetch):
        """List ``(run folder, steps, repo file)`` for the newest event file of each wanted step."""
        def filename_to_steps_timestamp(fn):
            step, ts = fn.split("_events.out.tfevents.")
            # The step is the 7 characters right before the marker; the timestamp
            # is everything up to the first "." after it.
            return int(step[-7:]), int(ts[:ts.index(".")])

        run_to_fetch += "_e"
        try:
            eval_repo_file_names = [f.path for f in
                                    huggingface_hub.list_repo_tree(repo_name, run_to_fetch, expand=False,
                                                                   token=oauth_token) if
                                    "_events.out.tfevents" in f.path]
        except EntryNotFoundError:
            # The run has no evaluation folder in the repo: nothing to load.
            return []

        parsed = []
        for repofile in eval_repo_file_names:
            steps, ts = filename_to_steps_timestamp(os.path.relpath(repofile, run_to_fetch))
            parsed.append((steps, ts, repofile))

        # Keep only the most recent event file of each step (first one wins on ties).
        latest = {}
        for steps, ts, repofile in parsed:
            if steps not in latest or latest[steps][0] < ts:
                latest[steps] = (ts, repofile)

        return [(run_to_fetch, steps, repofile)
                for steps, ts, repofile in parsed
                if latest[steps][1] == repofile
                and any(step_element_match(steps, step_el) for step_el in step_elements)]

    def load_run_file(data):
        """Download one event file and return its scalars as a single-row DataFrame."""
        run_to_fetch, steps, repofile = data
        loader = EventAccumulator(huggingface_hub.hf_hub_download(repo_name, repofile, token=oauth_token))
        loader.Reload()
        runname = run_to_fetch.removeprefix(prefix).removesuffix("-_e")
        column_names = ["runname", "seed", "steps", "agg_score"]
        # "seed" and "agg_score" start as placeholders; agg_score is filled in below.
        column_values = [runname, 0, steps, 0.0]

        for tag in loader.Tags()['scalars']:
            tag_parts = tag.split('/')
            if "stderr" not in tag and tag_parts[0] == 'e':
                event_list = loader.Scalars(tag)
                column_names.append(f"{tag_parts[1]}/{tag_parts[2]}")
                # Only the last logged value of the scalar is kept.
                column_values.append(event_list[-1].value)

        return pd.DataFrame([column_values], columns=column_names)

    with ThreadPoolExecutor() as pool:
        run_files = list(itertools.chain.from_iterable(
            tqdm(pool.map(fetch_run_files, runs_to_fetch), total=len(runs_to_fetch), desc="Fetching datafiles...")))
        if not run_files:
            # pd.concat would raise an opaque "No objects to concatenate" here.
            raise ValueError(
                f"No evaluation event files found in {repo_name} for runs {list(runs_to_fetch)} "
                f"matching steps {steps_to_fetch!r}")
        frames = list(tqdm(pool.map(load_run_file, run_files), total=len(run_files), desc="Loading evals data..."))

    # Every per-file frame has index 0; renumber so row labels are unique.
    df = pd.concat(frames, ignore_index=True)

    cols_to_avg = [col for col in agg_score_columns if col in df.columns]
    if cols_to_avg:
        df['agg_score'] = df[cols_to_avg].mean(axis=1)

    file_prefix = f"{prefix_file}_" if prefix_file else ""
    output_path = f"{file_prefix}{repo_name.split('/')[-1]}_metrics.csv"
    df.to_csv(output_path, index=False)
    # Report the path that was actually written, including any file prefix.
    print(f"Metrics saved to {output_path}")

    return df
\n", " | runname | \n", "seed | \n", "steps | \n", "agg_score | \n", "commonsense_qa/acc | \n", "commonsense_qa/acc_norm | \n", "hellaswag/acc | \n", "hellaswag/acc_norm | \n", "openbookqa/acc | \n", "openbookqa/acc_norm | \n", "... | \n", "siqa/acc | \n", "siqa/acc_norm | \n", "winogrande/acc | \n", "winogrande/acc_norm | \n", "all/acc | \n", "all/acc_norm | \n", "arc/acc | \n", "arc/acc_norm | \n", "mmlu/acc | \n", "mmlu/acc_norm | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "edu_fineweb_350b_tokens-seed-1 | \n", "0 | \n", "2000 | \n", "0.390326 | \n", "0.284 | \n", "0.283 | \n", "0.314 | \n", "0.325 | \n", "0.164 | \n", "0.296 | \n", "... | \n", "0.362 | \n", "0.406 | \n", "0.511 | \n", "0.511 | \n", "0.279674 | \n", "0.299162 | \n", "0.3795 | \n", "0.3850 | \n", "0.265997 | \n", "0.284605 | \n", "
0 | \n", "edu_fineweb_350b_tokens-seed-1 | \n", "0 | \n", "4000 | \n", "0.414680 | \n", "0.322 | \n", "0.307 | \n", "0.343 | \n", "0.395 | \n", "0.196 | \n", "0.320 | \n", "... | \n", "0.371 | \n", "0.388 | \n", "0.518 | \n", "0.495 | \n", "0.290613 | \n", "0.312593 | \n", "0.4215 | \n", "0.4285 | \n", "0.274401 | \n", "0.295939 | \n", "
0 | \n", "edu_fineweb_350b_tokens-seed-1 | \n", "0 | \n", "6000 | \n", "0.428390 | \n", "0.319 | \n", "0.311 | \n", "0.372 | \n", "0.431 | \n", "0.202 | \n", "0.352 | \n", "... | \n", "0.373 | \n", "0.392 | \n", "0.520 | \n", "0.519 | \n", "0.303980 | \n", "0.323323 | \n", "0.4315 | \n", "0.4460 | \n", "0.288591 | \n", "0.306123 | \n", "
0 | \n", "edu_fineweb_350b_tokens-seed-1 | \n", "0 | \n", "8000 | \n", "0.443615 | \n", "0.340 | \n", "0.311 | \n", "0.379 | \n", "0.463 | \n", "0.204 | \n", "0.360 | \n", "... | \n", "0.384 | \n", "0.404 | \n", "0.517 | \n", "0.517 | \n", "0.315148 | \n", "0.333284 | \n", "0.4630 | \n", "0.4790 | \n", "0.299186 | \n", "0.314921 | \n", "
0 | \n", "edu_fineweb_350b_tokens-seed-1 | \n", "0 | \n", "10000 | \n", "0.441457 | \n", "0.346 | \n", "0.317 | \n", "0.390 | \n", "0.454 | \n", "0.222 | \n", "0.364 | \n", "... | \n", "0.366 | \n", "0.395 | \n", "0.514 | \n", "0.506 | \n", "0.318935 | \n", "0.335419 | \n", "0.4890 | \n", "0.4820 | \n", "0.302189 | \n", "0.317653 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
0 | \n", "edu_fineweb_350b_tokens-seed-1 | \n", "0 | \n", "160000 | \n", "0.507129 | \n", "0.430 | \n", "0.359 | \n", "0.473 | \n", "0.593 | \n", "0.282 | \n", "0.418 | \n", "... | \n", "0.392 | \n", "0.402 | \n", "0.576 | \n", "0.575 | \n", "0.369137 | \n", "0.393898 | \n", "0.5670 | \n", "0.5725 | \n", "0.350226 | \n", "0.374533 | \n", "
0 | \n", "edu_fineweb_350b_tokens-seed-1 | \n", "0 | \n", "162000 | \n", "0.509118 | \n", "0.416 | \n", "0.367 | \n", "0.474 | \n", "0.592 | \n", "0.288 | \n", "0.408 | \n", "... | \n", "0.390 | \n", "0.409 | \n", "0.572 | \n", "0.577 | \n", "0.367420 | \n", "0.392861 | \n", "0.5720 | \n", "0.5780 | \n", "0.348268 | \n", "0.372947 | \n", "
0 | \n", "edu_fineweb_350b_tokens-seed-1 | \n", "0 | \n", "164000 | \n", "0.507843 | \n", "0.416 | \n", "0.365 | \n", "0.467 | \n", "0.591 | \n", "0.276 | \n", "0.408 | \n", "... | \n", "0.395 | \n", "0.406 | \n", "0.576 | \n", "0.580 | \n", "0.368319 | \n", "0.392000 | \n", "0.5635 | \n", "0.5715 | \n", "0.349943 | \n", "0.372246 | \n", "
0 | \n", "edu_fineweb_350b_tokens-seed-1 | \n", "0 | \n", "166000 | \n", "0.508308 | \n", "0.415 | \n", "0.364 | \n", "0.472 | \n", "0.593 | \n", "0.282 | \n", "0.414 | \n", "... | \n", "0.401 | \n", "0.408 | \n", "0.575 | \n", "0.570 | \n", "0.370593 | \n", "0.393176 | \n", "0.5640 | \n", "0.5760 | \n", "0.352203 | \n", "0.373463 | \n", "
0 | \n", "edu_fineweb_350b_tokens-seed-1 | \n", "0 | \n", "167000 | \n", "0.509494 | \n", "0.429 | \n", "0.362 | \n", "0.472 | \n", "0.597 | \n", "0.290 | \n", "0.418 | \n", "... | \n", "0.395 | \n", "0.404 | \n", "0.582 | \n", "0.578 | \n", "0.369666 | \n", "0.394136 | \n", "0.5670 | \n", "0.5735 | \n", "0.350671 | \n", "0.374453 | \n", "
82 rows × 22 columns
\n", "\n", " | runname | \n", "seed | \n", "steps | \n", "agg_score | \n", "commonsense_qa/acc | \n", "commonsense_qa/acc_norm | \n", "hellaswag/acc | \n", "hellaswag/acc_norm | \n", "openbookqa/acc | \n", "openbookqa/acc_norm | \n", "... | \n", "siqa/acc | \n", "siqa/acc_norm | \n", "winogrande/acc | \n", "winogrande/acc_norm | \n", "all/acc | \n", "all/acc_norm | \n", "arc/acc | \n", "arc/acc_norm | \n", "mmlu/acc | \n", "mmlu/acc_norm | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "FineWeb-Edu | \n", "0 | \n", "2000 | \n", "0.390326 | \n", "0.284 | \n", "0.283 | \n", "0.314 | \n", "0.325 | \n", "0.164 | \n", "0.296 | \n", "... | \n", "0.362 | \n", "0.406 | \n", "0.511 | \n", "0.511 | \n", "0.279674 | \n", "0.299162 | \n", "0.3795 | \n", "0.3850 | \n", "0.265997 | \n", "0.284605 | \n", "
1 | \n", "FineWeb-Edu | \n", "0 | \n", "4000 | \n", "0.414680 | \n", "0.322 | \n", "0.307 | \n", "0.343 | \n", "0.395 | \n", "0.196 | \n", "0.320 | \n", "... | \n", "0.371 | \n", "0.388 | \n", "0.518 | \n", "0.495 | \n", "0.290613 | \n", "0.312593 | \n", "0.4215 | \n", "0.4285 | \n", "0.274401 | \n", "0.295939 | \n", "
2 | \n", "FineWeb-Edu | \n", "0 | \n", "6000 | \n", "0.428390 | \n", "0.319 | \n", "0.311 | \n", "0.372 | \n", "0.431 | \n", "0.202 | \n", "0.352 | \n", "... | \n", "0.373 | \n", "0.392 | \n", "0.520 | \n", "0.519 | \n", "0.303980 | \n", "0.323323 | \n", "0.4315 | \n", "0.4460 | \n", "0.288591 | \n", "0.306123 | \n", "
3 | \n", "FineWeb-Edu | \n", "0 | \n", "8000 | \n", "0.443615 | \n", "0.340 | \n", "0.311 | \n", "0.379 | \n", "0.463 | \n", "0.204 | \n", "0.360 | \n", "... | \n", "0.384 | \n", "0.404 | \n", "0.517 | \n", "0.517 | \n", "0.315148 | \n", "0.333284 | \n", "0.4630 | \n", "0.4790 | \n", "0.299186 | \n", "0.314921 | \n", "
4 | \n", "FineWeb-Edu | \n", "0 | \n", "10000 | \n", "0.441457 | \n", "0.346 | \n", "0.317 | \n", "0.390 | \n", "0.454 | \n", "0.222 | \n", "0.364 | \n", "... | \n", "0.366 | \n", "0.395 | \n", "0.514 | \n", "0.506 | \n", "0.318935 | \n", "0.335419 | \n", "0.4890 | \n", "0.4820 | \n", "0.302189 | \n", "0.317653 | \n", "
5 rows × 22 columns
\n", "\n", " | runname | \n", "steps | \n", "agg_score | \n", "commonsense_qa/acc | \n", "commonsense_qa/acc_norm | \n", "hellaswag/acc | \n", "hellaswag/acc_norm | \n", "openbookqa/acc | \n", "openbookqa/acc_norm | \n", "piqa/acc | \n", "... | \n", "winogrande/acc_norm | \n", "sciq/acc | \n", "sciq/acc_norm | \n", "arc/acc | \n", "arc/acc_norm | \n", "mmlu/acc | \n", "mmlu/acc_norm | \n", "seed | \n", "all/acc | \n", "all/acc_norm | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1253 | \n", "FineWeb-Edu | \n", "160000 | \n", "0.507129 | \n", "0.430 | \n", "0.359 | \n", "0.473 | \n", "0.593 | \n", "0.282 | \n", "0.418 | \n", "0.744 | \n", "... | \n", "0.575 | \n", "NaN | \n", "NaN | \n", "0.5670 | \n", "0.5725 | \n", "0.350226 | \n", "0.374533 | \n", "0.0 | \n", "0.369137 | \n", "0.393898 | \n", "
1254 | \n", "FineWeb-Edu | \n", "162000 | \n", "0.509118 | \n", "0.416 | \n", "0.367 | \n", "0.474 | \n", "0.592 | \n", "0.288 | \n", "0.408 | \n", "0.747 | \n", "... | \n", "0.577 | \n", "NaN | \n", "NaN | \n", "0.5720 | \n", "0.5780 | \n", "0.348268 | \n", "0.372947 | \n", "0.0 | \n", "0.367420 | \n", "0.392861 | \n", "
1255 | \n", "FineWeb-Edu | \n", "164000 | \n", "0.507843 | \n", "0.416 | \n", "0.365 | \n", "0.467 | \n", "0.591 | \n", "0.276 | \n", "0.408 | \n", "0.737 | \n", "... | \n", "0.580 | \n", "NaN | \n", "NaN | \n", "0.5635 | \n", "0.5715 | \n", "0.349943 | \n", "0.372246 | \n", "0.0 | \n", "0.368319 | \n", "0.392000 | \n", "
1256 | \n", "FineWeb-Edu | \n", "166000 | \n", "0.508308 | \n", "0.415 | \n", "0.364 | \n", "0.472 | \n", "0.593 | \n", "0.282 | \n", "0.414 | \n", "0.740 | \n", "... | \n", "0.570 | \n", "NaN | \n", "NaN | \n", "0.5640 | \n", "0.5760 | \n", "0.352203 | \n", "0.373463 | \n", "0.0 | \n", "0.370593 | \n", "0.393176 | \n", "
1257 | \n", "FineWeb-Edu | \n", "167000 | \n", "0.509494 | \n", "0.429 | \n", "0.362 | \n", "0.472 | \n", "0.597 | \n", "0.290 | \n", "0.418 | \n", "0.738 | \n", "... | \n", "0.578 | \n", "NaN | \n", "NaN | \n", "0.5670 | \n", "0.5735 | \n", "0.350671 | \n", "0.374453 | \n", "0.0 | \n", "0.369666 | \n", "0.394136 | \n", "
5 rows × 24 columns
\n", "\n", " | steps | \n", "
---|---|
runname | \n", "\n", " |
C4 | \n", "168 | \n", "
Dolma | \n", "168 | \n", "
FineWeb (ours) | \n", "168 | \n", "
FineWeb-Edu | \n", "82 | \n", "
RedPajama2 | \n", "168 | \n", "
RefinedWeb | \n", "168 | \n", "
SlimPajama | \n", "168 | \n", "
The Pile | \n", "168 | \n", "
\n", " | runname | \n", "steps | \n", "agg_score | \n", "commonsense_qa/acc | \n", "commonsense_qa/acc_norm | \n", "hellaswag/acc | \n", "hellaswag/acc_norm | \n", "openbookqa/acc | \n", "openbookqa/acc_norm | \n", "piqa/acc | \n", "... | \n", "siqa/acc | \n", "siqa/acc_norm | \n", "winogrande/acc | \n", "winogrande/acc_norm | \n", "sciq/acc | \n", "sciq/acc_norm | \n", "arc/acc | \n", "arc/acc_norm | \n", "mmlu/acc | \n", "mmlu/acc_norm | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "C4 | \n", "0 | \n", "0.330893 | \n", "0.186 | \n", "0.233 | \n", "0.272 | \n", "0.258 | \n", "0.166 | \n", "0.286 | \n", "0.542 | \n", "... | \n", "0.367 | \n", "0.362 | \n", "0.516 | \n", "0.497 | \n", "0.208 | \n", "0.202 | \n", "0.2195 | \n", "0.2510 | \n", "0.230294 | \n", "0.250147 | \n", "
1 | \n", "C4 | \n", "1000 | \n", "0.355112 | \n", "0.229 | \n", "0.260 | \n", "0.286 | \n", "0.288 | \n", "0.128 | \n", "0.250 | \n", "0.614 | \n", "... | \n", "0.351 | \n", "0.404 | \n", "0.519 | \n", "0.476 | \n", "0.565 | \n", "0.518 | \n", "0.2680 | \n", "0.2935 | \n", "0.238951 | \n", "0.250399 | \n", "
2 | \n", "C4 | \n", "2000 | \n", "0.378435 | \n", "0.268 | \n", "0.278 | \n", "0.312 | \n", "0.330 | \n", "0.122 | \n", "0.276 | \n", "0.646 | \n", "... | \n", "0.375 | \n", "0.400 | \n", "0.509 | \n", "0.500 | \n", "0.676 | \n", "0.577 | \n", "0.3065 | \n", "0.3230 | \n", "0.247275 | \n", "0.255482 | \n", "
3 | \n", "C4 | \n", "3000 | \n", "0.387795 | \n", "0.280 | \n", "0.295 | \n", "0.331 | \n", "0.380 | \n", "0.152 | \n", "0.274 | \n", "0.660 | \n", "... | \n", "0.376 | \n", "0.387 | \n", "0.512 | \n", "0.496 | \n", "0.725 | \n", "0.621 | \n", "0.3175 | \n", "0.3340 | \n", "0.254534 | \n", "0.267363 | \n", "
4 | \n", "C4 | \n", "4000 | \n", "0.399320 | \n", "0.296 | \n", "0.298 | \n", "0.351 | \n", "0.406 | \n", "0.168 | \n", "0.282 | \n", "0.676 | \n", "... | \n", "0.382 | \n", "0.404 | \n", "0.522 | \n", "0.503 | \n", "0.723 | \n", "0.618 | \n", "0.3255 | \n", "0.3470 | \n", "0.254762 | \n", "0.263563 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1171 | \n", "The Pile | \n", "163000 | \n", "0.463789 | \n", "0.379 | \n", "0.349 | \n", "0.441 | \n", "0.555 | \n", "0.240 | \n", "0.366 | \n", "0.701 | \n", "... | \n", "0.405 | \n", "0.388 | \n", "0.585 | \n", "0.560 | \n", "0.875 | \n", "0.820 | \n", "0.4475 | \n", "0.4450 | \n", "0.299378 | \n", "0.326313 | \n", "
1172 | \n", "The Pile | \n", "164000 | \n", "0.462758 | \n", "0.369 | \n", "0.344 | \n", "0.438 | \n", "0.552 | \n", "0.248 | \n", "0.348 | \n", "0.708 | \n", "... | \n", "0.395 | \n", "0.401 | \n", "0.577 | \n", "0.567 | \n", "0.874 | \n", "0.806 | \n", "0.4465 | \n", "0.4355 | \n", "0.302083 | \n", "0.331563 | \n", "
1173 | \n", "The Pile | \n", "165000 | \n", "0.465026 | \n", "0.383 | \n", "0.350 | \n", "0.438 | \n", "0.553 | \n", "0.234 | \n", "0.352 | \n", "0.707 | \n", "... | \n", "0.400 | \n", "0.401 | \n", "0.569 | \n", "0.556 | \n", "0.874 | \n", "0.811 | \n", "0.4460 | \n", "0.4455 | \n", "0.305193 | \n", "0.331708 | \n", "
1174 | \n", "The Pile | \n", "166000 | \n", "0.462349 | \n", "0.377 | \n", "0.346 | \n", "0.440 | \n", "0.557 | \n", "0.228 | \n", "0.346 | \n", "0.711 | \n", "... | \n", "0.398 | \n", "0.398 | \n", "0.572 | \n", "0.558 | \n", "0.877 | \n", "0.811 | \n", "0.4525 | \n", "0.4385 | \n", "0.301952 | \n", "0.331295 | \n", "
1175 | \n", "The Pile | \n", "167000 | \n", "0.464539 | \n", "0.386 | \n", "0.354 | \n", "0.434 | \n", "0.557 | \n", "0.232 | \n", "0.356 | \n", "0.706 | \n", "... | \n", "0.402 | \n", "0.402 | \n", "0.573 | \n", "0.559 | \n", "0.867 | \n", "0.802 | \n", "0.4475 | \n", "0.4375 | \n", "0.301934 | \n", "0.330810 | \n", "
1176 rows × 21 columns
\n", "\n", " | runname | \n", "seed | \n", "steps | \n", "agg_score | \n", "commonsense_qa/acc | \n", "commonsense_qa/acc_norm | \n", "hellaswag/acc | \n", "hellaswag/acc_norm | \n", "openbookqa/acc | \n", "openbookqa/acc_norm | \n", "... | \n", "siqa/acc | \n", "siqa/acc_norm | \n", "winogrande/acc | \n", "winogrande/acc_norm | \n", "all/acc | \n", "all/acc_norm | \n", "arc/acc | \n", "arc/acc_norm | \n", "mmlu/acc | \n", "mmlu/acc_norm | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "FineWeb (FW) | \n", "0 | \n", "4000 | \n", "0.389983 | \n", "0.275 | \n", "0.281 | \n", "0.352 | \n", "0.383 | \n", "0.152 | \n", "0.286 | \n", "... | \n", "0.365 | \n", "0.385 | \n", "0.505 | \n", "0.493 | \n", "0.265054 | \n", "0.281046 | \n", "0.3265 | \n", "0.3435 | \n", "0.250500 | \n", "0.264368 | \n", "
0 | \n", "FineWeb (FW) | \n", "0 | \n", "5000 | \n", "0.397987 | \n", "0.303 | \n", "0.297 | \n", "0.349 | \n", "0.397 | \n", "0.154 | \n", "0.290 | \n", "... | \n", "0.375 | \n", "0.383 | \n", "0.509 | \n", "0.502 | \n", "0.268548 | \n", "0.282678 | \n", "0.3340 | \n", "0.3560 | \n", "0.253134 | \n", "0.264896 | \n", "
0 | \n", "FineWeb (FW) | \n", "0 | \n", "6000 | \n", "0.403954 | \n", "0.317 | \n", "0.319 | \n", "0.359 | \n", "0.416 | \n", "0.166 | \n", "0.284 | \n", "... | \n", "0.379 | \n", "0.400 | \n", "0.516 | \n", "0.490 | \n", "0.268197 | \n", "0.286678 | \n", "0.3330 | \n", "0.3590 | \n", "0.252102 | \n", "0.268633 | \n", "
0 | \n", "FineWeb (FW) | \n", "0 | \n", "7000 | \n", "0.404859 | \n", "0.298 | \n", "0.310 | \n", "0.367 | \n", "0.424 | \n", "0.176 | \n", "0.290 | \n", "... | \n", "0.382 | \n", "0.396 | \n", "0.511 | \n", "0.494 | \n", "0.271701 | \n", "0.289459 | \n", "0.3250 | \n", "0.3510 | \n", "0.256203 | \n", "0.271874 | \n", "
0 | \n", "FineWeb (FW) | \n", "0 | \n", "8000 | \n", "0.403283 | \n", "0.330 | \n", "0.319 | \n", "0.364 | \n", "0.412 | \n", "0.176 | \n", "0.276 | \n", "... | \n", "0.383 | \n", "0.403 | \n", "0.510 | \n", "0.493 | \n", "0.267533 | \n", "0.287018 | \n", "0.3295 | \n", "0.3510 | \n", "0.251046 | \n", "0.269266 | \n", "
5 rows × 22 columns
\n", "