{ "cells": [ { "cell_type": "code", "execution_count": 19, "id": "138889b92720ce2e", "metadata": { "ExecuteTime": { "end_time": "2024-04-30T15:08:02.398435Z", "start_time": "2024-04-30T15:08:02.194901Z" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | runname | \n", "seed | \n", "steps | \n", "agg_score | \n", "commonsense_qa/acc | \n", "commonsense_qa/acc_norm | \n", "hellaswag/acc | \n", "hellaswag/acc_norm | \n", "openbookqa/acc | \n", "openbookqa/acc_norm | \n", "... | \n", "siqa/acc | \n", "siqa/acc_norm | \n", "winogrande/acc | \n", "winogrande/acc_norm | \n", "sciq/acc | \n", "sciq/acc_norm | \n", "arc/acc | \n", "arc/acc_norm | \n", "mmlu/acc | \n", "mmlu/acc_norm | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "big-run-sampled_full_filtered_no_dedup | \n", "6 | \n", "0 | \n", "0.330893 | \n", "0.186 | \n", "0.233 | \n", "0.272 | \n", "0.258 | \n", "0.166 | \n", "0.286 | \n", "... | \n", "0.367 | \n", "0.362 | \n", "0.516 | \n", "0.497 | \n", "0.209 | \n", "0.202 | \n", "0.2195 | \n", "0.2510 | \n", "0.230294 | \n", "0.250147 | \n", "
1 | \n", "big-run-sampled_full_filtered_no_dedup | \n", "6 | \n", "1000 | \n", "0.360520 | \n", "0.254 | \n", "0.260 | \n", "0.290 | \n", "0.281 | \n", "0.138 | \n", "0.256 | \n", "... | \n", "0.362 | \n", "0.400 | \n", "0.517 | \n", "0.524 | \n", "0.573 | \n", "0.515 | \n", "0.2675 | \n", "0.2895 | \n", "0.239489 | \n", "0.251660 | \n", "
2 | \n", "big-run-sampled_full_filtered_no_dedup | \n", "6 | \n", "2000 | \n", "0.373315 | \n", "0.285 | \n", "0.278 | \n", "0.315 | \n", "0.323 | \n", "0.138 | \n", "0.272 | \n", "... | \n", "0.365 | \n", "0.395 | \n", "0.509 | \n", "0.490 | \n", "0.677 | \n", "0.596 | \n", "0.3075 | \n", "0.3235 | \n", "0.250318 | \n", "0.261019 | \n", "
3 | \n", "big-run-sampled_full_filtered_no_dedup | \n", "6 | \n", "3000 | \n", "0.388201 | \n", "0.294 | \n", "0.291 | \n", "0.327 | \n", "0.341 | \n", "0.152 | \n", "0.298 | \n", "... | \n", "0.371 | \n", "0.396 | \n", "0.512 | \n", "0.504 | \n", "0.712 | \n", "0.621 | \n", "0.3220 | \n", "0.3390 | \n", "0.255646 | \n", "0.266605 | \n", "
4 | \n", "big-run-sampled_full_filtered_no_dedup | \n", "6 | \n", "4000 | \n", "0.393412 | \n", "0.306 | \n", "0.307 | \n", "0.337 | \n", "0.360 | \n", "0.172 | \n", "0.284 | \n", "... | \n", "0.380 | \n", "0.402 | \n", "0.522 | \n", "0.510 | \n", "0.729 | \n", "0.612 | \n", "0.3100 | \n", "0.3385 | \n", "0.253048 | \n", "0.266798 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
670 | \n", "big-run-sampled_full_ind_minhash | \n", "6 | \n", "163000 | \n", "0.481842 | \n", "0.427 | \n", "0.393 | \n", "0.488 | \n", "0.579 | \n", "0.242 | \n", "0.358 | \n", "... | \n", "0.420 | \n", "0.397 | \n", "0.587 | \n", "0.568 | \n", "0.885 | \n", "0.809 | \n", "0.4760 | \n", "0.4595 | \n", "0.305843 | \n", "0.330238 | \n", "
671 | \n", "big-run-sampled_full_ind_minhash | \n", "6 | \n", "164000 | \n", "0.482727 | \n", "0.426 | \n", "0.394 | \n", "0.487 | \n", "0.582 | \n", "0.238 | \n", "0.360 | \n", "... | \n", "0.422 | \n", "0.398 | \n", "0.575 | \n", "0.562 | \n", "0.885 | \n", "0.827 | \n", "0.4745 | \n", "0.4625 | \n", "0.307377 | \n", "0.332317 | \n", "
672 | \n", "big-run-sampled_full_ind_minhash | \n", "6 | \n", "165000 | \n", "0.482413 | \n", "0.423 | \n", "0.397 | \n", "0.482 | \n", "0.573 | \n", "0.238 | \n", "0.360 | \n", "... | \n", "0.409 | \n", "0.396 | \n", "0.581 | \n", "0.569 | \n", "0.889 | \n", "0.829 | \n", "0.4675 | \n", "0.4600 | \n", "0.308059 | \n", "0.331304 | \n", "
673 | \n", "big-run-sampled_full_ind_minhash | \n", "6 | \n", "166000 | \n", "0.482014 | \n", "0.422 | \n", "0.391 | \n", "0.477 | \n", "0.573 | \n", "0.230 | \n", "0.358 | \n", "... | \n", "0.420 | \n", "0.400 | \n", "0.586 | \n", "0.566 | \n", "0.883 | \n", "0.817 | \n", "0.4660 | \n", "0.4645 | \n", "0.304975 | \n", "0.329611 | \n", "
674 | \n", "big-run-sampled_full_ind_minhash | \n", "6 | \n", "167000 | \n", "0.486587 | \n", "0.424 | \n", "0.402 | \n", "0.490 | \n", "0.579 | \n", "0.236 | \n", "0.360 | \n", "... | \n", "0.417 | \n", "0.405 | \n", "0.585 | \n", "0.575 | \n", "0.884 | \n", "0.832 | \n", "0.4760 | \n", "0.4715 | \n", "0.309503 | \n", "0.332197 | \n", "
675 rows × 22 columns
\n", "