{ "cells": [ { "cell_type": "code", "execution_count": 23, "id": "138889b92720ce2e", "metadata": { "ExecuteTime": { "end_time": "2024-04-30T13:28:07.130909Z", "start_time": "2024-04-30T13:28:06.470042Z" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | runname | \n", "seed | \n", "steps | \n", "agg_score | \n", "commonsense_qa/acc | \n", "commonsense_qa/acc_norm | \n", "hellaswag/acc | \n", "hellaswag/acc_norm | \n", "openbookqa/acc | \n", "openbookqa/acc_norm | \n", "... | \n", "siqa/acc | \n", "siqa/acc_norm | \n", "winogrande/acc | \n", "winogrande/acc_norm | \n", "sciq/acc | \n", "sciq/acc_norm | \n", "arc/acc | \n", "arc/acc_norm | \n", "mmlu/acc | \n", "mmlu/acc_norm | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "deduped_removed_cross | \n", "5 | \n", "0 | \n", "0.330893 | \n", "0.186 | \n", "0.233 | \n", "0.272 | \n", "0.258 | \n", "0.166 | \n", "0.286 | \n", "... | \n", "0.367 | \n", "0.362 | \n", "0.516 | \n", "0.497 | \n", "0.208 | \n", "0.202 | \n", "0.2195 | \n", "0.2510 | \n", "0.230294 | \n", "0.250147 | \n", "
1 | \n", "deduped_removed_cross | \n", "5 | \n", "1000 | \n", "0.354090 | \n", "0.253 | \n", "0.257 | \n", "0.290 | \n", "0.278 | \n", "0.124 | \n", "0.264 | \n", "... | \n", "0.368 | \n", "0.389 | \n", "0.509 | \n", "0.491 | \n", "0.582 | \n", "0.516 | \n", "0.2825 | \n", "0.2955 | \n", "0.239520 | \n", "0.253223 | \n", "
2 | \n", "deduped_removed_cross | \n", "5 | \n", "2000 | \n", "0.373601 | \n", "0.274 | \n", "0.290 | \n", "0.313 | \n", "0.312 | \n", "0.116 | \n", "0.258 | \n", "... | \n", "0.367 | \n", "0.397 | \n", "0.516 | \n", "0.505 | \n", "0.686 | \n", "0.582 | \n", "0.3090 | \n", "0.3200 | \n", "0.247320 | \n", "0.262812 | \n", "
3 | \n", "deduped_removed_cross | \n", "5 | \n", "3000 | \n", "0.383122 | \n", "0.306 | \n", "0.292 | \n", "0.323 | \n", "0.335 | \n", "0.150 | \n", "0.278 | \n", "... | \n", "0.371 | \n", "0.401 | \n", "0.513 | \n", "0.500 | \n", "0.712 | \n", "0.611 | \n", "0.3075 | \n", "0.3415 | \n", "0.248568 | \n", "0.263474 | \n", "
4 | \n", "deduped_removed_cross | \n", "5 | \n", "4000 | \n", "0.390222 | \n", "0.300 | \n", "0.292 | \n", "0.324 | \n", "0.351 | \n", "0.144 | \n", "0.278 | \n", "... | \n", "0.386 | \n", "0.395 | \n", "0.511 | \n", "0.511 | \n", "0.750 | \n", "0.658 | \n", "0.3260 | \n", "0.3445 | \n", "0.259246 | \n", "0.273276 | \n", "
5 | \n", "deduped_removed_cross | \n", "5 | \n", "5000 | \n", "0.400239 | \n", "0.322 | \n", "0.308 | \n", "0.325 | \n", "0.364 | \n", "0.172 | \n", "0.298 | \n", "... | \n", "0.382 | \n", "0.398 | \n", "0.518 | \n", "0.522 | \n", "0.751 | \n", "0.661 | \n", "0.3470 | \n", "0.3545 | \n", "0.258485 | \n", "0.271414 | \n", "
6 | \n", "deduped_removed_cross | \n", "5 | \n", "6000 | \n", "0.401484 | \n", "0.315 | \n", "0.314 | \n", "0.341 | \n", "0.372 | \n", "0.162 | \n", "0.314 | \n", "... | \n", "0.377 | \n", "0.390 | \n", "0.498 | \n", "0.492 | \n", "0.776 | \n", "0.669 | \n", "0.3530 | \n", "0.3565 | \n", "0.261842 | \n", "0.276371 | \n", "
7 | \n", "deduped_removed_cross | \n", "5 | \n", "7000 | \n", "0.403533 | \n", "0.324 | \n", "0.315 | \n", "0.350 | \n", "0.386 | \n", "0.188 | \n", "0.298 | \n", "... | \n", "0.376 | \n", "0.384 | \n", "0.518 | \n", "0.521 | \n", "0.769 | \n", "0.672 | \n", "0.3625 | \n", "0.3585 | \n", "0.265558 | \n", "0.274768 | \n", "
8 | \n", "deduped_removed_cross | \n", "5 | \n", "8000 | \n", "0.411774 | \n", "0.344 | \n", "0.313 | \n", "0.352 | \n", "0.409 | \n", "0.170 | \n", "0.310 | \n", "... | \n", "0.374 | \n", "0.391 | \n", "0.530 | \n", "0.521 | \n", "0.781 | \n", "0.677 | \n", "0.3530 | \n", "0.3615 | \n", "0.267141 | \n", "0.283691 | \n", "
9 | \n", "deduped_removed_cross | \n", "5 | \n", "9000 | \n", "0.410993 | \n", "0.335 | \n", "0.322 | \n", "0.361 | \n", "0.404 | \n", "0.182 | \n", "0.294 | \n", "... | \n", "0.374 | \n", "0.391 | \n", "0.526 | \n", "0.514 | \n", "0.769 | \n", "0.672 | \n", "0.3630 | \n", "0.3715 | \n", "0.266464 | \n", "0.284446 | \n", "
10 | \n", "deduped_removed_cross | \n", "5 | \n", "10000 | \n", "0.417883 | \n", "0.330 | \n", "0.320 | \n", "0.370 | \n", "0.417 | \n", "0.192 | \n", "0.324 | \n", "... | \n", "0.389 | \n", "0.389 | \n", "0.518 | \n", "0.524 | \n", "0.785 | \n", "0.682 | \n", "0.3735 | \n", "0.3745 | \n", "0.268085 | \n", "0.283562 | \n", "
11 | \n", "deduped_removed_cross | \n", "5 | \n", "11000 | \n", "0.422325 | \n", "0.332 | \n", "0.328 | \n", "0.366 | \n", "0.426 | \n", "0.188 | \n", "0.320 | \n", "... | \n", "0.398 | \n", "0.397 | \n", "0.535 | \n", "0.529 | \n", "0.801 | \n", "0.695 | \n", "0.3775 | \n", "0.3800 | \n", "0.267457 | \n", "0.285596 | \n", "
12 | \n", "deduped_removed_cross | \n", "5 | \n", "12000 | \n", "0.420167 | \n", "0.348 | \n", "0.324 | \n", "0.364 | \n", "0.434 | \n", "0.194 | \n", "0.306 | \n", "... | \n", "0.377 | \n", "0.392 | \n", "0.541 | \n", "0.527 | \n", "0.790 | \n", "0.690 | \n", "0.3680 | \n", "0.3755 | \n", "0.267547 | \n", "0.285836 | \n", "
13 | \n", "deduped_removed_cross | \n", "5 | \n", "13000 | \n", "0.422913 | \n", "0.346 | \n", "0.330 | \n", "0.372 | \n", "0.438 | \n", "0.190 | \n", "0.320 | \n", "... | \n", "0.392 | \n", "0.396 | \n", "0.540 | \n", "0.522 | \n", "0.802 | \n", "0.707 | \n", "0.3760 | \n", "0.3845 | \n", "0.271108 | \n", "0.287802 | \n", "
14 | \n", "deduped_removed_cross | \n", "5 | \n", "13500 | \n", "0.421868 | \n", "0.345 | \n", "0.322 | \n", "0.370 | \n", "0.431 | \n", "0.202 | \n", "0.330 | \n", "... | \n", "0.387 | \n", "0.392 | \n", "0.540 | \n", "0.516 | \n", "0.797 | \n", "0.700 | \n", "0.3790 | \n", "0.3870 | \n", "0.269510 | \n", "0.287944 | \n", "
15 | \n", "deduped_removed_cross | \n", "6 | \n", "0 | \n", "0.330893 | \n", "0.186 | \n", "0.233 | \n", "0.272 | \n", "0.258 | \n", "0.166 | \n", "0.286 | \n", "... | \n", "0.367 | \n", "0.362 | \n", "0.516 | \n", "0.497 | \n", "0.208 | \n", "0.202 | \n", "0.2195 | \n", "0.2510 | \n", "0.230294 | \n", "0.250147 | \n", "
16 | \n", "deduped_removed_cross | \n", "6 | \n", "1000 | \n", "0.360039 | \n", "0.236 | \n", "0.259 | \n", "0.283 | \n", "0.277 | \n", "0.130 | \n", "0.274 | \n", "... | \n", "0.354 | \n", "0.386 | \n", "0.509 | \n", "0.507 | \n", "0.559 | \n", "0.500 | \n", "0.2590 | \n", "0.2970 | \n", "0.243455 | \n", "0.254311 | \n", "
17 | \n", "deduped_removed_cross | \n", "6 | \n", "2000 | \n", "0.371564 | \n", "0.270 | \n", "0.283 | \n", "0.303 | \n", "0.305 | \n", "0.132 | \n", "0.280 | \n", "... | \n", "0.377 | \n", "0.392 | \n", "0.522 | \n", "0.504 | \n", "0.665 | \n", "0.566 | \n", "0.3040 | \n", "0.3135 | \n", "0.249051 | \n", "0.255010 | \n", "
18 | \n", "deduped_removed_cross | \n", "6 | \n", "3000 | \n", "0.383770 | \n", "0.283 | \n", "0.286 | \n", "0.323 | \n", "0.320 | \n", "0.156 | \n", "0.296 | \n", "... | \n", "0.375 | \n", "0.394 | \n", "0.503 | \n", "0.497 | \n", "0.721 | \n", "0.626 | \n", "0.3140 | \n", "0.3410 | \n", "0.254015 | \n", "0.266158 | \n", "
19 | \n", "deduped_removed_cross | \n", "6 | \n", "4000 | \n", "0.391082 | \n", "0.293 | \n", "0.298 | \n", "0.339 | \n", "0.361 | \n", "0.160 | \n", "0.292 | \n", "... | \n", "0.380 | \n", "0.399 | \n", "0.505 | \n", "0.494 | \n", "0.719 | \n", "0.615 | \n", "0.3375 | \n", "0.3375 | \n", "0.256696 | \n", "0.268152 | \n", "
20 | \n", "deduped_removed_cross | \n", "6 | \n", "5000 | \n", "0.399130 | \n", "0.309 | \n", "0.311 | \n", "0.343 | \n", "0.376 | \n", "0.160 | \n", "0.286 | \n", "... | \n", "0.392 | \n", "0.401 | \n", "0.525 | \n", "0.512 | \n", "0.733 | \n", "0.639 | \n", "0.3390 | \n", "0.3580 | \n", "0.257450 | \n", "0.271040 | \n", "
21 | \n", "deduped_removed_cross | \n", "6 | \n", "6000 | \n", "0.402792 | \n", "0.326 | \n", "0.318 | \n", "0.353 | \n", "0.387 | \n", "0.176 | \n", "0.284 | \n", "... | \n", "0.376 | \n", "0.405 | \n", "0.522 | \n", "0.514 | \n", "0.753 | \n", "0.664 | \n", "0.3450 | \n", "0.3645 | \n", "0.262549 | \n", "0.273836 | \n", "
22 | \n", "deduped_removed_cross | \n", "6 | \n", "7000 | \n", "0.408846 | \n", "0.319 | \n", "0.319 | \n", "0.356 | \n", "0.407 | \n", "0.172 | \n", "0.300 | \n", "... | \n", "0.386 | \n", "0.399 | \n", "0.521 | \n", "0.521 | \n", "0.764 | \n", "0.662 | \n", "0.3585 | \n", "0.3625 | \n", "0.262740 | \n", "0.276266 | \n", "
23 | \n", "deduped_removed_cross | \n", "6 | \n", "8000 | \n", "0.411429 | \n", "0.314 | \n", "0.323 | \n", "0.361 | \n", "0.412 | \n", "0.168 | \n", "0.286 | \n", "... | \n", "0.395 | \n", "0.404 | \n", "0.533 | \n", "0.511 | \n", "0.754 | \n", "0.646 | \n", "0.3555 | \n", "0.3690 | \n", "0.263875 | \n", "0.278433 | \n", "
24 | \n", "deduped_removed_cross | \n", "6 | \n", "9000 | \n", "0.417279 | \n", "0.337 | \n", "0.329 | \n", "0.367 | \n", "0.421 | \n", "0.176 | \n", "0.294 | \n", "... | \n", "0.407 | \n", "0.403 | \n", "0.532 | \n", "0.526 | \n", "0.775 | \n", "0.666 | \n", "0.3605 | \n", "0.3730 | \n", "0.265119 | \n", "0.283235 | \n", "
25 | \n", "deduped_removed_cross | \n", "6 | \n", "10000 | \n", "0.421399 | \n", "0.339 | \n", "0.322 | \n", "0.376 | \n", "0.426 | \n", "0.174 | \n", "0.320 | \n", "... | \n", "0.397 | \n", "0.401 | \n", "0.542 | \n", "0.532 | \n", "0.764 | \n", "0.673 | \n", "0.3675 | \n", "0.3840 | \n", "0.272474 | \n", "0.286190 | \n", "
26 | \n", "deduped_removed_cross | \n", "6 | \n", "11000 | \n", "0.421204 | \n", "0.349 | \n", "0.337 | \n", "0.378 | \n", "0.428 | \n", "0.188 | \n", "0.314 | \n", "... | \n", "0.403 | \n", "0.398 | \n", "0.530 | \n", "0.516 | \n", "NaN | \n", "NaN | \n", "0.3690 | \n", "0.3780 | \n", "0.269131 | \n", "0.288633 | \n", "
27 | \n", "deduped_removed_cross | \n", "6 | \n", "12000 | \n", "0.421667 | \n", "0.342 | \n", "0.326 | \n", "0.383 | \n", "0.434 | \n", "0.174 | \n", "0.310 | \n", "... | \n", "0.399 | \n", "0.396 | \n", "0.538 | \n", "0.525 | \n", "NaN | \n", "NaN | \n", "0.3660 | \n", "0.3810 | \n", "0.270691 | \n", "0.287333 | \n", "
28 | \n", "deduped_removed_cross | \n", "6 | \n", "13000 | \n", "0.424979 | \n", "0.349 | \n", "0.336 | \n", "0.383 | \n", "0.440 | \n", "0.178 | \n", "0.314 | \n", "... | \n", "0.401 | \n", "0.392 | \n", "0.535 | \n", "0.526 | \n", "NaN | \n", "NaN | \n", "0.3785 | \n", "0.3905 | \n", "0.268910 | \n", "0.289335 | \n", "
29 | \n", "deduped_removed_cross | \n", "6 | \n", "13500 | \n", "0.425356 | \n", "0.347 | \n", "0.333 | \n", "0.386 | \n", "0.444 | \n", "0.186 | \n", "0.322 | \n", "... | \n", "0.406 | \n", "0.392 | \n", "0.543 | \n", "0.527 | \n", "0.783 | \n", "0.682 | \n", "0.3745 | \n", "0.3890 | \n", "0.270869 | \n", "0.289845 | \n", "
30 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "0 | \n", "0.331018 | \n", "0.186 | \n", "0.233 | \n", "0.272 | \n", "0.258 | \n", "0.166 | \n", "0.286 | \n", "... | \n", "0.367 | \n", "0.362 | \n", "0.515 | \n", "0.497 | \n", "NaN | \n", "NaN | \n", "0.2195 | \n", "0.2520 | \n", "0.230228 | \n", "0.250147 | \n", "
31 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "1000 | \n", "0.349494 | \n", "0.217 | \n", "0.248 | \n", "0.288 | \n", "0.286 | \n", "0.104 | \n", "0.244 | \n", "... | \n", "0.366 | \n", "0.380 | \n", "0.499 | \n", "0.492 | \n", "0.546 | \n", "0.484 | \n", "0.2565 | \n", "0.2780 | \n", "0.239651 | \n", "0.253956 | \n", "
32 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "2000 | \n", "0.367893 | \n", "0.245 | \n", "0.280 | \n", "0.298 | \n", "0.288 | \n", "0.128 | \n", "0.280 | \n", "... | \n", "0.366 | \n", "0.383 | \n", "0.519 | \n", "0.499 | \n", "NaN | \n", "NaN | \n", "0.2845 | \n", "0.3115 | \n", "0.239715 | \n", "0.253644 | \n", "
33 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "3000 | \n", "0.379114 | \n", "0.269 | \n", "0.291 | \n", "0.304 | \n", "0.328 | \n", "0.138 | \n", "0.266 | \n", "... | \n", "0.362 | \n", "0.394 | \n", "0.519 | \n", "0.504 | \n", "NaN | \n", "NaN | \n", "0.3035 | \n", "0.3335 | \n", "0.250551 | \n", "0.262409 | \n", "
34 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "4000 | \n", "0.383025 | \n", "0.277 | \n", "0.289 | \n", "0.311 | \n", "0.338 | \n", "0.132 | \n", "0.280 | \n", "... | \n", "0.361 | \n", "0.393 | \n", "0.502 | \n", "0.496 | \n", "NaN | \n", "NaN | \n", "0.3105 | \n", "0.3375 | \n", "0.249887 | \n", "0.263702 | \n", "
35 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "5000 | \n", "0.387223 | \n", "0.290 | \n", "0.306 | \n", "0.327 | \n", "0.356 | \n", "0.138 | \n", "0.276 | \n", "... | \n", "0.365 | \n", "0.389 | \n", "0.515 | \n", "0.511 | \n", "NaN | \n", "NaN | \n", "0.3190 | \n", "0.3380 | \n", "0.252621 | \n", "0.266785 | \n", "
36 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "6000 | \n", "0.394011 | \n", "0.303 | \n", "0.305 | \n", "0.332 | \n", "0.356 | \n", "0.142 | \n", "0.288 | \n", "... | \n", "0.375 | \n", "0.397 | \n", "0.540 | \n", "0.521 | \n", "NaN | \n", "NaN | \n", "0.3280 | \n", "0.3515 | \n", "0.252255 | \n", "0.265589 | \n", "
37 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "7000 | \n", "0.398090 | \n", "0.316 | \n", "0.305 | \n", "0.337 | \n", "0.359 | \n", "0.142 | \n", "0.302 | \n", "... | \n", "0.372 | \n", "0.401 | \n", "0.531 | \n", "0.510 | \n", "NaN | \n", "NaN | \n", "0.3320 | \n", "0.3550 | \n", "0.250146 | \n", "0.267719 | \n", "
38 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "8000 | \n", "0.398513 | \n", "0.326 | \n", "0.315 | \n", "0.339 | \n", "0.372 | \n", "0.150 | \n", "0.288 | \n", "... | \n", "0.372 | \n", "0.396 | \n", "0.532 | \n", "0.508 | \n", "NaN | \n", "NaN | \n", "0.3365 | \n", "0.3630 | \n", "0.258433 | \n", "0.274100 | \n", "
39 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "9000 | \n", "0.397494 | \n", "0.310 | \n", "0.314 | \n", "0.345 | \n", "0.374 | \n", "0.140 | \n", "0.274 | \n", "... | \n", "0.364 | \n", "0.392 | \n", "0.529 | \n", "0.506 | \n", "NaN | \n", "NaN | \n", "0.3445 | \n", "0.3610 | \n", "0.258927 | \n", "0.271955 | \n", "
40 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "10000 | \n", "0.402640 | \n", "0.321 | \n", "0.327 | \n", "0.347 | \n", "0.383 | \n", "0.156 | \n", "0.280 | \n", "... | \n", "0.376 | \n", "0.397 | \n", "0.529 | \n", "0.513 | \n", "NaN | \n", "NaN | \n", "0.3445 | \n", "0.3650 | \n", "0.258294 | \n", "0.272123 | \n", "
41 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "11000 | \n", "0.402599 | \n", "0.318 | \n", "0.322 | \n", "0.348 | \n", "0.381 | \n", "0.160 | \n", "0.284 | \n", "... | \n", "0.367 | \n", "0.387 | \n", "0.538 | \n", "0.516 | \n", "NaN | \n", "NaN | \n", "0.3490 | \n", "0.3660 | \n", "0.259610 | \n", "0.276792 | \n", "
42 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "12000 | \n", "0.407442 | \n", "0.328 | \n", "0.319 | \n", "0.349 | \n", "0.395 | \n", "0.162 | \n", "0.290 | \n", "... | \n", "0.367 | \n", "0.407 | \n", "0.528 | \n", "0.510 | \n", "NaN | \n", "NaN | \n", "0.3510 | \n", "0.3700 | \n", "0.260350 | \n", "0.279535 | \n", "
43 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "13000 | \n", "0.405577 | \n", "0.324 | \n", "0.318 | \n", "0.350 | \n", "0.385 | \n", "0.158 | \n", "0.290 | \n", "... | \n", "0.373 | \n", "0.396 | \n", "0.538 | \n", "0.510 | \n", "NaN | \n", "NaN | \n", "0.3540 | \n", "0.3730 | \n", "0.258481 | \n", "0.274616 | \n", "
44 | \n", "cross_minhash_dump_CC-MAIN-2013-48 | \n", "6 | \n", "13500 | \n", "0.405000 | \n", "0.320 | \n", "0.312 | \n", "0.354 | \n", "0.393 | \n", "0.152 | \n", "0.288 | \n", "... | \n", "0.367 | \n", "0.396 | \n", "0.528 | \n", "0.513 | \n", "0.785 | \n", "0.675 | \n", "0.3590 | \n", "0.3660 | \n", "0.260174 | \n", "0.278002 | \n", "
45 rows × 22 columns
\n", "