{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "138889b92720ce2e", "metadata": { "ExecuteTime": { "end_time": "2024-05-13T14:36:31.336129Z", "start_time": "2024-05-13T14:36:31.323847Z" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | runname | \n", "seed | \n", "steps | \n", "agg_score | \n", "commonsense_qa/acc | \n", "commonsense_qa/acc_norm | \n", "hellaswag/acc | \n", "hellaswag/acc_norm | \n", "openbookqa/acc | \n", "openbookqa/acc_norm | \n", "... | \n", "siqa/acc | \n", "siqa/acc_norm | \n", "winogrande/acc | \n", "winogrande/acc_norm | \n", "sciq/acc | \n", "sciq/acc_norm | \n", "arc/acc | \n", "arc/acc_norm | \n", "mmlu/acc | \n", "mmlu/acc_norm | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "filtering-baseline-2019-18-40gt | \n", "5 | \n", "0 | \n", "0.330953 | \n", "0.186 | \n", "0.233 | \n", "0.272 | \n", "0.258 | \n", "0.166 | \n", "0.286 | \n", "... | \n", "0.367 | \n", "0.362 | \n", "0.516 | \n", "0.497 | \n", "0.210 | \n", "0.202 | \n", "0.2190 | \n", "0.2515 | \n", "0.230285 | \n", "0.250127 | \n", "
1 | \n", "filtering-baseline-2019-18-40gt | \n", "5 | \n", "1000 | \n", "0.357474 | \n", "0.239 | \n", "0.271 | \n", "0.297 | \n", "0.287 | \n", "0.146 | \n", "0.260 | \n", "... | \n", "0.365 | \n", "0.396 | \n", "0.503 | \n", "0.486 | \n", "0.568 | \n", "0.502 | \n", "0.2665 | \n", "0.2855 | \n", "0.242526 | \n", "0.253291 | \n", "
2 | \n", "filtering-baseline-2019-18-40gt | \n", "5 | \n", "2000 | \n", "0.377436 | \n", "0.280 | \n", "0.284 | \n", "0.321 | \n", "0.332 | \n", "0.134 | \n", "0.268 | \n", "... | \n", "0.368 | \n", "0.399 | \n", "0.519 | \n", "0.502 | \n", "0.686 | \n", "0.590 | \n", "0.3030 | \n", "0.3215 | \n", "0.245745 | \n", "0.260988 | \n", "
3 | \n", "filtering-baseline-2019-18-40gt | \n", "5 | \n", "3000 | \n", "0.387994 | \n", "0.277 | \n", "0.291 | \n", "0.339 | \n", "0.359 | \n", "0.132 | \n", "0.280 | \n", "... | \n", "0.394 | \n", "0.404 | \n", "0.520 | \n", "0.503 | \n", "0.721 | \n", "0.622 | \n", "0.3210 | \n", "0.3385 | \n", "0.250427 | \n", "0.264451 | \n", "
4 | \n", "filtering-baseline-2019-18-40gt | \n", "5 | \n", "4000 | \n", "0.396110 | \n", "0.299 | \n", "0.315 | \n", "0.340 | \n", "0.366 | \n", "0.158 | \n", "0.286 | \n", "... | \n", "0.376 | \n", "0.399 | \n", "0.515 | \n", "0.500 | \n", "0.739 | \n", "0.620 | \n", "0.3320 | \n", "0.3445 | \n", "0.256134 | \n", "0.270382 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
250 | \n", "sm-baseline-c4 | \n", "6 | \n", "10000 | \n", "0.430443 | \n", "0.335 | \n", "0.326 | \n", "0.379 | \n", "0.474 | \n", "0.176 | \n", "0.340 | \n", "... | \n", "0.385 | \n", "0.406 | \n", "0.525 | \n", "0.523 | \n", "0.767 | \n", "0.675 | \n", "0.3765 | \n", "0.3750 | \n", "0.269139 | \n", "0.280545 | \n", "
251 | \n", "sm-baseline-c4 | \n", "6 | \n", "11000 | \n", "0.430776 | \n", "0.341 | \n", "0.323 | \n", "0.391 | \n", "0.481 | \n", "0.192 | \n", "0.346 | \n", "... | \n", "0.390 | \n", "0.405 | \n", "0.531 | \n", "0.515 | \n", "0.766 | \n", "0.676 | \n", "0.3775 | \n", "0.3770 | \n", "0.266895 | \n", "0.281210 | \n", "
252 | \n", "sm-baseline-c4 | \n", "6 | \n", "12000 | \n", "0.430352 | \n", "0.340 | \n", "0.319 | \n", "0.392 | \n", "0.475 | \n", "0.192 | \n", "0.342 | \n", "... | \n", "0.377 | \n", "0.395 | \n", "0.528 | \n", "0.518 | \n", "0.785 | \n", "0.688 | \n", "0.3755 | \n", "0.3840 | \n", "0.267159 | \n", "0.279819 | \n", "
253 | \n", "sm-baseline-c4 | \n", "6 | \n", "13000 | \n", "0.432136 | \n", "0.339 | \n", "0.326 | \n", "0.395 | \n", "0.477 | \n", "0.198 | \n", "0.348 | \n", "... | \n", "0.390 | \n", "0.405 | \n", "0.529 | \n", "0.518 | \n", "0.785 | \n", "0.682 | \n", "0.3780 | \n", "0.3825 | \n", "0.269719 | \n", "0.281585 | \n", "
254 | \n", "sm-baseline-c4 | \n", "6 | \n", "13500 | \n", "0.433866 | \n", "0.344 | \n", "0.328 | \n", "0.394 | \n", "0.484 | \n", "0.198 | \n", "0.334 | \n", "... | \n", "0.388 | \n", "0.406 | \n", "0.531 | \n", "0.523 | \n", "0.778 | \n", "0.682 | \n", "0.3795 | \n", "0.3845 | \n", "0.269601 | \n", "0.284425 | \n", "
255 rows × 22 columns
\n", "