{ "cells": [ { "cell_type": "code", "execution_count": 23, "id": "138889b92720ce2e", "metadata": { "ExecuteTime": { "end_time": "2024-04-30T13:28:07.130909Z", "start_time": "2024-04-30T13:28:06.470042Z" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
runnameseedstepsagg_scorecommonsense_qa/acccommonsense_qa/acc_normhellaswag/acchellaswag/acc_normopenbookqa/accopenbookqa/acc_norm...siqa/accsiqa/acc_normwinogrande/accwinogrande/acc_normsciq/accsciq/acc_normarc/accarc/acc_normmmlu/accmmlu/acc_norm
0deduped_removed_cross500.3308930.1860.2330.2720.2580.1660.286...0.3670.3620.5160.4970.2080.2020.21950.25100.2302940.250147
1deduped_removed_cross510000.3540900.2530.2570.2900.2780.1240.264...0.3680.3890.5090.4910.5820.5160.28250.29550.2395200.253223
2deduped_removed_cross520000.3736010.2740.2900.3130.3120.1160.258...0.3670.3970.5160.5050.6860.5820.30900.32000.2473200.262812
3deduped_removed_cross530000.3831220.3060.2920.3230.3350.1500.278...0.3710.4010.5130.5000.7120.6110.30750.34150.2485680.263474
4deduped_removed_cross540000.3902220.3000.2920.3240.3510.1440.278...0.3860.3950.5110.5110.7500.6580.32600.34450.2592460.273276
5deduped_removed_cross550000.4002390.3220.3080.3250.3640.1720.298...0.3820.3980.5180.5220.7510.6610.34700.35450.2584850.271414
6deduped_removed_cross560000.4014840.3150.3140.3410.3720.1620.314...0.3770.3900.4980.4920.7760.6690.35300.35650.2618420.276371
7deduped_removed_cross570000.4035330.3240.3150.3500.3860.1880.298...0.3760.3840.5180.5210.7690.6720.36250.35850.2655580.274768
8deduped_removed_cross580000.4117740.3440.3130.3520.4090.1700.310...0.3740.3910.5300.5210.7810.6770.35300.36150.2671410.283691
9deduped_removed_cross590000.4109930.3350.3220.3610.4040.1820.294...0.3740.3910.5260.5140.7690.6720.36300.37150.2664640.284446
10deduped_removed_cross5100000.4178830.3300.3200.3700.4170.1920.324...0.3890.3890.5180.5240.7850.6820.37350.37450.2680850.283562
11deduped_removed_cross5110000.4223250.3320.3280.3660.4260.1880.320...0.3980.3970.5350.5290.8010.6950.37750.38000.2674570.285596
12deduped_removed_cross5120000.4201670.3480.3240.3640.4340.1940.306...0.3770.3920.5410.5270.7900.6900.36800.37550.2675470.285836
13deduped_removed_cross5130000.4229130.3460.3300.3720.4380.1900.320...0.3920.3960.5400.5220.8020.7070.37600.38450.2711080.287802
14deduped_removed_cross5135000.4218680.3450.3220.3700.4310.2020.330...0.3870.3920.5400.5160.7970.7000.37900.38700.2695100.287944
15deduped_removed_cross600.3308930.1860.2330.2720.2580.1660.286...0.3670.3620.5160.4970.2080.2020.21950.25100.2302940.250147
16deduped_removed_cross610000.3600390.2360.2590.2830.2770.1300.274...0.3540.3860.5090.5070.5590.5000.25900.29700.2434550.254311
17deduped_removed_cross620000.3715640.2700.2830.3030.3050.1320.280...0.3770.3920.5220.5040.6650.5660.30400.31350.2490510.255010
18deduped_removed_cross630000.3837700.2830.2860.3230.3200.1560.296...0.3750.3940.5030.4970.7210.6260.31400.34100.2540150.266158
19deduped_removed_cross640000.3910820.2930.2980.3390.3610.1600.292...0.3800.3990.5050.4940.7190.6150.33750.33750.2566960.268152
20deduped_removed_cross650000.3991300.3090.3110.3430.3760.1600.286...0.3920.4010.5250.5120.7330.6390.33900.35800.2574500.271040
21deduped_removed_cross660000.4027920.3260.3180.3530.3870.1760.284...0.3760.4050.5220.5140.7530.6640.34500.36450.2625490.273836
22deduped_removed_cross670000.4088460.3190.3190.3560.4070.1720.300...0.3860.3990.5210.5210.7640.6620.35850.36250.2627400.276266
23deduped_removed_cross680000.4114290.3140.3230.3610.4120.1680.286...0.3950.4040.5330.5110.7540.6460.35550.36900.2638750.278433
24deduped_removed_cross690000.4172790.3370.3290.3670.4210.1760.294...0.4070.4030.5320.5260.7750.6660.36050.37300.2651190.283235
25deduped_removed_cross6100000.4213990.3390.3220.3760.4260.1740.320...0.3970.4010.5420.5320.7640.6730.36750.38400.2724740.286190
26deduped_removed_cross6110000.4212040.3490.3370.3780.4280.1880.314...0.4030.3980.5300.516NaNNaN0.36900.37800.2691310.288633
27deduped_removed_cross6120000.4216670.3420.3260.3830.4340.1740.310...0.3990.3960.5380.525NaNNaN0.36600.38100.2706910.287333
28deduped_removed_cross6130000.4249790.3490.3360.3830.4400.1780.314...0.4010.3920.5350.526NaNNaN0.37850.39050.2689100.289335
29deduped_removed_cross6135000.4253560.3470.3330.3860.4440.1860.322...0.4060.3920.5430.5270.7830.6820.37450.38900.2708690.289845
30cross_minhash_dump_CC-MAIN-2013-48600.3310180.1860.2330.2720.2580.1660.286...0.3670.3620.5150.497NaNNaN0.21950.25200.2302280.250147
31cross_minhash_dump_CC-MAIN-2013-48610000.3494940.2170.2480.2880.2860.1040.244...0.3660.3800.4990.4920.5460.4840.25650.27800.2396510.253956
32cross_minhash_dump_CC-MAIN-2013-48620000.3678930.2450.2800.2980.2880.1280.280...0.3660.3830.5190.499NaNNaN0.28450.31150.2397150.253644
33cross_minhash_dump_CC-MAIN-2013-48630000.3791140.2690.2910.3040.3280.1380.266...0.3620.3940.5190.504NaNNaN0.30350.33350.2505510.262409
34cross_minhash_dump_CC-MAIN-2013-48640000.3830250.2770.2890.3110.3380.1320.280...0.3610.3930.5020.496NaNNaN0.31050.33750.2498870.263702
35cross_minhash_dump_CC-MAIN-2013-48650000.3872230.2900.3060.3270.3560.1380.276...0.3650.3890.5150.511NaNNaN0.31900.33800.2526210.266785
36cross_minhash_dump_CC-MAIN-2013-48660000.3940110.3030.3050.3320.3560.1420.288...0.3750.3970.5400.521NaNNaN0.32800.35150.2522550.265589
37cross_minhash_dump_CC-MAIN-2013-48670000.3980900.3160.3050.3370.3590.1420.302...0.3720.4010.5310.510NaNNaN0.33200.35500.2501460.267719
38cross_minhash_dump_CC-MAIN-2013-48680000.3985130.3260.3150.3390.3720.1500.288...0.3720.3960.5320.508NaNNaN0.33650.36300.2584330.274100
39cross_minhash_dump_CC-MAIN-2013-48690000.3974940.3100.3140.3450.3740.1400.274...0.3640.3920.5290.506NaNNaN0.34450.36100.2589270.271955
40cross_minhash_dump_CC-MAIN-2013-486100000.4026400.3210.3270.3470.3830.1560.280...0.3760.3970.5290.513NaNNaN0.34450.36500.2582940.272123
41cross_minhash_dump_CC-MAIN-2013-486110000.4025990.3180.3220.3480.3810.1600.284...0.3670.3870.5380.516NaNNaN0.34900.36600.2596100.276792
42cross_minhash_dump_CC-MAIN-2013-486120000.4074420.3280.3190.3490.3950.1620.290...0.3670.4070.5280.510NaNNaN0.35100.37000.2603500.279535
43cross_minhash_dump_CC-MAIN-2013-486130000.4055770.3240.3180.3500.3850.1580.290...0.3730.3960.5380.510NaNNaN0.35400.37300.2584810.274616
44cross_minhash_dump_CC-MAIN-2013-486135000.4050000.3200.3120.3540.3930.1520.288...0.3670.3960.5280.5130.7850.6750.35900.36600.2601740.278002
\n", "

45 rows × 22 columns

\n", "
" ], "text/plain": [ " runname seed steps agg_score \\\n", "0 deduped_removed_cross 5 0 0.330893 \n", "1 deduped_removed_cross 5 1000 0.354090 \n", "2 deduped_removed_cross 5 2000 0.373601 \n", "3 deduped_removed_cross 5 3000 0.383122 \n", "4 deduped_removed_cross 5 4000 0.390222 \n", "5 deduped_removed_cross 5 5000 0.400239 \n", "6 deduped_removed_cross 5 6000 0.401484 \n", "7 deduped_removed_cross 5 7000 0.403533 \n", "8 deduped_removed_cross 5 8000 0.411774 \n", "9 deduped_removed_cross 5 9000 0.410993 \n", "10 deduped_removed_cross 5 10000 0.417883 \n", "11 deduped_removed_cross 5 11000 0.422325 \n", "12 deduped_removed_cross 5 12000 0.420167 \n", "13 deduped_removed_cross 5 13000 0.422913 \n", "14 deduped_removed_cross 5 13500 0.421868 \n", "15 deduped_removed_cross 6 0 0.330893 \n", "16 deduped_removed_cross 6 1000 0.360039 \n", "17 deduped_removed_cross 6 2000 0.371564 \n", "18 deduped_removed_cross 6 3000 0.383770 \n", "19 deduped_removed_cross 6 4000 0.391082 \n", "20 deduped_removed_cross 6 5000 0.399130 \n", "21 deduped_removed_cross 6 6000 0.402792 \n", "22 deduped_removed_cross 6 7000 0.408846 \n", "23 deduped_removed_cross 6 8000 0.411429 \n", "24 deduped_removed_cross 6 9000 0.417279 \n", "25 deduped_removed_cross 6 10000 0.421399 \n", "26 deduped_removed_cross 6 11000 0.421204 \n", "27 deduped_removed_cross 6 12000 0.421667 \n", "28 deduped_removed_cross 6 13000 0.424979 \n", "29 deduped_removed_cross 6 13500 0.425356 \n", "30 cross_minhash_dump_CC-MAIN-2013-48 6 0 0.331018 \n", "31 cross_minhash_dump_CC-MAIN-2013-48 6 1000 0.349494 \n", "32 cross_minhash_dump_CC-MAIN-2013-48 6 2000 0.367893 \n", "33 cross_minhash_dump_CC-MAIN-2013-48 6 3000 0.379114 \n", "34 cross_minhash_dump_CC-MAIN-2013-48 6 4000 0.383025 \n", "35 cross_minhash_dump_CC-MAIN-2013-48 6 5000 0.387223 \n", "36 cross_minhash_dump_CC-MAIN-2013-48 6 6000 0.394011 \n", "37 cross_minhash_dump_CC-MAIN-2013-48 6 7000 0.398090 \n", "38 cross_minhash_dump_CC-MAIN-2013-48 6 8000 0.398513 \n", "39 cross_minhash_dump_CC-MAIN-2013-48 6 9000 0.397494 \n", "40 cross_minhash_dump_CC-MAIN-2013-48 6 10000 0.402640 \n", "41 cross_minhash_dump_CC-MAIN-2013-48 6 11000 0.402599 \n", "42 cross_minhash_dump_CC-MAIN-2013-48 6 12000 0.407442 \n", "43 cross_minhash_dump_CC-MAIN-2013-48 6 13000 0.405577 \n", "44 cross_minhash_dump_CC-MAIN-2013-48 6 13500 0.405000 \n", "\n", " commonsense_qa/acc commonsense_qa/acc_norm hellaswag/acc \\\n", "0 0.186 0.233 0.272 \n", "1 0.253 0.257 0.290 \n", "2 0.274 0.290 0.313 \n", "3 0.306 0.292 0.323 \n", "4 0.300 0.292 0.324 \n", "5 0.322 0.308 0.325 \n", "6 0.315 0.314 0.341 \n", "7 0.324 0.315 0.350 \n", "8 0.344 0.313 0.352 \n", "9 0.335 0.322 0.361 \n", "10 0.330 0.320 0.370 \n", "11 0.332 0.328 0.366 \n", "12 0.348 0.324 0.364 \n", "13 0.346 0.330 0.372 \n", "14 0.345 0.322 0.370 \n", "15 0.186 0.233 0.272 \n", "16 0.236 0.259 0.283 \n", "17 0.270 0.283 0.303 \n", "18 0.283 0.286 0.323 \n", "19 0.293 0.298 0.339 \n", "20 0.309 0.311 0.343 \n", "21 0.326 0.318 0.353 \n", "22 0.319 0.319 0.356 \n", "23 0.314 0.323 0.361 \n", "24 0.337 0.329 0.367 \n", "25 0.339 0.322 0.376 \n", "26 0.349 0.337 0.378 \n", "27 0.342 0.326 0.383 \n", "28 0.349 0.336 0.383 \n", "29 0.347 0.333 0.386 \n", "30 0.186 0.233 0.272 \n", "31 0.217 0.248 0.288 \n", "32 0.245 0.280 0.298 \n", "33 0.269 0.291 0.304 \n", "34 0.277 0.289 0.311 \n", "35 0.290 0.306 0.327 \n", "36 0.303 0.305 0.332 \n", "37 0.316 0.305 0.337 \n", "38 0.326 0.315 0.339 \n", "39 0.310 0.314 0.345 \n", "40 0.321 0.327 0.347 \n", "41 0.318 0.322 0.348 \n", "42 0.328 0.319 0.349 \n", "43 0.324 0.318 0.350 \n", "44 0.320 0.312 0.354 \n", "\n", " hellaswag/acc_norm openbookqa/acc openbookqa/acc_norm ... siqa/acc \\\n", "0 0.258 0.166 0.286 ... 0.367 \n", "1 0.278 0.124 0.264 ... 0.368 \n", "2 0.312 0.116 0.258 ... 0.367 \n", "3 0.335 0.150 0.278 ... 0.371 \n", "4 0.351 0.144 0.278 ... 0.386 \n", "5 0.364 0.172 0.298 ... 0.382 \n", "6 0.372 0.162 0.314 ... 0.377 \n", "7 0.386 0.188 0.298 ... 0.376 \n", "8 0.409 0.170 0.310 ... 0.374 \n", "9 0.404 0.182 0.294 ... 0.374 \n", "10 0.417 0.192 0.324 ... 0.389 \n", "11 0.426 0.188 0.320 ... 0.398 \n", "12 0.434 0.194 0.306 ... 0.377 \n", "13 0.438 0.190 0.320 ... 0.392 \n", "14 0.431 0.202 0.330 ... 0.387 \n", "15 0.258 0.166 0.286 ... 0.367 \n", "16 0.277 0.130 0.274 ... 0.354 \n", "17 0.305 0.132 0.280 ... 0.377 \n", "18 0.320 0.156 0.296 ... 0.375 \n", "19 0.361 0.160 0.292 ... 0.380 \n", "20 0.376 0.160 0.286 ... 0.392 \n", "21 0.387 0.176 0.284 ... 0.376 \n", "22 0.407 0.172 0.300 ... 0.386 \n", "23 0.412 0.168 0.286 ... 0.395 \n", "24 0.421 0.176 0.294 ... 0.407 \n", "25 0.426 0.174 0.320 ... 0.397 \n", "26 0.428 0.188 0.314 ... 0.403 \n", "27 0.434 0.174 0.310 ... 0.399 \n", "28 0.440 0.178 0.314 ... 0.401 \n", "29 0.444 0.186 0.322 ... 0.406 \n", "30 0.258 0.166 0.286 ... 0.367 \n", "31 0.286 0.104 0.244 ... 0.366 \n", "32 0.288 0.128 0.280 ... 0.366 \n", "33 0.328 0.138 0.266 ... 0.362 \n", "34 0.338 0.132 0.280 ... 0.361 \n", "35 0.356 0.138 0.276 ... 0.365 \n", "36 0.356 0.142 0.288 ... 0.375 \n", "37 0.359 0.142 0.302 ... 0.372 \n", "38 0.372 0.150 0.288 ... 0.372 \n", "39 0.374 0.140 0.274 ... 0.364 \n", "40 0.383 0.156 0.280 ... 0.376 \n", "41 0.381 0.160 0.284 ... 0.367 \n", "42 0.395 0.162 0.290 ... 0.367 \n", "43 0.385 0.158 0.290 ... 0.373 \n", "44 0.393 0.152 0.288 ... 0.367 \n", "\n", " siqa/acc_norm winogrande/acc winogrande/acc_norm sciq/acc \\\n", "0 0.362 0.516 0.497 0.208 \n", "1 0.389 0.509 0.491 0.582 \n", "2 0.397 0.516 0.505 0.686 \n", "3 0.401 0.513 0.500 0.712 \n", "4 0.395 0.511 0.511 0.750 \n", "5 0.398 0.518 0.522 0.751 \n", "6 0.390 0.498 0.492 0.776 \n", "7 0.384 0.518 0.521 0.769 \n", "8 0.391 0.530 0.521 0.781 \n", "9 0.391 0.526 0.514 0.769 \n", "10 0.389 0.518 0.524 0.785 \n", "11 0.397 0.535 0.529 0.801 \n", "12 0.392 0.541 0.527 0.790 \n", "13 0.396 0.540 0.522 0.802 \n", "14 0.392 0.540 0.516 0.797 \n", "15 0.362 0.516 0.497 0.208 \n", "16 0.386 0.509 0.507 0.559 \n", "17 0.392 0.522 0.504 0.665 \n", "18 0.394 0.503 0.497 0.721 \n", "19 0.399 0.505 0.494 0.719 \n", "20 0.401 0.525 0.512 0.733 \n", "21 0.405 0.522 0.514 0.753 \n", "22 0.399 0.521 0.521 0.764 \n", "23 0.404 0.533 0.511 0.754 \n", "24 0.403 0.532 0.526 0.775 \n", "25 0.401 0.542 0.532 0.764 \n", "26 0.398 0.530 0.516 NaN \n", "27 0.396 0.538 0.525 NaN \n", "28 0.392 0.535 0.526 NaN \n", "29 0.392 0.543 0.527 0.783 \n", "30 0.362 0.515 0.497 NaN \n", "31 0.380 0.499 0.492 0.546 \n", "32 0.383 0.519 0.499 NaN \n", "33 0.394 0.519 0.504 NaN \n", "34 0.393 0.502 0.496 NaN \n", "35 0.389 0.515 0.511 NaN \n", "36 0.397 0.540 0.521 NaN \n", "37 0.401 0.531 0.510 NaN \n", "38 0.396 0.532 0.508 NaN \n", "39 0.392 0.529 0.506 NaN \n", "40 0.397 0.529 0.513 NaN \n", "41 0.387 0.538 0.516 NaN \n", "42 0.407 0.528 0.510 NaN \n", "43 0.396 0.538 0.510 NaN \n", "44 0.396 0.528 0.513 0.785 \n", "\n", " sciq/acc_norm arc/acc arc/acc_norm mmlu/acc mmlu/acc_norm \n", "0 0.202 0.2195 0.2510 0.230294 0.250147 \n", "1 0.516 0.2825 0.2955 0.239520 0.253223 \n", "2 0.582 0.3090 0.3200 0.247320 0.262812 \n", "3 0.611 0.3075 0.3415 0.248568 0.263474 \n", "4 0.658 0.3260 0.3445 0.259246 0.273276 \n", "5 0.661 0.3470 0.3545 0.258485 0.271414 \n", "6 0.669 0.3530 0.3565 0.261842 0.276371 \n", "7 0.672 0.3625 0.3585 0.265558 0.274768 \n", "8 0.677 0.3530 0.3615 0.267141 0.283691 \n", "9 0.672 0.3630 0.3715 0.266464 0.284446 \n", "10 0.682 0.3735 0.3745 0.268085 0.283562 \n", "11 0.695 0.3775 0.3800 0.267457 0.285596 \n", "12 0.690 0.3680 0.3755 0.267547 0.285836 \n", "13 0.707 0.3760 0.3845 0.271108 0.287802 \n", "14 0.700 0.3790 0.3870 0.269510 0.287944 \n", "15 0.202 0.2195 0.2510 0.230294 0.250147 \n", "16 0.500 0.2590 0.2970 0.243455 0.254311 \n", "17 0.566 0.3040 0.3135 0.249051 0.255010 \n", "18 0.626 0.3140 0.3410 0.254015 0.266158 \n", "19 0.615 0.3375 0.3375 0.256696 0.268152 \n", "20 0.639 0.3390 0.3580 0.257450 0.271040 \n", "21 0.664 0.3450 0.3645 0.262549 0.273836 \n", "22 0.662 0.3585 0.3625 0.262740 0.276266 \n", "23 0.646 0.3555 0.3690 0.263875 0.278433 \n", "24 0.666 0.3605 0.3730 0.265119 0.283235 \n", "25 0.673 0.3675 0.3840 0.272474 0.286190 \n", "26 NaN 0.3690 0.3780 0.269131 0.288633 \n", "27 NaN 0.3660 0.3810 0.270691 0.287333 \n", "28 NaN 0.3785 0.3905 0.268910 0.289335 \n", "29 0.682 0.3745 0.3890 0.270869 0.289845 \n", "30 NaN 0.2195 0.2520 0.230228 0.250147 \n", "31 0.484 0.2565 0.2780 0.239651 0.253956 \n", "32 NaN 0.2845 0.3115 0.239715 0.253644 \n", "33 NaN 0.3035 0.3335 0.250551 0.262409 \n", "34 NaN 0.3105 0.3375 0.249887 0.263702 \n", "35 NaN 0.3190 0.3380 0.252621 0.266785 \n", "36 NaN 0.3280 0.3515 0.252255 0.265589 \n", "37 NaN 0.3320 0.3550 0.250146 0.267719 \n", "38 NaN 0.3365 0.3630 0.258433 0.274100 \n", "39 NaN 0.3445 0.3610 0.258927 0.271955 \n", "40 NaN 0.3445 0.3650 0.258294 0.272123 \n", "41 NaN 0.3490 0.3660 0.259610 0.276792 \n", "42 NaN 0.3510 0.3700 0.260350 0.279535 \n", "43 NaN 0.3540 0.3730 0.258481 0.274616 \n", "44 0.675 0.3590 0.3660 0.260174 0.278002 \n", "\n", "[45 rows x 22 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "from matplotlib.figure import Figure\n", "\n", "df = pd.read_csv(\"../src_data/removed_data_cross_dedup.csv\")\n", "df" ] }, { "cell_type": "code", "execution_count": 24, "id": "b610f43caefdf01", "metadata": { "ExecuteTime": { "end_time": "2024-04-30T13:29:05.776714Z", "start_time": "2024-04-30T13:29:05.774546Z" }, "collapsed": false }, "outputs": [], "source": [ "runs_mapping = {\n", " \"deduped_removed_cross\": \"Originally removed data\",\n", " \"cross_minhash_dump_CC-MAIN-2013-48\": \"Originally kept data\",\n", "}" ] }, { "cell_type": "code", "execution_count": 25, "id": "18b2dde6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['runname', 'seed', 'steps', 'agg_score', 'commonsense_qa/acc',\n", " 'commonsense_qa/acc_norm', 'hellaswag/acc', 'hellaswag/acc_norm',\n", " 'openbookqa/acc', 'openbookqa/acc_norm', 'piqa/acc', 'piqa/acc_norm',\n", " 'siqa/acc', 'siqa/acc_norm', 'winogrande/acc', 'winogrande/acc_norm',\n", " 'sciq/acc', 'sciq/acc_norm', 'arc/acc', 'arc/acc_norm', 'mmlu/acc',\n", " 'mmlu/acc_norm'],\n", " dtype='object')" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 27, "id": "initial_id", "metadata": { "ExecuteTime": { "end_time": "2024-04-30T13:31:10.740797Z", "start_time": "2024-04-30T13:31:10.661359Z" }, "collapsed": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import json\n", "import os\n", "from matplotlib import pyplot as plt\n", "metrics = ['agg_score', 'commonsense_qa/acc_norm', 'hellaswag/acc_norm', 'openbookqa/acc_norm', 'piqa/acc_norm',\n", " 'siqa/acc_norm', 'winogrande/acc_norm', 'arc/acc_norm', 'mmlu/acc_norm']\n", "\n", "def normalize_runname(runname):\n", " return runname.replace(\"/\", \"_\")\n", "\n", "grouped = (\n", " df.groupby([\"runname\", \"steps\"])\n", " .agg(\n", " {\n", " key: \"mean\" for key in metrics\n", " }\n", " )\n", " .reset_index()\n", ")\n", "\n", "file_id=\"../assets/data/plots/removed_data_dedup\"\n", "files = {}\n", "for metric in metrics:\n", " datas = {}\n", " for name, group in grouped.groupby(\"runname\"):\n", " group = group[[\"steps\", metric]].sort_values(by=\"steps\")\n", " group = group.set_index(\"steps\")\n", " rolling_avg = group\n", " # rolling_avg = group.rolling(window=5).mean()\n", " datas[name] = {\n", " \"x\": (rolling_avg.index * 2048 * 1024 * 1e-9).tolist(),\n", " \"y\": rolling_avg[metric].tolist(),\n", " \"label\": runs_mapping[name],\n", " }\n", " # Sort the datata based on the steps\n", " datas = {k: v for k, v in sorted(datas.items(), key=lambda x: -x[1][\"y\"][-1])}\n", " # Create a folder\n", " os.makedirs(f\"{file_id}\", exist_ok=True)\n", " with open(f\"{file_id}/{normalize_runname(metric)}.json\", \"w\") as f:\n", " json.dump({\n", " \"data\": datas,\n", " \"layout\": {\n", " \"title\": {\n", " \"text\": \"The originally removed data outperforms the kept data\"\n", " },\n", " }\n", " }, f)\n", " files[metric] = {\"file\": f\"{normalize_runname(metric)}.json\"}\n", "# Create index\n", "with open(f\"{file_id}/index.json\", \"w\") as f:\n", " json.dump({\n", " \"files\": files,\n", " \"settings\": {\n", " \"defaultMetric\": \"agg_score\",\n", " \"slider\":{\"min\":0,\"max\":10,\"default\":0}\n", " }\n", " }, f)\n", " \n", "\n", "# Add labels and legend\n", "plt.xlabel(\"Training tokens (billions)\")\n", "plt.ylabel(\"Agg Score\")\n", "plt.title(\"The originally removed data outperforms the kept data\")\n", "plt.legend()\n", "\n", "# Show the plot\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 3, "id": "af28ebbd054cdc33", "metadata": { "ExecuteTime": { "end_time": "2024-04-30T12:52:05.836260Z", "start_time": "2024-04-30T12:52:05.834381Z" }, "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 5 }