|
{ |
|
"best_metric": 0.23276936632271397, |
|
"best_model_checkpoint": "esm2_t6_8M_finetune_2023-10-08_00-58-24/checkpoint-42015", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 42015, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0005701249285950224, |
|
"loss": 0.65, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005700293047710233, |
|
"loss": 0.5381, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005698699554923001, |
|
"loss": 0.5093, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005696469163951857, |
|
"loss": 0.4747, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005693602373593884, |
|
"loss": 0.4731, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005690099824968371, |
|
"loss": 0.454, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000568596230137343, |
|
"loss": 0.4384, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005681190728110825, |
|
"loss": 0.4432, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005675786172279044, |
|
"loss": 0.4288, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005669749842534648, |
|
"loss": 0.4278, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000566308308882198, |
|
"loss": 0.4289, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005655787402071262, |
|
"loss": 0.4171, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005647864413865174, |
|
"loss": 0.4262, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005639315896073965, |
|
"loss": 0.4155, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005630143760459205, |
|
"loss": 0.4042, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000562035005824624, |
|
"loss": 0.4102, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005609936979665467, |
|
"loss": 0.403, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005598906853462516, |
|
"loss": 0.3957, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005587262146377457, |
|
"loss": 0.3939, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005575005462593147, |
|
"loss": 0.4017, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005562139543152841, |
|
"loss": 0.3957, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005548736130124954, |
|
"loss": 0.3983, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005534663515857835, |
|
"loss": 0.3824, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005519990687871066, |
|
"loss": 0.3809, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005504720927546211, |
|
"loss": 0.3862, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005488857649760757, |
|
"loss": 0.3723, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005472404402124425, |
|
"loss": 0.3708, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005455364864185798, |
|
"loss": 0.372, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005437742846609435, |
|
"loss": 0.3652, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000541954229032367, |
|
"loss": 0.3534, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005400767265639273, |
|
"loss": 0.3557, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005381421971339184, |
|
"loss": 0.3563, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0005361510733739505, |
|
"loss": 0.3641, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0005341141758547804, |
|
"loss": 0.365, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0005320114891528482, |
|
"loss": 0.3507, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000529853579171728, |
|
"loss": 0.347, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0005276409284991003, |
|
"loss": 0.349, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0005253740319646683, |
|
"loss": 0.3518, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0005230533965294961, |
|
"loss": 0.3329, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0005206795411726335, |
|
"loss": 0.3401, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0005182529967750532, |
|
"loss": 0.3342, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000515774306000927, |
|
"loss": 0.3409, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0005132440231762652, |
|
"loss": 0.331, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00051066271416495, |
|
"loss": 0.3356, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0005080309562421869, |
|
"loss": 0.3235, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0005053493379654053, |
|
"loss": 0.3292, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0005026184590426352, |
|
"loss": 0.3198, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004998389301983908, |
|
"loss": 0.3262, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004970113730370897, |
|
"loss": 0.3239, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004941364199040399, |
|
"loss": 0.3177, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004912147137440239, |
|
"loss": 0.3103, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.000488246907957513, |
|
"loss": 0.3169, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00048524884437437616, |
|
"loss": 0.3071, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004822064618785885, |
|
"loss": 0.3091, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00047910481733056954, |
|
"loss": 0.305, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.000475959781372279, |
|
"loss": 0.3086, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004727720573489078, |
|
"loss": 0.3034, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00046954235815226125, |
|
"loss": 0.2921, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00046627140606133027, |
|
"loss": 0.3043, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00046295993258076314, |
|
"loss": 0.2905, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00045960867827727475, |
|
"loss": 0.2969, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00045621839261402846, |
|
"loss": 0.2938, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00045278983378302856, |
|
"loss": 0.2934, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004493237685355611, |
|
"loss": 0.2844, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00044582097201072027, |
|
"loss": 0.2882, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00044228222756205834, |
|
"loss": 0.2936, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00043870832658239985, |
|
"loss": 0.2776, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00043510006832685687, |
|
"loss": 0.2811, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004314582597340859, |
|
"loss": 0.2821, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00042778371524582784, |
|
"loss": 0.287, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00042407725662476785, |
|
"loss": 0.2789, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00042033971277076005, |
|
"loss": 0.2763, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004165719195354547, |
|
"loss": 0.2695, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00041277471953537104, |
|
"loss": 0.2635, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004089489619634578, |
|
"loss": 0.2636, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004050955023991823, |
|
"loss": 0.2632, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004012152026171918, |
|
"loss": 0.2542, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.000397328524919439, |
|
"loss": 0.2705, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0003933972771535795, |
|
"loss": 0.2513, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0003894418053214825, |
|
"loss": 0.245, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00038546299401147295, |
|
"loss": 0.2578, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0003814617330314382, |
|
"loss": 0.2577, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0003774389172098332, |
|
"loss": 0.2491, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0003733954461955649, |
|
"loss": 0.2563, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00036933222425679775, |
|
"loss": 0.2552, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00036525016007872573, |
|
"loss": 0.236, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00036115016656035776, |
|
"loss": 0.2482, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00035703316061035936, |
|
"loss": 0.2412, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0003529000629419989, |
|
"loss": 0.2402, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00034877257538830126, |
|
"loss": 0.2383, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00034461013949659567, |
|
"loss": 0.2303, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00034043439012889894, |
|
"loss": 0.2368, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00033624626113565846, |
|
"loss": 0.227, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0003320466891358589, |
|
"loss": 0.2321, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00032783661330756047, |
|
"loss": 0.2307, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00032361697517786374, |
|
"loss": 0.2209, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0003193887184123502, |
|
"loss": 0.2283, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00031515278860404374, |
|
"loss": 0.2231, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00031091013306194153, |
|
"loss": 0.213, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0003066617005991611, |
|
"loss": 0.2227, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0003024084413207512, |
|
"loss": 0.2106, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002981726001518289, |
|
"loss": 0.2156, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00029391255391089834, |
|
"loss": 0.2185, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002896505320303282, |
|
"loss": 0.2133, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00028538748765425824, |
|
"loss": 0.2169, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002811243741554959, |
|
"loss": 0.2082, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00027686214492230696, |
|
"loss": 0.2052, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00027262304895711635, |
|
"loss": 0.2166, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00026836543109541225, |
|
"loss": 0.211, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00026411155086609875, |
|
"loss": 0.2014, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002598623595925447, |
|
"loss": 0.2029, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002556188075494962, |
|
"loss": 0.1988, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00025138184375056156, |
|
"loss": 0.2003, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002471524157359758, |
|
"loss": 0.197, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002429314693606973, |
|
"loss": 0.1958, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00023871994858287913, |
|
"loss": 0.1848, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00023451879525276522, |
|
"loss": 0.1925, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002303289489020594, |
|
"loss": 0.1923, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00022615134653381023, |
|
"loss": 0.191, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00022200771021072758, |
|
"loss": 0.1906, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00021785732279544682, |
|
"loss": 0.1905, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002137219684748498, |
|
"loss": 0.1884, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00020960257206557216, |
|
"loss": 0.1844, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002055000548154766, |
|
"loss": 0.1865, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00020141533419762732, |
|
"loss": 0.1729, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00019734932370511055, |
|
"loss": 0.1717, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00019330293264674297, |
|
"loss": 0.1797, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001892770659437178, |
|
"loss": 0.1756, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018527262392723065, |
|
"loss": 0.184, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018129050213713306, |
|
"loss": 0.1806, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00017733159112165648, |
|
"loss": 0.175, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00017339677623825358, |
|
"loss": 0.1746, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00016948693745559923, |
|
"loss": 0.1667, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00016560294915679837, |
|
"loss": 0.1737, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00016174567994384052, |
|
"loss": 0.1624, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00015793507085198864, |
|
"loss": 0.1667, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00015413367721145552, |
|
"loss": 0.1679, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00015036156760590242, |
|
"loss": 0.1626, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00014661958561714702, |
|
"loss": 0.1614, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00014290856808936924, |
|
"loss": 0.1603, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0001392293449419613, |
|
"loss": 0.1611, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001355827389839281, |
|
"loss": 0.156, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001319695657298769, |
|
"loss": 0.1618, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.000128390633217638, |
|
"loss": 0.1653, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00012484674182755833, |
|
"loss": 0.1573, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00012133868410350671, |
|
"loss": 0.1629, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00011786724457563275, |
|
"loss": 0.1531, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00011443319958491592, |
|
"loss": 0.1491, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00011103731710954869, |
|
"loss": 0.1443, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00010768035659318755, |
|
"loss": 0.1454, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00010436306877511392, |
|
"loss": 0.1524, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00010108619552234125, |
|
"loss": 0.1476, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.78665447379777e-05, |
|
"loss": 0.1519, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.467247876055098e-05, |
|
"loss": 0.1421, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.152099451915894e-05, |
|
"loss": 0.1445, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.841279680106426e-05, |
|
"loss": 0.1378, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.534858071307888e-05, |
|
"loss": 0.1484, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.232903152611165e-05, |
|
"loss": 0.1434, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.935482452191686e-05, |
|
"loss": 0.1342, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.642662484207666e-05, |
|
"loss": 0.139, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.354508733925122e-05, |
|
"loss": 0.1411, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.071085643072893e-05, |
|
"loss": 0.1319, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.792456595431225e-05, |
|
"loss": 0.1287, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.518683902656733e-05, |
|
"loss": 0.1273, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.249828790347295e-05, |
|
"loss": 0.129, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5.9859513843497324e-05, |
|
"loss": 0.1326, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 5.727110697313506e-05, |
|
"loss": 0.1302, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.47336461549329e-05, |
|
"loss": 0.1487, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.224769885803498e-05, |
|
"loss": 0.1282, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.981382103127605e-05, |
|
"loss": 0.1278, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.7432556978851023e-05, |
|
"loss": 0.13, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.51044392385881e-05, |
|
"loss": 0.1281, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.28412263737391e-05, |
|
"loss": 0.1285, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.063164624396037e-05, |
|
"loss": 0.1288, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.846549409375182e-05, |
|
"loss": 0.1275, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.635449361928196e-05, |
|
"loss": 0.1249, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.42991169175479e-05, |
|
"loss": 0.1258, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.229982364603461e-05, |
|
"loss": 0.1285, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.0357060919918606e-05, |
|
"loss": 0.1303, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.8471263212076872e-05, |
|
"loss": 0.1306, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.6642852255922315e-05, |
|
"loss": 0.1253, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.487223695108933e-05, |
|
"loss": 0.1175, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3159813271988392e-05, |
|
"loss": 0.1273, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.151408710314706e-05, |
|
"loss": 0.1316, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.9918886837489768e-05, |
|
"loss": 0.121, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.838298594800929e-05, |
|
"loss": 0.1307, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.69067279183791e-05, |
|
"loss": 0.1169, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.549044289394571e-05, |
|
"loss": 0.1202, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4134447607895461e-05, |
|
"loss": 0.1139, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.2839045310422171e-05, |
|
"loss": 0.1289, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1604525700908673e-05, |
|
"loss": 0.1333, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.0431164863139752e-05, |
|
"loss": 0.1165, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.319225203559418e-06, |
|
"loss": 0.121, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.268955392587713e-06, |
|
"loss": 0.1265, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.2805903090082e-06, |
|
"loss": 0.114, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.3543509874411355e-06, |
|
"loss": 0.1191, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.4904445689116835e-06, |
|
"loss": 0.1085, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.689064254525586e-06, |
|
"loss": 0.1171, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.9503892622624656e-06, |
|
"loss": 0.1127, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.2745847868960813e-06, |
|
"loss": 0.1206, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.6618019630506124e-06, |
|
"loss": 0.1167, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1121778314016173e-06, |
|
"loss": 0.116, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6258353080285549e-06, |
|
"loss": 0.1155, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.202883156926388e-06, |
|
"loss": 0.1112, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.450552439147354e-07, |
|
"loss": 0.124, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.488353993419233e-07, |
|
"loss": 0.1186, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.1624678365224445e-07, |
|
"loss": 0.1128, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.473414121760922e-07, |
|
"loss": 0.1269, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.215705833867654e-08, |
|
"loss": 0.1186, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.172452126860464e-10, |
|
"loss": 0.1156, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9232681404353718, |
|
"eval_auc": 0.7327222270892191, |
|
"eval_f1": 0.23276936632271397, |
|
"eval_loss": 1.4070135354995728, |
|
"eval_mcc": 0.2532717443913893, |
|
"eval_precision": 0.14886006058928025, |
|
"eval_recall": 0.5334820519181167, |
|
"eval_runtime": 1911.9976, |
|
"eval_samples_per_second": 66.763, |
|
"eval_steps_per_second": 5.564, |
|
"step": 42015 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 42015, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 2.0189159728820676e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|