|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.1289213579716373, |
|
"global_step": 30000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00040004211081201384, |
|
"loss": 8.3496, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000400168442509171, |
|
"loss": 8.2272, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000400378992874836, |
|
"loss": 7.6879, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004006737582146567, |
|
"loss": 7.4747, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004010527333566261, |
|
"loss": 7.2829, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004015159116511832, |
|
"loss": 7.1171, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00040206328497132196, |
|
"loss": 6.9445, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004026948437127389, |
|
"loss": 6.8391, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004034105767939909, |
|
"loss": 6.7131, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00040421047165670534, |
|
"loss": 6.6113, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004050945142657896, |
|
"loss": 6.4966, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004060626891096795, |
|
"loss": 6.3979, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004071149792006148, |
|
"loss": 6.3116, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00040825136607492915, |
|
"loss": 6.2301, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004094718297933883, |
|
"loss": 6.123, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004107763489415231, |
|
"loss": 6.0802, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00041216490063001633, |
|
"loss": 6.0029, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00041363746049510354, |
|
"loss": 5.9471, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004151940026989945, |
|
"loss": 5.9132, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004168344999303346, |
|
"loss": 5.8561, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00041855892340467854, |
|
"loss": 5.8044, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004203672428649916, |
|
"loss": 5.734, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004222594265821944, |
|
"loss": 5.7245, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004242354413557057, |
|
"loss": 5.6867, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00042629525251402893, |
|
"loss": 5.6387, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004284388239153662, |
|
"loss": 5.6119, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004306661179482429, |
|
"loss": 5.5533, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004329770955321787, |
|
"loss": 5.517, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004353717161183629, |
|
"loss": 5.4864, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004378499376903721, |
|
"loss": 5.4671, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00044041171676490604, |
|
"loss": 5.4412, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004430570083925455, |
|
"loss": 5.4108, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004457857661585539, |
|
"loss": 5.3807, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004485979421836768, |
|
"loss": 5.3353, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004514934871249904, |
|
"loss": 5.3277, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00045447235017676696, |
|
"loss": 5.2979, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00045753447907136494, |
|
"loss": 5.2791, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000460679820080143, |
|
"loss": 5.2494, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00046390831801440893, |
|
"loss": 5.2175, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004672199162263843, |
|
"loss": 5.2038, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004706145566101966, |
|
"loss": 5.1835, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004740921796029061, |
|
"loss": 5.1691, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004776527241855382, |
|
"loss": 5.1582, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004812961278841711, |
|
"loss": 5.1504, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004850223267710129, |
|
"loss": 5.1162, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004888312554655432, |
|
"loss": 5.0957, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004927228471356421, |
|
"loss": 5.079, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004966970334987757, |
|
"loss": 5.0572, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005007537448231871, |
|
"loss": 5.0342, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005048929099291249, |
|
"loss": 5.0106, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005091144561900837, |
|
"loss": 5.0155, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005134183095340927, |
|
"loss": 4.9817, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005178043944449977, |
|
"loss": 4.9742, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005222726339638023, |
|
"loss": 4.9299, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005268229496900086, |
|
"loss": 4.9208, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005314552617829947, |
|
"loss": 4.8617, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005361694889634196, |
|
"loss": 4.7952, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005409655485146408, |
|
"loss": 4.7641, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005458433562841782, |
|
"loss": 4.7361, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005508028266851747, |
|
"loss": 4.7023, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000555843872697916, |
|
"loss": 4.6561, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005609664058713396, |
|
"loss": 4.63, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005661703363245996, |
|
"loss": 4.6307, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005714555727486404, |
|
"loss": 4.5881, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005768220224077955, |
|
"loss": 4.5489, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005822695911414169, |
|
"loss": 4.5521, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005877981833655298, |
|
"loss": 4.5165, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005934077020745051, |
|
"loss": 4.505, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005990980488427659, |
|
"loss": 4.4863, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000604869123826509, |
|
"loss": 4.5071, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0006107208257654633, |
|
"loss": 4.4501, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0006166530519846631, |
|
"loss": 4.4623, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0006226656983962468, |
|
"loss": 4.4336, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0006287586595012887, |
|
"loss": 4.4335, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000634931828391647, |
|
"loss": 4.4142, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006411850967518416, |
|
"loss": 4.4145, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006475183548609511, |
|
"loss": 4.3842, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006539314915945428, |
|
"loss": 4.3748, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006604243944266178, |
|
"loss": 4.3815, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006669969494315867, |
|
"loss": 4.352, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006736490412862749, |
|
"loss": 4.3575, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000680380553271933, |
|
"loss": 4.3416, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006871913672762998, |
|
"loss": 4.341, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0006940813637956594, |
|
"loss": 4.3183, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007010504219369541, |
|
"loss": 4.3145, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007080984194198885, |
|
"loss": 4.3065, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0007152252325790948, |
|
"loss": 4.2805, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007224307363662818, |
|
"loss": 4.2804, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007297148043524434, |
|
"loss": 4.2996, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007370773087300737, |
|
"loss": 4.2743, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007445181203154048, |
|
"loss": 4.2621, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007520371085506811, |
|
"loss": 4.2548, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007596341415064441, |
|
"loss": 4.2643, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007673090858838494, |
|
"loss": 4.266, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007750618070170041, |
|
"loss": 4.2503, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007828921688753324, |
|
"loss": 4.2093, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007908000340659631, |
|
"loss": 4.2449, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0007987852638361333, |
|
"loss": 4.2158, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0008068477180756314, |
|
"loss": 4.202, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0008149872553192515, |
|
"loss": 4.2065, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0008232037327492777, |
|
"loss": 4.1773, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0008314970061979818, |
|
"loss": 4.1904, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0008398669301501703, |
|
"loss": 4.1868, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0008483133577457148, |
|
"loss": 4.2006, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0008568361407821495, |
|
"loss": 4.1467, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0008654351297172607, |
|
"loss": 4.1585, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0008741101736717116, |
|
"loss": 4.1547, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0008828611204316911, |
|
"loss": 4.1557, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0008916878164515838, |
|
"loss": 4.1496, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009005901068566691, |
|
"loss": 4.1434, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009095678354458306, |
|
"loss": 4.1173, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009186208446943008, |
|
"loss": 4.1364, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009277489757564244, |
|
"loss": 4.1445, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009369520684684475, |
|
"loss": 4.1156, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009462299613513248, |
|
"loss": 4.1033, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009555824916135536, |
|
"loss": 4.1187, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009650094951540386, |
|
"loss": 4.0823, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009745108065649499, |
|
"loss": 4.0624, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009840862591346498, |
|
"loss": 4.0845, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009937356848506058, |
|
"loss": 4.0483, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.001003458914402332, |
|
"loss": 4.0512, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0010132557771843787, |
|
"loss": 4.0606, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0010231261012993067, |
|
"loss": 4.046, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0010330697135607168, |
|
"loss": 4.0315, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00104308643949628, |
|
"loss": 4.0179, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.001053176103350803, |
|
"loss": 4.0351, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0010633385280893123, |
|
"loss": 4.02, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0010735735354001595, |
|
"loss": 4.0201, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0010838809456981471, |
|
"loss": 4.0044, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.001094260578127686, |
|
"loss": 3.9914, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0011047122505659646, |
|
"loss": 3.9991, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0011152357796261423, |
|
"loss": 4.0109, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0011258309806605731, |
|
"loss": 4.0405, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0011364976677640387, |
|
"loss": 4.0349, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0011472356537770186, |
|
"loss": 4.0312, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0011580447502889633, |
|
"loss": 4.0185, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0011689247676416152, |
|
"loss": 4.011, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0011798755149323176, |
|
"loss": 3.9898, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.001190896800017379, |
|
"loss": 3.981, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0012019884295154416, |
|
"loss": 3.949, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0012131502088108658, |
|
"loss": 3.9896, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0012243819420571598, |
|
"loss": 3.9951, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0012356834321804039, |
|
"loss": 4.0361, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0012470544808827113, |
|
"loss": 4.1212, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.001258494888645708, |
|
"loss": 4.0721, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0012700044547340368, |
|
"loss": 4.0311, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0012815829771988738, |
|
"loss": 4.0114, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.001293230252881479, |
|
"loss": 3.9868, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0013049460774167514, |
|
"loss": 3.9881, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0013167302452368242, |
|
"loss": 3.9705, |
|
"step": 30000 |
|
} |
|
], |
|
"max_steps": 500000, |
|
"num_train_epochs": 3, |
|
"total_flos": 4.781489946624e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|