|
{ |
|
"best_metric": 2.4363410472869873, |
|
"best_model_checkpoint": "/home/seemdog/manchu_BERT/1002_BERT_DA_1.0/checkpoint-86000", |
|
"epoch": 9.964620917517031, |
|
"global_step": 213000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9766081871345035e-05, |
|
"loss": 6.1581, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 5.5598931312561035, |
|
"eval_runtime": 54.891, |
|
"eval_samples_per_second": 120.767, |
|
"eval_steps_per_second": 1.895, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.953216374269006e-05, |
|
"loss": 5.3713, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 4.843267440795898, |
|
"eval_runtime": 54.8945, |
|
"eval_samples_per_second": 120.759, |
|
"eval_steps_per_second": 1.895, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9298245614035086e-05, |
|
"loss": 4.7624, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 4.427705764770508, |
|
"eval_runtime": 54.9095, |
|
"eval_samples_per_second": 120.726, |
|
"eval_steps_per_second": 1.894, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.906432748538012e-05, |
|
"loss": 4.2884, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 4.152446746826172, |
|
"eval_runtime": 54.9536, |
|
"eval_samples_per_second": 120.629, |
|
"eval_steps_per_second": 1.893, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.883040935672515e-05, |
|
"loss": 3.908, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 3.943004608154297, |
|
"eval_runtime": 54.9769, |
|
"eval_samples_per_second": 120.578, |
|
"eval_steps_per_second": 1.892, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.859649122807018e-05, |
|
"loss": 3.6357, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 3.7840378284454346, |
|
"eval_runtime": 54.9612, |
|
"eval_samples_per_second": 120.612, |
|
"eval_steps_per_second": 1.892, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.836257309941521e-05, |
|
"loss": 3.442, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 3.6515119075775146, |
|
"eval_runtime": 55.0182, |
|
"eval_samples_per_second": 120.487, |
|
"eval_steps_per_second": 1.89, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8128654970760235e-05, |
|
"loss": 3.2982, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 3.5147831439971924, |
|
"eval_runtime": 54.9459, |
|
"eval_samples_per_second": 120.646, |
|
"eval_steps_per_second": 1.893, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.789473684210526e-05, |
|
"loss": 3.1681, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 3.4453866481781006, |
|
"eval_runtime": 54.9741, |
|
"eval_samples_per_second": 120.584, |
|
"eval_steps_per_second": 1.892, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.7660818713450294e-05, |
|
"loss": 3.0515, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 3.3482985496520996, |
|
"eval_runtime": 54.9922, |
|
"eval_samples_per_second": 120.544, |
|
"eval_steps_per_second": 1.891, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.7426900584795326e-05, |
|
"loss": 2.9408, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 3.274308919906616, |
|
"eval_runtime": 55.0307, |
|
"eval_samples_per_second": 120.46, |
|
"eval_steps_per_second": 1.89, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.719298245614036e-05, |
|
"loss": 2.8601, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 3.2094714641571045, |
|
"eval_runtime": 54.9444, |
|
"eval_samples_per_second": 120.649, |
|
"eval_steps_per_second": 1.893, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.695906432748538e-05, |
|
"loss": 2.7866, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 3.1299281120300293, |
|
"eval_runtime": 54.9484, |
|
"eval_samples_per_second": 120.64, |
|
"eval_steps_per_second": 1.893, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.672514619883041e-05, |
|
"loss": 2.7094, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 3.096022844314575, |
|
"eval_runtime": 55.155, |
|
"eval_samples_per_second": 120.189, |
|
"eval_steps_per_second": 1.886, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.649122807017544e-05, |
|
"loss": 2.6424, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 3.060807228088379, |
|
"eval_runtime": 55.1935, |
|
"eval_samples_per_second": 120.105, |
|
"eval_steps_per_second": 1.884, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.625730994152047e-05, |
|
"loss": 2.5729, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 3.0170695781707764, |
|
"eval_runtime": 55.2064, |
|
"eval_samples_per_second": 120.077, |
|
"eval_steps_per_second": 1.884, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.60233918128655e-05, |
|
"loss": 2.5108, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 2.9729015827178955, |
|
"eval_runtime": 55.2048, |
|
"eval_samples_per_second": 120.08, |
|
"eval_steps_per_second": 1.884, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.5789473684210527e-05, |
|
"loss": 2.4538, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 2.9392964839935303, |
|
"eval_runtime": 55.2009, |
|
"eval_samples_per_second": 120.089, |
|
"eval_steps_per_second": 1.884, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.555555555555556e-05, |
|
"loss": 2.3941, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 2.900946617126465, |
|
"eval_runtime": 55.1868, |
|
"eval_samples_per_second": 120.119, |
|
"eval_steps_per_second": 1.885, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.5321637426900585e-05, |
|
"loss": 2.3341, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 2.87040376663208, |
|
"eval_runtime": 55.0611, |
|
"eval_samples_per_second": 120.393, |
|
"eval_steps_per_second": 1.889, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.508771929824562e-05, |
|
"loss": 2.2797, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.8554604053497314, |
|
"eval_runtime": 54.9944, |
|
"eval_samples_per_second": 120.54, |
|
"eval_steps_per_second": 1.891, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.485380116959065e-05, |
|
"loss": 2.2284, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 2.8280177116394043, |
|
"eval_runtime": 54.9695, |
|
"eval_samples_per_second": 120.594, |
|
"eval_steps_per_second": 1.892, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.4619883040935676e-05, |
|
"loss": 2.1651, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 2.7877776622772217, |
|
"eval_runtime": 54.9786, |
|
"eval_samples_per_second": 120.574, |
|
"eval_steps_per_second": 1.892, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.43859649122807e-05, |
|
"loss": 2.1267, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 2.7796318531036377, |
|
"eval_runtime": 55.0112, |
|
"eval_samples_per_second": 120.503, |
|
"eval_steps_per_second": 1.891, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.4152046783625734e-05, |
|
"loss": 2.0887, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.7155935764312744, |
|
"eval_runtime": 54.9846, |
|
"eval_samples_per_second": 120.561, |
|
"eval_steps_per_second": 1.891, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.3918128654970766e-05, |
|
"loss": 2.0477, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 2.7347090244293213, |
|
"eval_runtime": 54.9797, |
|
"eval_samples_per_second": 120.572, |
|
"eval_steps_per_second": 1.892, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.368421052631579e-05, |
|
"loss": 2.0055, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 2.7260184288024902, |
|
"eval_runtime": 54.9686, |
|
"eval_samples_per_second": 120.596, |
|
"eval_steps_per_second": 1.892, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.345029239766082e-05, |
|
"loss": 1.9738, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.7053301334381104, |
|
"eval_runtime": 54.975, |
|
"eval_samples_per_second": 120.582, |
|
"eval_steps_per_second": 1.892, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.321637426900585e-05, |
|
"loss": 1.9336, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 2.6540746688842773, |
|
"eval_runtime": 54.9866, |
|
"eval_samples_per_second": 120.557, |
|
"eval_steps_per_second": 1.891, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.298245614035088e-05, |
|
"loss": 1.9008, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 2.6721866130828857, |
|
"eval_runtime": 54.9707, |
|
"eval_samples_per_second": 120.592, |
|
"eval_steps_per_second": 1.892, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.274853801169591e-05, |
|
"loss": 1.8603, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 2.6387619972229004, |
|
"eval_runtime": 54.9719, |
|
"eval_samples_per_second": 120.589, |
|
"eval_steps_per_second": 1.892, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.251461988304094e-05, |
|
"loss": 1.8291, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 2.640782594680786, |
|
"eval_runtime": 54.9643, |
|
"eval_samples_per_second": 120.606, |
|
"eval_steps_per_second": 1.892, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.228070175438597e-05, |
|
"loss": 1.8059, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 2.614128589630127, |
|
"eval_runtime": 54.9538, |
|
"eval_samples_per_second": 120.629, |
|
"eval_steps_per_second": 1.893, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.204678362573099e-05, |
|
"loss": 1.7663, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 2.618607997894287, |
|
"eval_runtime": 55.0051, |
|
"eval_samples_per_second": 120.516, |
|
"eval_steps_per_second": 1.891, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.1812865497076025e-05, |
|
"loss": 1.7322, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 2.6462574005126953, |
|
"eval_runtime": 54.9802, |
|
"eval_samples_per_second": 120.571, |
|
"eval_steps_per_second": 1.892, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.157894736842106e-05, |
|
"loss": 1.7187, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.5989272594451904, |
|
"eval_runtime": 54.9619, |
|
"eval_samples_per_second": 120.611, |
|
"eval_steps_per_second": 1.892, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.134502923976608e-05, |
|
"loss": 1.6852, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 2.5719058513641357, |
|
"eval_runtime": 54.9667, |
|
"eval_samples_per_second": 120.6, |
|
"eval_steps_per_second": 1.892, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.111111111111111e-05, |
|
"loss": 1.6649, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_loss": 2.57804012298584, |
|
"eval_runtime": 54.9675, |
|
"eval_samples_per_second": 120.598, |
|
"eval_steps_per_second": 1.892, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.087719298245614e-05, |
|
"loss": 1.6285, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.5606088638305664, |
|
"eval_runtime": 55.1929, |
|
"eval_samples_per_second": 120.106, |
|
"eval_steps_per_second": 1.884, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 4.0643274853801174e-05, |
|
"loss": 1.6033, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 2.570094585418701, |
|
"eval_runtime": 55.1572, |
|
"eval_samples_per_second": 120.184, |
|
"eval_steps_per_second": 1.886, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.04093567251462e-05, |
|
"loss": 1.5833, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 2.5516393184661865, |
|
"eval_runtime": 55.1223, |
|
"eval_samples_per_second": 120.26, |
|
"eval_steps_per_second": 1.887, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.017543859649123e-05, |
|
"loss": 1.5701, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 2.544060707092285, |
|
"eval_runtime": 54.9919, |
|
"eval_samples_per_second": 120.545, |
|
"eval_steps_per_second": 1.891, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.994152046783626e-05, |
|
"loss": 1.5252, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 2.545295476913452, |
|
"eval_runtime": 54.9924, |
|
"eval_samples_per_second": 120.544, |
|
"eval_steps_per_second": 1.891, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.970760233918129e-05, |
|
"loss": 1.5019, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 2.547807216644287, |
|
"eval_runtime": 55.007, |
|
"eval_samples_per_second": 120.512, |
|
"eval_steps_per_second": 1.891, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 1.4789, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 2.541635036468506, |
|
"eval_runtime": 54.9822, |
|
"eval_samples_per_second": 120.566, |
|
"eval_steps_per_second": 1.892, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.923976608187135e-05, |
|
"loss": 1.4611, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 2.526390790939331, |
|
"eval_runtime": 54.9826, |
|
"eval_samples_per_second": 120.565, |
|
"eval_steps_per_second": 1.892, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.9005847953216374e-05, |
|
"loss": 1.4413, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 2.5193886756896973, |
|
"eval_runtime": 54.9793, |
|
"eval_samples_per_second": 120.573, |
|
"eval_steps_per_second": 1.892, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.877192982456141e-05, |
|
"loss": 1.4106, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 2.504810094833374, |
|
"eval_runtime": 55.0248, |
|
"eval_samples_per_second": 120.473, |
|
"eval_steps_per_second": 1.89, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.853801169590643e-05, |
|
"loss": 1.3928, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 2.5266056060791016, |
|
"eval_runtime": 55.1287, |
|
"eval_samples_per_second": 120.246, |
|
"eval_steps_per_second": 1.886, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.8304093567251465e-05, |
|
"loss": 1.3857, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 2.5026743412017822, |
|
"eval_runtime": 55.0968, |
|
"eval_samples_per_second": 120.315, |
|
"eval_steps_per_second": 1.888, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.80701754385965e-05, |
|
"loss": 1.3682, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 2.5191988945007324, |
|
"eval_runtime": 55.0835, |
|
"eval_samples_per_second": 120.345, |
|
"eval_steps_per_second": 1.888, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.783625730994152e-05, |
|
"loss": 1.337, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 2.4917993545532227, |
|
"eval_runtime": 55.1615, |
|
"eval_samples_per_second": 120.175, |
|
"eval_steps_per_second": 1.885, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.760233918128655e-05, |
|
"loss": 1.3314, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 2.503882646560669, |
|
"eval_runtime": 55.1711, |
|
"eval_samples_per_second": 120.153, |
|
"eval_steps_per_second": 1.885, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.736842105263158e-05, |
|
"loss": 1.3213, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 2.5335164070129395, |
|
"eval_runtime": 55.1504, |
|
"eval_samples_per_second": 120.199, |
|
"eval_steps_per_second": 1.886, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 3.713450292397661e-05, |
|
"loss": 1.2901, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_loss": 2.5040109157562256, |
|
"eval_runtime": 55.1836, |
|
"eval_samples_per_second": 120.126, |
|
"eval_steps_per_second": 1.885, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.690058479532164e-05, |
|
"loss": 1.2927, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 2.4990580081939697, |
|
"eval_runtime": 55.1982, |
|
"eval_samples_per_second": 120.095, |
|
"eval_steps_per_second": 1.884, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 1.2631, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 2.500002861022949, |
|
"eval_runtime": 55.1671, |
|
"eval_samples_per_second": 120.162, |
|
"eval_steps_per_second": 1.885, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 3.64327485380117e-05, |
|
"loss": 1.2526, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 2.484260320663452, |
|
"eval_runtime": 55.0693, |
|
"eval_samples_per_second": 120.376, |
|
"eval_steps_per_second": 1.889, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.6198830409356724e-05, |
|
"loss": 1.2371, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 2.480639696121216, |
|
"eval_runtime": 55.0676, |
|
"eval_samples_per_second": 120.379, |
|
"eval_steps_per_second": 1.889, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.5964912280701756e-05, |
|
"loss": 1.2194, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_loss": 2.480283498764038, |
|
"eval_runtime": 54.981, |
|
"eval_samples_per_second": 120.569, |
|
"eval_steps_per_second": 1.892, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.573099415204679e-05, |
|
"loss": 1.2103, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_loss": 2.4655823707580566, |
|
"eval_runtime": 54.9896, |
|
"eval_samples_per_second": 120.55, |
|
"eval_steps_per_second": 1.891, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.5497076023391815e-05, |
|
"loss": 1.1954, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 2.467862367630005, |
|
"eval_runtime": 55.0349, |
|
"eval_samples_per_second": 120.451, |
|
"eval_steps_per_second": 1.89, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.526315789473684e-05, |
|
"loss": 1.1841, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 2.4734864234924316, |
|
"eval_runtime": 55.0767, |
|
"eval_samples_per_second": 120.359, |
|
"eval_steps_per_second": 1.888, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.502923976608187e-05, |
|
"loss": 1.1697, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 2.4691245555877686, |
|
"eval_runtime": 55.01, |
|
"eval_samples_per_second": 120.505, |
|
"eval_steps_per_second": 1.891, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.4795321637426905e-05, |
|
"loss": 1.1488, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_loss": 2.50709867477417, |
|
"eval_runtime": 55.0061, |
|
"eval_samples_per_second": 120.514, |
|
"eval_steps_per_second": 1.891, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.456140350877193e-05, |
|
"loss": 1.1343, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"eval_loss": 2.464665412902832, |
|
"eval_runtime": 54.9972, |
|
"eval_samples_per_second": 120.533, |
|
"eval_steps_per_second": 1.891, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.432748538011696e-05, |
|
"loss": 1.1285, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 2.4716575145721436, |
|
"eval_runtime": 54.9735, |
|
"eval_samples_per_second": 120.585, |
|
"eval_steps_per_second": 1.892, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 3.409356725146199e-05, |
|
"loss": 1.1124, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_loss": 2.476966619491577, |
|
"eval_runtime": 55.0007, |
|
"eval_samples_per_second": 120.526, |
|
"eval_steps_per_second": 1.891, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.385964912280702e-05, |
|
"loss": 1.1097, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 2.487794876098633, |
|
"eval_runtime": 54.9919, |
|
"eval_samples_per_second": 120.545, |
|
"eval_steps_per_second": 1.891, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.362573099415205e-05, |
|
"loss": 1.0956, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_loss": 2.4818880558013916, |
|
"eval_runtime": 55.0269, |
|
"eval_samples_per_second": 120.468, |
|
"eval_steps_per_second": 1.89, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.339181286549708e-05, |
|
"loss": 1.088, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"eval_loss": 2.4609289169311523, |
|
"eval_runtime": 54.9477, |
|
"eval_samples_per_second": 120.642, |
|
"eval_steps_per_second": 1.893, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.3157894736842106e-05, |
|
"loss": 1.0728, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 2.4839322566986084, |
|
"eval_runtime": 54.9672, |
|
"eval_samples_per_second": 120.599, |
|
"eval_steps_per_second": 1.892, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 3.292397660818713e-05, |
|
"loss": 1.0587, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_loss": 2.4727675914764404, |
|
"eval_runtime": 55.0507, |
|
"eval_samples_per_second": 120.416, |
|
"eval_steps_per_second": 1.889, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.2690058479532164e-05, |
|
"loss": 1.0534, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_loss": 2.4812207221984863, |
|
"eval_runtime": 54.9899, |
|
"eval_samples_per_second": 120.549, |
|
"eval_steps_per_second": 1.891, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.24561403508772e-05, |
|
"loss": 1.0455, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_loss": 2.469550609588623, |
|
"eval_runtime": 54.9765, |
|
"eval_samples_per_second": 120.579, |
|
"eval_steps_per_second": 1.892, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 3.222222222222223e-05, |
|
"loss": 1.0402, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_loss": 2.458113431930542, |
|
"eval_runtime": 54.9925, |
|
"eval_samples_per_second": 120.544, |
|
"eval_steps_per_second": 1.891, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 3.198830409356725e-05, |
|
"loss": 1.0227, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 2.4712133407592773, |
|
"eval_runtime": 54.9707, |
|
"eval_samples_per_second": 120.592, |
|
"eval_steps_per_second": 1.892, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.175438596491228e-05, |
|
"loss": 1.0172, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 2.4822046756744385, |
|
"eval_runtime": 54.9842, |
|
"eval_samples_per_second": 120.562, |
|
"eval_steps_per_second": 1.891, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 3.152046783625731e-05, |
|
"loss": 0.9947, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_loss": 2.455008029937744, |
|
"eval_runtime": 54.9636, |
|
"eval_samples_per_second": 120.607, |
|
"eval_steps_per_second": 1.892, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.128654970760234e-05, |
|
"loss": 0.9924, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_loss": 2.440960168838501, |
|
"eval_runtime": 54.9708, |
|
"eval_samples_per_second": 120.591, |
|
"eval_steps_per_second": 1.892, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 3.105263157894737e-05, |
|
"loss": 0.9863, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_loss": 2.454493761062622, |
|
"eval_runtime": 54.966, |
|
"eval_samples_per_second": 120.602, |
|
"eval_steps_per_second": 1.892, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 3.08187134502924e-05, |
|
"loss": 0.9793, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 2.482584238052368, |
|
"eval_runtime": 55.0651, |
|
"eval_samples_per_second": 120.385, |
|
"eval_steps_per_second": 1.889, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 3.058479532163743e-05, |
|
"loss": 0.9639, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_loss": 2.4847776889801025, |
|
"eval_runtime": 55.089, |
|
"eval_samples_per_second": 120.332, |
|
"eval_steps_per_second": 1.888, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 3.035087719298246e-05, |
|
"loss": 0.9584, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 2.4647934436798096, |
|
"eval_runtime": 55.1206, |
|
"eval_samples_per_second": 120.263, |
|
"eval_steps_per_second": 1.887, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 3.0116959064327488e-05, |
|
"loss": 0.9508, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_loss": 2.445103406906128, |
|
"eval_runtime": 55.0978, |
|
"eval_samples_per_second": 120.313, |
|
"eval_steps_per_second": 1.888, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 2.9883040935672517e-05, |
|
"loss": 0.9425, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_loss": 2.4363410472869873, |
|
"eval_runtime": 55.0773, |
|
"eval_samples_per_second": 120.358, |
|
"eval_steps_per_second": 1.888, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.9649122807017543e-05, |
|
"loss": 0.9301, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_loss": 2.4576821327209473, |
|
"eval_runtime": 55.0908, |
|
"eval_samples_per_second": 120.329, |
|
"eval_steps_per_second": 1.888, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.9415204678362572e-05, |
|
"loss": 0.922, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_loss": 2.487666130065918, |
|
"eval_runtime": 55.1028, |
|
"eval_samples_per_second": 120.302, |
|
"eval_steps_per_second": 1.887, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 2.9181286549707604e-05, |
|
"loss": 0.9102, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_loss": 2.462902784347534, |
|
"eval_runtime": 55.0955, |
|
"eval_samples_per_second": 120.318, |
|
"eval_steps_per_second": 1.888, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 2.8947368421052634e-05, |
|
"loss": 0.9081, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"eval_loss": 2.4494595527648926, |
|
"eval_runtime": 55.0849, |
|
"eval_samples_per_second": 120.341, |
|
"eval_steps_per_second": 1.888, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.8713450292397666e-05, |
|
"loss": 0.8956, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_loss": 2.466681718826294, |
|
"eval_runtime": 55.0767, |
|
"eval_samples_per_second": 120.359, |
|
"eval_steps_per_second": 1.888, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.847953216374269e-05, |
|
"loss": 0.8932, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"eval_loss": 2.4637372493743896, |
|
"eval_runtime": 55.0713, |
|
"eval_samples_per_second": 120.371, |
|
"eval_steps_per_second": 1.888, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.824561403508772e-05, |
|
"loss": 0.8845, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 2.4586174488067627, |
|
"eval_runtime": 55.0741, |
|
"eval_samples_per_second": 120.365, |
|
"eval_steps_per_second": 1.888, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.801169590643275e-05, |
|
"loss": 0.877, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_loss": 2.471717357635498, |
|
"eval_runtime": 55.0727, |
|
"eval_samples_per_second": 120.368, |
|
"eval_steps_per_second": 1.888, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.8713, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_loss": 2.4618284702301025, |
|
"eval_runtime": 55.0799, |
|
"eval_samples_per_second": 120.352, |
|
"eval_steps_per_second": 1.888, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 2.754385964912281e-05, |
|
"loss": 0.8768, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"eval_loss": 2.4480040073394775, |
|
"eval_runtime": 55.1696, |
|
"eval_samples_per_second": 120.157, |
|
"eval_steps_per_second": 1.885, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 2.7309941520467834e-05, |
|
"loss": 0.8662, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"eval_loss": 2.468902349472046, |
|
"eval_runtime": 55.1714, |
|
"eval_samples_per_second": 120.153, |
|
"eval_steps_per_second": 1.885, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 2.7076023391812866e-05, |
|
"loss": 0.8622, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 2.4613983631134033, |
|
"eval_runtime": 55.1613, |
|
"eval_samples_per_second": 120.175, |
|
"eval_steps_per_second": 1.885, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 2.6842105263157896e-05, |
|
"loss": 0.8497, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_loss": 2.488284111022949, |
|
"eval_runtime": 55.1664, |
|
"eval_samples_per_second": 120.164, |
|
"eval_steps_per_second": 1.885, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 2.6608187134502928e-05, |
|
"loss": 0.8399, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"eval_loss": 2.486598253250122, |
|
"eval_runtime": 55.142, |
|
"eval_samples_per_second": 120.217, |
|
"eval_steps_per_second": 1.886, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.6374269005847957e-05, |
|
"loss": 0.8397, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_loss": 2.490933895111084, |
|
"eval_runtime": 55.1377, |
|
"eval_samples_per_second": 120.226, |
|
"eval_steps_per_second": 1.886, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.6140350877192983e-05, |
|
"loss": 0.8266, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"eval_loss": 2.4587643146514893, |
|
"eval_runtime": 55.0944, |
|
"eval_samples_per_second": 120.321, |
|
"eval_steps_per_second": 1.888, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 2.5906432748538012e-05, |
|
"loss": 0.8231, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"eval_loss": 2.4951488971710205, |
|
"eval_runtime": 55.155, |
|
"eval_samples_per_second": 120.189, |
|
"eval_steps_per_second": 1.886, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 2.567251461988304e-05, |
|
"loss": 0.8189, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"eval_loss": 2.458134889602661, |
|
"eval_runtime": 55.0735, |
|
"eval_samples_per_second": 120.366, |
|
"eval_steps_per_second": 1.888, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 2.5438596491228074e-05, |
|
"loss": 0.8155, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"eval_loss": 2.448225736618042, |
|
"eval_runtime": 55.0955, |
|
"eval_samples_per_second": 120.318, |
|
"eval_steps_per_second": 1.888, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 2.5204678362573103e-05, |
|
"loss": 0.8059, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_loss": 2.489133358001709, |
|
"eval_runtime": 55.1106, |
|
"eval_samples_per_second": 120.285, |
|
"eval_steps_per_second": 1.887, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 2.4970760233918132e-05, |
|
"loss": 0.8085, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_loss": 2.491405487060547, |
|
"eval_runtime": 55.0557, |
|
"eval_samples_per_second": 120.405, |
|
"eval_steps_per_second": 1.889, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.4736842105263158e-05, |
|
"loss": 0.7851, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 2.486567735671997, |
|
"eval_runtime": 55.0714, |
|
"eval_samples_per_second": 120.371, |
|
"eval_steps_per_second": 1.888, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 2.450292397660819e-05, |
|
"loss": 0.7827, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_loss": 2.480097532272339, |
|
"eval_runtime": 55.0814, |
|
"eval_samples_per_second": 120.349, |
|
"eval_steps_per_second": 1.888, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 2.4269005847953216e-05, |
|
"loss": 0.7813, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 2.4855968952178955, |
|
"eval_runtime": 55.078, |
|
"eval_samples_per_second": 120.357, |
|
"eval_steps_per_second": 1.888, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 2.4035087719298245e-05, |
|
"loss": 0.7829, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_loss": 2.462341785430908, |
|
"eval_runtime": 55.0705, |
|
"eval_samples_per_second": 120.373, |
|
"eval_steps_per_second": 1.888, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 2.3801169590643278e-05, |
|
"loss": 0.7724, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 2.478029251098633, |
|
"eval_runtime": 55.0837, |
|
"eval_samples_per_second": 120.344, |
|
"eval_steps_per_second": 1.888, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 2.3567251461988303e-05, |
|
"loss": 0.7646, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"eval_loss": 2.4587323665618896, |
|
"eval_runtime": 55.1053, |
|
"eval_samples_per_second": 120.297, |
|
"eval_steps_per_second": 1.887, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.7604, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_loss": 2.453984498977661, |
|
"eval_runtime": 55.0903, |
|
"eval_samples_per_second": 120.33, |
|
"eval_steps_per_second": 1.888, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 2.309941520467836e-05, |
|
"loss": 0.7518, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"eval_loss": 2.488924026489258, |
|
"eval_runtime": 55.1009, |
|
"eval_samples_per_second": 120.307, |
|
"eval_steps_per_second": 1.887, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 2.2865497076023394e-05, |
|
"loss": 0.7515, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"eval_loss": 2.4510860443115234, |
|
"eval_runtime": 55.088, |
|
"eval_samples_per_second": 120.335, |
|
"eval_steps_per_second": 1.888, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 2.2631578947368423e-05, |
|
"loss": 0.7511, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"eval_loss": 2.468933343887329, |
|
"eval_runtime": 55.0676, |
|
"eval_samples_per_second": 120.379, |
|
"eval_steps_per_second": 1.889, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 2.2397660818713452e-05, |
|
"loss": 0.7424, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"eval_loss": 2.4676008224487305, |
|
"eval_runtime": 55.1052, |
|
"eval_samples_per_second": 120.297, |
|
"eval_steps_per_second": 1.887, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 2.216374269005848e-05, |
|
"loss": 0.7327, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 2.482384443283081, |
|
"eval_runtime": 55.0883, |
|
"eval_samples_per_second": 120.334, |
|
"eval_steps_per_second": 1.888, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 2.1929824561403507e-05, |
|
"loss": 0.7349, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_loss": 2.450364351272583, |
|
"eval_runtime": 55.0642, |
|
"eval_samples_per_second": 120.387, |
|
"eval_steps_per_second": 1.889, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 2.169590643274854e-05, |
|
"loss": 0.7307, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 2.4753456115722656, |
|
"eval_runtime": 55.0827, |
|
"eval_samples_per_second": 120.346, |
|
"eval_steps_per_second": 1.888, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 2.146198830409357e-05, |
|
"loss": 0.7269, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"eval_loss": 2.463690757751465, |
|
"eval_runtime": 55.087, |
|
"eval_samples_per_second": 120.337, |
|
"eval_steps_per_second": 1.888, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 2.1228070175438598e-05, |
|
"loss": 0.7175, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"eval_loss": 2.4744393825531006, |
|
"eval_runtime": 55.0809, |
|
"eval_samples_per_second": 120.35, |
|
"eval_steps_per_second": 1.888, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 2.0994152046783627e-05, |
|
"loss": 0.7178, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_loss": 2.4851980209350586, |
|
"eval_runtime": 55.0877, |
|
"eval_samples_per_second": 120.335, |
|
"eval_steps_per_second": 1.888, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 2.0760233918128656e-05, |
|
"loss": 0.7048, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"eval_loss": 2.5102007389068604, |
|
"eval_runtime": 55.1078, |
|
"eval_samples_per_second": 120.291, |
|
"eval_steps_per_second": 1.887, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 2.0526315789473685e-05, |
|
"loss": 0.7072, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"eval_loss": 2.5026237964630127, |
|
"eval_runtime": 55.1176, |
|
"eval_samples_per_second": 120.27, |
|
"eval_steps_per_second": 1.887, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 2.0292397660818714e-05, |
|
"loss": 0.7054, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_loss": 2.4804298877716064, |
|
"eval_runtime": 55.0663, |
|
"eval_samples_per_second": 120.382, |
|
"eval_steps_per_second": 1.889, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 2.0058479532163744e-05, |
|
"loss": 0.7019, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_loss": 2.4398744106292725, |
|
"eval_runtime": 54.9972, |
|
"eval_samples_per_second": 120.533, |
|
"eval_steps_per_second": 1.891, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 1.9824561403508773e-05, |
|
"loss": 0.6942, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_loss": 2.4618844985961914, |
|
"eval_runtime": 55.1004, |
|
"eval_samples_per_second": 120.308, |
|
"eval_steps_per_second": 1.887, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.9590643274853802e-05, |
|
"loss": 0.6842, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"eval_loss": 2.496403217315674, |
|
"eval_runtime": 55.0871, |
|
"eval_samples_per_second": 120.337, |
|
"eval_steps_per_second": 1.888, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 1.935672514619883e-05, |
|
"loss": 0.6859, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 2.483705520629883, |
|
"eval_runtime": 55.0869, |
|
"eval_samples_per_second": 120.337, |
|
"eval_steps_per_second": 1.888, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 1.9122807017543863e-05, |
|
"loss": 0.6742, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"eval_loss": 2.489377498626709, |
|
"eval_runtime": 55.1198, |
|
"eval_samples_per_second": 120.265, |
|
"eval_steps_per_second": 1.887, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 0.6818, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"eval_loss": 2.507904052734375, |
|
"eval_runtime": 55.1222, |
|
"eval_samples_per_second": 120.26, |
|
"eval_steps_per_second": 1.887, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 1.8654970760233918e-05, |
|
"loss": 0.6742, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 2.4935832023620605, |
|
"eval_runtime": 55.1223, |
|
"eval_samples_per_second": 120.26, |
|
"eval_steps_per_second": 1.887, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 1.8421052631578947e-05, |
|
"loss": 0.6756, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"eval_loss": 2.512763023376465, |
|
"eval_runtime": 55.167, |
|
"eval_samples_per_second": 120.162, |
|
"eval_steps_per_second": 1.885, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 1.8187134502923976e-05, |
|
"loss": 0.6635, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_loss": 2.5170469284057617, |
|
"eval_runtime": 55.1756, |
|
"eval_samples_per_second": 120.144, |
|
"eval_steps_per_second": 1.885, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 1.795321637426901e-05, |
|
"loss": 0.6645, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"eval_loss": 2.5008370876312256, |
|
"eval_runtime": 55.1095, |
|
"eval_samples_per_second": 120.288, |
|
"eval_steps_per_second": 1.887, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 1.7719298245614035e-05, |
|
"loss": 0.6617, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"eval_loss": 2.503709316253662, |
|
"eval_runtime": 55.1047, |
|
"eval_samples_per_second": 120.298, |
|
"eval_steps_per_second": 1.887, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 1.7485380116959067e-05, |
|
"loss": 0.6574, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_loss": 2.4953572750091553, |
|
"eval_runtime": 55.0727, |
|
"eval_samples_per_second": 120.368, |
|
"eval_steps_per_second": 1.888, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.7251461988304093e-05, |
|
"loss": 0.6519, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"eval_loss": 2.519571304321289, |
|
"eval_runtime": 55.1072, |
|
"eval_samples_per_second": 120.293, |
|
"eval_steps_per_second": 1.887, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.7017543859649125e-05, |
|
"loss": 0.6453, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 2.485342502593994, |
|
"eval_runtime": 55.0939, |
|
"eval_samples_per_second": 120.322, |
|
"eval_steps_per_second": 1.888, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 1.6783625730994155e-05, |
|
"loss": 0.6445, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_loss": 2.485079765319824, |
|
"eval_runtime": 55.093, |
|
"eval_samples_per_second": 120.324, |
|
"eval_steps_per_second": 1.888, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 1.654970760233918e-05, |
|
"loss": 0.643, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"eval_loss": 2.4923973083496094, |
|
"eval_runtime": 55.1032, |
|
"eval_samples_per_second": 120.302, |
|
"eval_steps_per_second": 1.887, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 1.6315789473684213e-05, |
|
"loss": 0.6373, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 2.5037529468536377, |
|
"eval_runtime": 55.0798, |
|
"eval_samples_per_second": 120.353, |
|
"eval_steps_per_second": 1.888, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 1.608187134502924e-05, |
|
"loss": 0.6292, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"eval_loss": 2.488449811935425, |
|
"eval_runtime": 55.097, |
|
"eval_samples_per_second": 120.315, |
|
"eval_steps_per_second": 1.888, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 1.584795321637427e-05, |
|
"loss": 0.6386, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"eval_loss": 2.482603073120117, |
|
"eval_runtime": 55.1088, |
|
"eval_samples_per_second": 120.289, |
|
"eval_steps_per_second": 1.887, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 1.56140350877193e-05, |
|
"loss": 0.6357, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"eval_loss": 2.482375144958496, |
|
"eval_runtime": 55.1247, |
|
"eval_samples_per_second": 120.255, |
|
"eval_steps_per_second": 1.887, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 1.538011695906433e-05, |
|
"loss": 0.6251, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"eval_loss": 2.4937736988067627, |
|
"eval_runtime": 55.1287, |
|
"eval_samples_per_second": 120.246, |
|
"eval_steps_per_second": 1.886, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 1.5146198830409358e-05, |
|
"loss": 0.624, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_loss": 2.5023653507232666, |
|
"eval_runtime": 55.1273, |
|
"eval_samples_per_second": 120.249, |
|
"eval_steps_per_second": 1.887, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.4912280701754386e-05, |
|
"loss": 0.6238, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_loss": 2.520798444747925, |
|
"eval_runtime": 55.0799, |
|
"eval_samples_per_second": 120.352, |
|
"eval_steps_per_second": 1.888, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.4678362573099417e-05, |
|
"loss": 0.6165, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"eval_loss": 2.5339748859405518, |
|
"eval_runtime": 55.1169, |
|
"eval_samples_per_second": 120.272, |
|
"eval_steps_per_second": 1.887, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 1.4444444444444444e-05, |
|
"loss": 0.6119, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 2.5113964080810547, |
|
"eval_runtime": 55.0891, |
|
"eval_samples_per_second": 120.332, |
|
"eval_steps_per_second": 1.888, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 1.4210526315789475e-05, |
|
"loss": 0.6089, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"eval_loss": 2.52811861038208, |
|
"eval_runtime": 55.112, |
|
"eval_samples_per_second": 120.282, |
|
"eval_steps_per_second": 1.887, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 1.3976608187134504e-05, |
|
"loss": 0.6035, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_loss": 2.5194358825683594, |
|
"eval_runtime": 55.1145, |
|
"eval_samples_per_second": 120.277, |
|
"eval_steps_per_second": 1.887, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 1.3742690058479531e-05, |
|
"loss": 0.6018, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_loss": 2.5066628456115723, |
|
"eval_runtime": 55.0979, |
|
"eval_samples_per_second": 120.313, |
|
"eval_steps_per_second": 1.888, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 1.3508771929824562e-05, |
|
"loss": 0.6016, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"eval_loss": 2.490973711013794, |
|
"eval_runtime": 54.9953, |
|
"eval_samples_per_second": 120.538, |
|
"eval_steps_per_second": 1.891, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 1.327485380116959e-05, |
|
"loss": 0.6013, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 2.489246368408203, |
|
"eval_runtime": 54.9888, |
|
"eval_samples_per_second": 120.552, |
|
"eval_steps_per_second": 1.891, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 1.304093567251462e-05, |
|
"loss": 0.5958, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"eval_loss": 2.528749704360962, |
|
"eval_runtime": 54.994, |
|
"eval_samples_per_second": 120.54, |
|
"eval_steps_per_second": 1.891, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 1.2807017543859651e-05, |
|
"loss": 0.5925, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 2.528515100479126, |
|
"eval_runtime": 54.9798, |
|
"eval_samples_per_second": 120.571, |
|
"eval_steps_per_second": 1.892, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 1.2573099415204679e-05, |
|
"loss": 0.5908, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"eval_loss": 2.510267734527588, |
|
"eval_runtime": 55.0014, |
|
"eval_samples_per_second": 120.524, |
|
"eval_steps_per_second": 1.891, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 1.2339181286549708e-05, |
|
"loss": 0.587, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"eval_loss": 2.533625602722168, |
|
"eval_runtime": 54.9987, |
|
"eval_samples_per_second": 120.53, |
|
"eval_steps_per_second": 1.891, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 1.2105263157894737e-05, |
|
"loss": 0.5851, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_loss": 2.538762331008911, |
|
"eval_runtime": 54.9696, |
|
"eval_samples_per_second": 120.594, |
|
"eval_steps_per_second": 1.892, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 1.1871345029239766e-05, |
|
"loss": 0.579, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"eval_loss": 2.5098183155059814, |
|
"eval_runtime": 54.9924, |
|
"eval_samples_per_second": 120.544, |
|
"eval_steps_per_second": 1.891, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 1.1637426900584795e-05, |
|
"loss": 0.5764, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"eval_loss": 2.5329983234405518, |
|
"eval_runtime": 55.0148, |
|
"eval_samples_per_second": 120.495, |
|
"eval_steps_per_second": 1.89, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 1.1403508771929824e-05, |
|
"loss": 0.5781, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"eval_loss": 2.512319803237915, |
|
"eval_runtime": 54.9674, |
|
"eval_samples_per_second": 120.599, |
|
"eval_steps_per_second": 1.892, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 1.1169590643274855e-05, |
|
"loss": 0.5758, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"eval_loss": 2.5034148693084717, |
|
"eval_runtime": 54.9854, |
|
"eval_samples_per_second": 120.559, |
|
"eval_steps_per_second": 1.891, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 1.0935672514619884e-05, |
|
"loss": 0.5792, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_loss": 2.525723934173584, |
|
"eval_runtime": 55.017, |
|
"eval_samples_per_second": 120.49, |
|
"eval_steps_per_second": 1.89, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 1.0701754385964913e-05, |
|
"loss": 0.5745, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"eval_loss": 2.526042938232422, |
|
"eval_runtime": 54.987, |
|
"eval_samples_per_second": 120.556, |
|
"eval_steps_per_second": 1.891, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 1.0467836257309941e-05, |
|
"loss": 0.5702, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"eval_loss": 2.5171217918395996, |
|
"eval_runtime": 54.976, |
|
"eval_samples_per_second": 120.58, |
|
"eval_steps_per_second": 1.892, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 1.023391812865497e-05, |
|
"loss": 0.5714, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": 2.509648323059082, |
|
"eval_runtime": 54.9828, |
|
"eval_samples_per_second": 120.565, |
|
"eval_steps_per_second": 1.892, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1e-05, |
|
"loss": 0.5692, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.4963207244873047, |
|
"eval_runtime": 54.9818, |
|
"eval_samples_per_second": 120.567, |
|
"eval_steps_per_second": 1.892, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 9.76608187134503e-06, |
|
"loss": 0.5541, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"eval_loss": 2.5158822536468506, |
|
"eval_runtime": 54.9875, |
|
"eval_samples_per_second": 120.555, |
|
"eval_steps_per_second": 1.891, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 9.532163742690059e-06, |
|
"loss": 0.5609, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"eval_loss": 2.52651047706604, |
|
"eval_runtime": 54.9727, |
|
"eval_samples_per_second": 120.587, |
|
"eval_steps_per_second": 1.892, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 9.298245614035088e-06, |
|
"loss": 0.5567, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"eval_loss": 2.529944658279419, |
|
"eval_runtime": 54.9646, |
|
"eval_samples_per_second": 120.605, |
|
"eval_steps_per_second": 1.892, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 9.064327485380117e-06, |
|
"loss": 0.5593, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"eval_loss": 2.5352935791015625, |
|
"eval_runtime": 54.9969, |
|
"eval_samples_per_second": 120.534, |
|
"eval_steps_per_second": 1.891, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 8.830409356725146e-06, |
|
"loss": 0.5537, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"eval_loss": 2.5415403842926025, |
|
"eval_runtime": 54.9924, |
|
"eval_samples_per_second": 120.544, |
|
"eval_steps_per_second": 1.891, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 8.596491228070176e-06, |
|
"loss": 0.5465, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"eval_loss": 2.5204358100891113, |
|
"eval_runtime": 55.0062, |
|
"eval_samples_per_second": 120.514, |
|
"eval_steps_per_second": 1.891, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 8.362573099415205e-06, |
|
"loss": 0.548, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_loss": 2.5008552074432373, |
|
"eval_runtime": 54.9691, |
|
"eval_samples_per_second": 120.595, |
|
"eval_steps_per_second": 1.892, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 8.128654970760234e-06, |
|
"loss": 0.5477, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"eval_loss": 2.5255722999572754, |
|
"eval_runtime": 54.9912, |
|
"eval_samples_per_second": 120.547, |
|
"eval_steps_per_second": 1.891, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 7.894736842105263e-06, |
|
"loss": 0.5393, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"eval_loss": 2.49310564994812, |
|
"eval_runtime": 54.9871, |
|
"eval_samples_per_second": 120.556, |
|
"eval_steps_per_second": 1.891, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 7.660818713450294e-06, |
|
"loss": 0.5441, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"eval_loss": 2.5206234455108643, |
|
"eval_runtime": 54.9863, |
|
"eval_samples_per_second": 120.557, |
|
"eval_steps_per_second": 1.891, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 7.426900584795322e-06, |
|
"loss": 0.5419, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"eval_loss": 2.511657476425171, |
|
"eval_runtime": 54.9931, |
|
"eval_samples_per_second": 120.542, |
|
"eval_steps_per_second": 1.891, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.192982456140351e-06, |
|
"loss": 0.5377, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_loss": 2.534726142883301, |
|
"eval_runtime": 55.0074, |
|
"eval_samples_per_second": 120.511, |
|
"eval_steps_per_second": 1.891, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 6.95906432748538e-06, |
|
"loss": 0.5375, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"eval_loss": 2.4978044033050537, |
|
"eval_runtime": 55.0077, |
|
"eval_samples_per_second": 120.51, |
|
"eval_steps_per_second": 1.891, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 6.725146198830409e-06, |
|
"loss": 0.5375, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"eval_loss": 2.4929347038269043, |
|
"eval_runtime": 54.9953, |
|
"eval_samples_per_second": 120.537, |
|
"eval_steps_per_second": 1.891, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 6.4912280701754385e-06, |
|
"loss": 0.5354, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"eval_loss": 2.4908556938171387, |
|
"eval_runtime": 55.0037, |
|
"eval_samples_per_second": 120.519, |
|
"eval_steps_per_second": 1.891, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 6.2573099415204685e-06, |
|
"loss": 0.5318, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"eval_loss": 2.531054973602295, |
|
"eval_runtime": 54.9993, |
|
"eval_samples_per_second": 120.529, |
|
"eval_steps_per_second": 1.891, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 6.023391812865498e-06, |
|
"loss": 0.5338, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_loss": 2.5138602256774902, |
|
"eval_runtime": 54.9949, |
|
"eval_samples_per_second": 120.539, |
|
"eval_steps_per_second": 1.891, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 5.789473684210527e-06, |
|
"loss": 0.5247, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"eval_loss": 2.5182831287384033, |
|
"eval_runtime": 54.9996, |
|
"eval_samples_per_second": 120.528, |
|
"eval_steps_per_second": 1.891, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.5249, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"eval_loss": 2.5073628425598145, |
|
"eval_runtime": 54.9824, |
|
"eval_samples_per_second": 120.566, |
|
"eval_steps_per_second": 1.892, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 5.321637426900585e-06, |
|
"loss": 0.5266, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"eval_loss": 2.5005078315734863, |
|
"eval_runtime": 54.9464, |
|
"eval_samples_per_second": 120.645, |
|
"eval_steps_per_second": 1.893, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 5.087719298245614e-06, |
|
"loss": 0.5279, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_loss": 2.5144731998443604, |
|
"eval_runtime": 54.9856, |
|
"eval_samples_per_second": 120.559, |
|
"eval_steps_per_second": 1.891, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.853801169590644e-06, |
|
"loss": 0.5231, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_loss": 2.5163862705230713, |
|
"eval_runtime": 54.965, |
|
"eval_samples_per_second": 120.604, |
|
"eval_steps_per_second": 1.892, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 4.619883040935673e-06, |
|
"loss": 0.5157, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"eval_loss": 2.4902589321136475, |
|
"eval_runtime": 54.9685, |
|
"eval_samples_per_second": 120.596, |
|
"eval_steps_per_second": 1.892, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 4.3859649122807014e-06, |
|
"loss": 0.5153, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"eval_loss": 2.5248496532440186, |
|
"eval_runtime": 55.0107, |
|
"eval_samples_per_second": 120.504, |
|
"eval_steps_per_second": 1.891, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 4.152046783625731e-06, |
|
"loss": 0.5238, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"eval_loss": 2.4956910610198975, |
|
"eval_runtime": 54.9681, |
|
"eval_samples_per_second": 120.597, |
|
"eval_steps_per_second": 1.892, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 3.9181286549707605e-06, |
|
"loss": 0.5229, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"eval_loss": 2.509634256362915, |
|
"eval_runtime": 55.0395, |
|
"eval_samples_per_second": 120.441, |
|
"eval_steps_per_second": 1.89, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 3.6842105263157892e-06, |
|
"loss": 0.5099, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_loss": 2.505375862121582, |
|
"eval_runtime": 54.9659, |
|
"eval_samples_per_second": 120.602, |
|
"eval_steps_per_second": 1.892, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 3.4502923976608188e-06, |
|
"loss": 0.5164, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"eval_loss": 2.512755870819092, |
|
"eval_runtime": 54.9727, |
|
"eval_samples_per_second": 120.587, |
|
"eval_steps_per_second": 1.892, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 3.216374269005848e-06, |
|
"loss": 0.5147, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"eval_loss": 2.5104758739471436, |
|
"eval_runtime": 54.9829, |
|
"eval_samples_per_second": 120.565, |
|
"eval_steps_per_second": 1.891, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 2.9824561403508774e-06, |
|
"loss": 0.5092, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"eval_loss": 2.5510807037353516, |
|
"eval_runtime": 54.9886, |
|
"eval_samples_per_second": 120.552, |
|
"eval_steps_per_second": 1.891, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 2.7485380116959066e-06, |
|
"loss": 0.5123, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"eval_loss": 2.4837098121643066, |
|
"eval_runtime": 54.9612, |
|
"eval_samples_per_second": 120.612, |
|
"eval_steps_per_second": 1.892, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 2.5146198830409357e-06, |
|
"loss": 0.5077, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"eval_loss": 2.5026121139526367, |
|
"eval_runtime": 55.0018, |
|
"eval_samples_per_second": 120.523, |
|
"eval_steps_per_second": 1.891, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 2.2807017543859652e-06, |
|
"loss": 0.5112, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"eval_loss": 2.514636278152466, |
|
"eval_runtime": 54.9811, |
|
"eval_samples_per_second": 120.569, |
|
"eval_steps_per_second": 1.892, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 2.0467836257309943e-06, |
|
"loss": 0.5033, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"eval_loss": 2.537416696548462, |
|
"eval_runtime": 54.983, |
|
"eval_samples_per_second": 120.565, |
|
"eval_steps_per_second": 1.891, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 1.8128654970760235e-06, |
|
"loss": 0.5111, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"eval_loss": 2.515895366668701, |
|
"eval_runtime": 54.9923, |
|
"eval_samples_per_second": 120.544, |
|
"eval_steps_per_second": 1.891, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 1.5789473684210528e-06, |
|
"loss": 0.5119, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"eval_loss": 2.5189149379730225, |
|
"eval_runtime": 54.9887, |
|
"eval_samples_per_second": 120.552, |
|
"eval_steps_per_second": 1.891, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 1.345029239766082e-06, |
|
"loss": 0.5022, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"eval_loss": 2.506300926208496, |
|
"eval_runtime": 54.9799, |
|
"eval_samples_per_second": 120.571, |
|
"eval_steps_per_second": 1.892, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 1.1111111111111112e-06, |
|
"loss": 0.5051, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_loss": 2.4811651706695557, |
|
"eval_runtime": 54.958, |
|
"eval_samples_per_second": 120.619, |
|
"eval_steps_per_second": 1.892, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 8.771929824561404e-07, |
|
"loss": 0.5028, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"eval_loss": 2.4914138317108154, |
|
"eval_runtime": 55.0024, |
|
"eval_samples_per_second": 120.522, |
|
"eval_steps_per_second": 1.891, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 6.432748538011697e-07, |
|
"loss": 0.5066, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"eval_loss": 2.5056285858154297, |
|
"eval_runtime": 54.9649, |
|
"eval_samples_per_second": 120.604, |
|
"eval_steps_per_second": 1.892, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 4.093567251461989e-07, |
|
"loss": 0.5058, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"eval_loss": 2.53446102142334, |
|
"eval_runtime": 54.9817, |
|
"eval_samples_per_second": 120.567, |
|
"eval_steps_per_second": 1.892, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 1.7543859649122808e-07, |
|
"loss": 0.507, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"eval_loss": 2.507356882095337, |
|
"eval_runtime": 55.001, |
|
"eval_samples_per_second": 120.525, |
|
"eval_steps_per_second": 1.891, |
|
"step": 213000 |
|
} |
|
], |
|
"max_steps": 213750, |
|
"num_train_epochs": 10, |
|
"total_flos": 4.4847043698061394e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|