|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.7790697674418605, |
|
"global_step": 1300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.1321089267730713, |
|
"eval_runtime": 5.3447, |
|
"eval_samples_per_second": 7.11, |
|
"eval_steps_per_second": 1.871, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.7755846977233887, |
|
"eval_runtime": 6.2618, |
|
"eval_samples_per_second": 6.069, |
|
"eval_steps_per_second": 1.597, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.661117672920227, |
|
"eval_runtime": 5.1457, |
|
"eval_samples_per_second": 7.385, |
|
"eval_steps_per_second": 1.943, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.6071234941482544, |
|
"eval_runtime": 6.3033, |
|
"eval_samples_per_second": 6.029, |
|
"eval_steps_per_second": 1.586, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.5763286352157593, |
|
"eval_runtime": 5.1664, |
|
"eval_samples_per_second": 7.355, |
|
"eval_steps_per_second": 1.936, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.54438316822052, |
|
"eval_runtime": 12.625, |
|
"eval_samples_per_second": 3.01, |
|
"eval_steps_per_second": 0.792, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.529874563217163, |
|
"eval_runtime": 5.1855, |
|
"eval_samples_per_second": 7.328, |
|
"eval_steps_per_second": 1.928, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.511551856994629, |
|
"eval_runtime": 5.1943, |
|
"eval_samples_per_second": 7.316, |
|
"eval_steps_per_second": 1.925, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.499739170074463, |
|
"eval_runtime": 5.1845, |
|
"eval_samples_per_second": 7.33, |
|
"eval_steps_per_second": 1.929, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.482990026473999, |
|
"eval_runtime": 5.191, |
|
"eval_samples_per_second": 7.32, |
|
"eval_steps_per_second": 1.926, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.4811052083969116, |
|
"eval_runtime": 5.2095, |
|
"eval_samples_per_second": 7.294, |
|
"eval_steps_per_second": 1.92, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.470807671546936, |
|
"eval_runtime": 5.1982, |
|
"eval_samples_per_second": 7.31, |
|
"eval_steps_per_second": 1.924, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.4635131359100342, |
|
"eval_runtime": 5.22, |
|
"eval_samples_per_second": 7.28, |
|
"eval_steps_per_second": 1.916, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.4525114297866821, |
|
"eval_runtime": 5.22, |
|
"eval_samples_per_second": 7.28, |
|
"eval_steps_per_second": 1.916, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.4475783109664917, |
|
"eval_runtime": 5.2098, |
|
"eval_samples_per_second": 7.294, |
|
"eval_steps_per_second": 1.919, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.4456068277359009, |
|
"eval_runtime": 5.2045, |
|
"eval_samples_per_second": 7.301, |
|
"eval_steps_per_second": 1.921, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.4422187805175781, |
|
"eval_runtime": 5.2064, |
|
"eval_samples_per_second": 7.299, |
|
"eval_steps_per_second": 1.921, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.4375722408294678, |
|
"eval_runtime": 5.2062, |
|
"eval_samples_per_second": 7.299, |
|
"eval_steps_per_second": 1.921, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.435781717300415, |
|
"eval_runtime": 5.2273, |
|
"eval_samples_per_second": 7.27, |
|
"eval_steps_per_second": 1.913, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.854651162790698e-06, |
|
"loss": 1.5857, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.4250125885009766, |
|
"eval_runtime": 5.2302, |
|
"eval_samples_per_second": 7.266, |
|
"eval_steps_per_second": 1.912, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.4228482246398926, |
|
"eval_runtime": 5.1419, |
|
"eval_samples_per_second": 7.39, |
|
"eval_steps_per_second": 1.945, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.41592538356781, |
|
"eval_runtime": 5.1705, |
|
"eval_samples_per_second": 7.349, |
|
"eval_steps_per_second": 1.934, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.4095944166183472, |
|
"eval_runtime": 5.1762, |
|
"eval_samples_per_second": 7.341, |
|
"eval_steps_per_second": 1.932, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.405040979385376, |
|
"eval_runtime": 5.1734, |
|
"eval_samples_per_second": 7.345, |
|
"eval_steps_per_second": 1.933, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.4017434120178223, |
|
"eval_runtime": 5.1936, |
|
"eval_samples_per_second": 7.317, |
|
"eval_steps_per_second": 1.925, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.3985930681228638, |
|
"eval_runtime": 5.1856, |
|
"eval_samples_per_second": 7.328, |
|
"eval_steps_per_second": 1.928, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.3949148654937744, |
|
"eval_runtime": 6.2986, |
|
"eval_samples_per_second": 6.033, |
|
"eval_steps_per_second": 1.588, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.3919830322265625, |
|
"eval_runtime": 5.193, |
|
"eval_samples_per_second": 7.318, |
|
"eval_steps_per_second": 1.926, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.389172077178955, |
|
"eval_runtime": 5.1972, |
|
"eval_samples_per_second": 7.312, |
|
"eval_steps_per_second": 1.924, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.3876895904541016, |
|
"eval_runtime": 5.215, |
|
"eval_samples_per_second": 7.287, |
|
"eval_steps_per_second": 1.918, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.387901782989502, |
|
"eval_runtime": 5.2122, |
|
"eval_samples_per_second": 7.291, |
|
"eval_steps_per_second": 1.919, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.3818577527999878, |
|
"eval_runtime": 5.2068, |
|
"eval_samples_per_second": 7.298, |
|
"eval_steps_per_second": 1.921, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.3780750036239624, |
|
"eval_runtime": 5.2225, |
|
"eval_samples_per_second": 7.276, |
|
"eval_steps_per_second": 1.915, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.374315857887268, |
|
"eval_runtime": 5.2084, |
|
"eval_samples_per_second": 7.296, |
|
"eval_steps_per_second": 1.92, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.372006893157959, |
|
"eval_runtime": 5.209, |
|
"eval_samples_per_second": 7.295, |
|
"eval_steps_per_second": 1.92, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.3736425638198853, |
|
"eval_runtime": 5.2311, |
|
"eval_samples_per_second": 7.264, |
|
"eval_steps_per_second": 1.912, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.3681703805923462, |
|
"eval_runtime": 5.2032, |
|
"eval_samples_per_second": 7.303, |
|
"eval_steps_per_second": 1.922, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.3626172542572021, |
|
"eval_runtime": 5.2168, |
|
"eval_samples_per_second": 7.284, |
|
"eval_steps_per_second": 1.917, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.3621835708618164, |
|
"eval_runtime": 5.2269, |
|
"eval_samples_per_second": 7.27, |
|
"eval_steps_per_second": 1.913, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.709302325581395e-06, |
|
"loss": 1.4034, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.364233374595642, |
|
"eval_runtime": 5.3231, |
|
"eval_samples_per_second": 7.139, |
|
"eval_steps_per_second": 1.879, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.3565268516540527, |
|
"eval_runtime": 5.1533, |
|
"eval_samples_per_second": 7.374, |
|
"eval_steps_per_second": 1.941, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.35232412815094, |
|
"eval_runtime": 14.9101, |
|
"eval_samples_per_second": 2.549, |
|
"eval_steps_per_second": 0.671, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.3503801822662354, |
|
"eval_runtime": 5.1561, |
|
"eval_samples_per_second": 7.37, |
|
"eval_steps_per_second": 1.939, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.3449465036392212, |
|
"eval_runtime": 5.1639, |
|
"eval_samples_per_second": 7.359, |
|
"eval_steps_per_second": 1.937, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.3519093990325928, |
|
"eval_runtime": 5.1765, |
|
"eval_samples_per_second": 7.341, |
|
"eval_steps_per_second": 1.932, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.352266550064087, |
|
"eval_runtime": 17.2964, |
|
"eval_samples_per_second": 2.197, |
|
"eval_steps_per_second": 0.578, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.3380497694015503, |
|
"eval_runtime": 5.1906, |
|
"eval_samples_per_second": 7.321, |
|
"eval_steps_per_second": 1.927, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.340211033821106, |
|
"eval_runtime": 6.4362, |
|
"eval_samples_per_second": 5.904, |
|
"eval_steps_per_second": 1.554, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.335599660873413, |
|
"eval_runtime": 5.1978, |
|
"eval_samples_per_second": 7.311, |
|
"eval_steps_per_second": 1.924, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.3313099145889282, |
|
"eval_runtime": 5.1628, |
|
"eval_samples_per_second": 7.36, |
|
"eval_steps_per_second": 1.937, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.334537386894226, |
|
"eval_runtime": 7.3524, |
|
"eval_samples_per_second": 5.168, |
|
"eval_steps_per_second": 1.36, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.3304086923599243, |
|
"eval_runtime": 5.1618, |
|
"eval_samples_per_second": 7.362, |
|
"eval_steps_per_second": 1.937, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.3271808624267578, |
|
"eval_runtime": 5.1719, |
|
"eval_samples_per_second": 7.347, |
|
"eval_steps_per_second": 1.934, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.324944019317627, |
|
"eval_runtime": 5.1801, |
|
"eval_samples_per_second": 7.336, |
|
"eval_steps_per_second": 1.93, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.3218427896499634, |
|
"eval_runtime": 5.1851, |
|
"eval_samples_per_second": 7.329, |
|
"eval_steps_per_second": 1.929, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.3217554092407227, |
|
"eval_runtime": 5.2075, |
|
"eval_samples_per_second": 7.297, |
|
"eval_steps_per_second": 1.92, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.3211233615875244, |
|
"eval_runtime": 5.198, |
|
"eval_samples_per_second": 7.31, |
|
"eval_steps_per_second": 1.924, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.3166344165802002, |
|
"eval_runtime": 6.1478, |
|
"eval_samples_per_second": 6.181, |
|
"eval_steps_per_second": 1.627, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 1.3159008026123047, |
|
"eval_runtime": 5.2103, |
|
"eval_samples_per_second": 7.293, |
|
"eval_steps_per_second": 1.919, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.563953488372094e-06, |
|
"loss": 1.3513, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.3075319528579712, |
|
"eval_runtime": 5.2012, |
|
"eval_samples_per_second": 7.306, |
|
"eval_steps_per_second": 1.923, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 1.3096569776535034, |
|
"eval_runtime": 5.144, |
|
"eval_samples_per_second": 7.387, |
|
"eval_steps_per_second": 1.944, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 1.3026896715164185, |
|
"eval_runtime": 5.1657, |
|
"eval_samples_per_second": 7.356, |
|
"eval_steps_per_second": 1.936, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 1.2962864637374878, |
|
"eval_runtime": 5.1706, |
|
"eval_samples_per_second": 7.349, |
|
"eval_steps_per_second": 1.934, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.294777512550354, |
|
"eval_runtime": 5.1816, |
|
"eval_samples_per_second": 7.334, |
|
"eval_steps_per_second": 1.93, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1.2898850440979004, |
|
"eval_runtime": 6.0458, |
|
"eval_samples_per_second": 6.285, |
|
"eval_steps_per_second": 1.654, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.2885679006576538, |
|
"eval_runtime": 5.1898, |
|
"eval_samples_per_second": 7.322, |
|
"eval_steps_per_second": 1.927, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 1.288139820098877, |
|
"eval_runtime": 5.1979, |
|
"eval_samples_per_second": 7.311, |
|
"eval_steps_per_second": 1.924, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.281440258026123, |
|
"eval_runtime": 5.1891, |
|
"eval_samples_per_second": 7.323, |
|
"eval_steps_per_second": 1.927, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.2789274454116821, |
|
"eval_runtime": 5.2144, |
|
"eval_samples_per_second": 7.288, |
|
"eval_steps_per_second": 1.918, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.273104190826416, |
|
"eval_runtime": 5.1983, |
|
"eval_samples_per_second": 7.31, |
|
"eval_steps_per_second": 1.924, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 1.2735443115234375, |
|
"eval_runtime": 5.2215, |
|
"eval_samples_per_second": 7.278, |
|
"eval_steps_per_second": 1.915, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 1.2782071828842163, |
|
"eval_runtime": 5.6853, |
|
"eval_samples_per_second": 6.684, |
|
"eval_steps_per_second": 1.759, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 1.2720850706100464, |
|
"eval_runtime": 5.222, |
|
"eval_samples_per_second": 7.277, |
|
"eval_steps_per_second": 1.915, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 1.2730863094329834, |
|
"eval_runtime": 5.2055, |
|
"eval_samples_per_second": 7.3, |
|
"eval_steps_per_second": 1.921, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 1.2692456245422363, |
|
"eval_runtime": 7.9591, |
|
"eval_samples_per_second": 4.774, |
|
"eval_steps_per_second": 1.256, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 1.2623660564422607, |
|
"eval_runtime": 5.1922, |
|
"eval_samples_per_second": 7.319, |
|
"eval_steps_per_second": 1.926, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 1.264005422592163, |
|
"eval_runtime": 5.2073, |
|
"eval_samples_per_second": 7.297, |
|
"eval_steps_per_second": 1.92, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 1.264898419380188, |
|
"eval_runtime": 5.1964, |
|
"eval_samples_per_second": 7.313, |
|
"eval_steps_per_second": 1.924, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 1.2602505683898926, |
|
"eval_runtime": 5.2183, |
|
"eval_samples_per_second": 7.282, |
|
"eval_steps_per_second": 1.916, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 9.418604651162791e-06, |
|
"loss": 1.2432, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 1.2597508430480957, |
|
"eval_runtime": 5.2008, |
|
"eval_samples_per_second": 7.307, |
|
"eval_steps_per_second": 1.923, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 1.256094217300415, |
|
"eval_runtime": 5.1612, |
|
"eval_samples_per_second": 7.363, |
|
"eval_steps_per_second": 1.938, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 1.2484766244888306, |
|
"eval_runtime": 5.1459, |
|
"eval_samples_per_second": 7.384, |
|
"eval_steps_per_second": 1.943, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 1.2463735342025757, |
|
"eval_runtime": 5.1578, |
|
"eval_samples_per_second": 7.367, |
|
"eval_steps_per_second": 1.939, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 1.244008183479309, |
|
"eval_runtime": 5.1699, |
|
"eval_samples_per_second": 7.35, |
|
"eval_steps_per_second": 1.934, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.2463507652282715, |
|
"eval_runtime": 5.5355, |
|
"eval_samples_per_second": 6.865, |
|
"eval_steps_per_second": 1.807, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 1.23945152759552, |
|
"eval_runtime": 5.1986, |
|
"eval_samples_per_second": 7.31, |
|
"eval_steps_per_second": 1.924, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 1.2383465766906738, |
|
"eval_runtime": 5.2615, |
|
"eval_samples_per_second": 7.222, |
|
"eval_steps_per_second": 1.901, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 1.240387201309204, |
|
"eval_runtime": 5.2049, |
|
"eval_samples_per_second": 7.301, |
|
"eval_steps_per_second": 1.921, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 1.2330259084701538, |
|
"eval_runtime": 5.1959, |
|
"eval_samples_per_second": 7.313, |
|
"eval_steps_per_second": 1.925, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 1.2376052141189575, |
|
"eval_runtime": 5.1991, |
|
"eval_samples_per_second": 7.309, |
|
"eval_steps_per_second": 1.923, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 1.2420060634613037, |
|
"eval_runtime": 14.5358, |
|
"eval_samples_per_second": 2.614, |
|
"eval_steps_per_second": 0.688, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 1.2296521663665771, |
|
"eval_runtime": 7.0505, |
|
"eval_samples_per_second": 5.39, |
|
"eval_steps_per_second": 1.418, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 1.226386547088623, |
|
"eval_runtime": 5.1853, |
|
"eval_samples_per_second": 7.328, |
|
"eval_steps_per_second": 1.929, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 1.2350796461105347, |
|
"eval_runtime": 5.1891, |
|
"eval_samples_per_second": 7.323, |
|
"eval_steps_per_second": 1.927, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 1.2271411418914795, |
|
"eval_runtime": 5.187, |
|
"eval_samples_per_second": 7.326, |
|
"eval_steps_per_second": 1.928, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 1.2260706424713135, |
|
"eval_runtime": 5.1939, |
|
"eval_samples_per_second": 7.316, |
|
"eval_steps_per_second": 1.925, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 1.2366853952407837, |
|
"eval_runtime": 5.2167, |
|
"eval_samples_per_second": 7.284, |
|
"eval_steps_per_second": 1.917, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 1.2304943799972534, |
|
"eval_runtime": 5.2057, |
|
"eval_samples_per_second": 7.3, |
|
"eval_steps_per_second": 1.921, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 1.219275951385498, |
|
"eval_runtime": 5.1983, |
|
"eval_samples_per_second": 7.31, |
|
"eval_steps_per_second": 1.924, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 9.273255813953488e-06, |
|
"loss": 1.1933, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 1.220719575881958, |
|
"eval_runtime": 8.1461, |
|
"eval_samples_per_second": 4.665, |
|
"eval_steps_per_second": 1.228, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 1.2208986282348633, |
|
"eval_runtime": 5.1444, |
|
"eval_samples_per_second": 7.387, |
|
"eval_steps_per_second": 1.944, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 1.2238763570785522, |
|
"eval_runtime": 5.1637, |
|
"eval_samples_per_second": 7.359, |
|
"eval_steps_per_second": 1.937, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 1.219651699066162, |
|
"eval_runtime": 5.177, |
|
"eval_samples_per_second": 7.34, |
|
"eval_steps_per_second": 1.932, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 1.2128971815109253, |
|
"eval_runtime": 5.1833, |
|
"eval_samples_per_second": 7.331, |
|
"eval_steps_per_second": 1.929, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 1.2134791612625122, |
|
"eval_runtime": 6.8202, |
|
"eval_samples_per_second": 5.572, |
|
"eval_steps_per_second": 1.466, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 1.2144546508789062, |
|
"eval_runtime": 5.1795, |
|
"eval_samples_per_second": 7.337, |
|
"eval_steps_per_second": 1.931, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 1.2091519832611084, |
|
"eval_runtime": 5.1715, |
|
"eval_samples_per_second": 7.348, |
|
"eval_steps_per_second": 1.934, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 1.2074944972991943, |
|
"eval_runtime": 5.1783, |
|
"eval_samples_per_second": 7.338, |
|
"eval_steps_per_second": 1.931, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 1.2082455158233643, |
|
"eval_runtime": 5.1756, |
|
"eval_samples_per_second": 7.342, |
|
"eval_steps_per_second": 1.932, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.2062183618545532, |
|
"eval_runtime": 5.183, |
|
"eval_samples_per_second": 7.332, |
|
"eval_steps_per_second": 1.929, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 1.2009626626968384, |
|
"eval_runtime": 5.6305, |
|
"eval_samples_per_second": 6.749, |
|
"eval_steps_per_second": 1.776, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 1.200234293937683, |
|
"eval_runtime": 5.2064, |
|
"eval_samples_per_second": 7.299, |
|
"eval_steps_per_second": 1.921, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 1.1972352266311646, |
|
"eval_runtime": 5.3956, |
|
"eval_samples_per_second": 7.043, |
|
"eval_steps_per_second": 1.853, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 1.199525237083435, |
|
"eval_runtime": 5.2172, |
|
"eval_samples_per_second": 7.284, |
|
"eval_steps_per_second": 1.917, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 1.1978554725646973, |
|
"eval_runtime": 5.5745, |
|
"eval_samples_per_second": 6.817, |
|
"eval_steps_per_second": 1.794, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 1.1939880847930908, |
|
"eval_runtime": 5.2146, |
|
"eval_samples_per_second": 7.287, |
|
"eval_steps_per_second": 1.918, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 1.1886717081069946, |
|
"eval_runtime": 5.2166, |
|
"eval_samples_per_second": 7.284, |
|
"eval_steps_per_second": 1.917, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 1.1879463195800781, |
|
"eval_runtime": 5.2066, |
|
"eval_samples_per_second": 7.298, |
|
"eval_steps_per_second": 1.921, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 1.1874936819076538, |
|
"eval_runtime": 5.2058, |
|
"eval_samples_per_second": 7.3, |
|
"eval_steps_per_second": 1.921, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 9.127906976744186e-06, |
|
"loss": 1.1598, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 1.184380054473877, |
|
"eval_runtime": 5.2949, |
|
"eval_samples_per_second": 7.177, |
|
"eval_steps_per_second": 1.889, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 1.1831018924713135, |
|
"eval_runtime": 5.1436, |
|
"eval_samples_per_second": 7.388, |
|
"eval_steps_per_second": 1.944, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 1.1859960556030273, |
|
"eval_runtime": 5.1652, |
|
"eval_samples_per_second": 7.357, |
|
"eval_steps_per_second": 1.936, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 1.186689853668213, |
|
"eval_runtime": 5.1749, |
|
"eval_samples_per_second": 7.343, |
|
"eval_steps_per_second": 1.932, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 1.1849379539489746, |
|
"eval_runtime": 5.8205, |
|
"eval_samples_per_second": 6.529, |
|
"eval_steps_per_second": 1.718, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 1.1795214414596558, |
|
"eval_runtime": 5.1765, |
|
"eval_samples_per_second": 7.341, |
|
"eval_steps_per_second": 1.932, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 1.1802735328674316, |
|
"eval_runtime": 7.6116, |
|
"eval_samples_per_second": 4.992, |
|
"eval_steps_per_second": 1.314, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 1.179026484489441, |
|
"eval_runtime": 5.2022, |
|
"eval_samples_per_second": 7.305, |
|
"eval_steps_per_second": 1.922, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 1.1783987283706665, |
|
"eval_runtime": 5.1881, |
|
"eval_samples_per_second": 7.324, |
|
"eval_steps_per_second": 1.927, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 1.1762430667877197, |
|
"eval_runtime": 5.2146, |
|
"eval_samples_per_second": 7.287, |
|
"eval_steps_per_second": 1.918, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 1.180951476097107, |
|
"eval_runtime": 5.1987, |
|
"eval_samples_per_second": 7.31, |
|
"eval_steps_per_second": 1.924, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 1.175215244293213, |
|
"eval_runtime": 5.222, |
|
"eval_samples_per_second": 7.277, |
|
"eval_steps_per_second": 1.915, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 1.1738007068634033, |
|
"eval_runtime": 5.2057, |
|
"eval_samples_per_second": 7.3, |
|
"eval_steps_per_second": 1.921, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 1.1722891330718994, |
|
"eval_runtime": 5.1993, |
|
"eval_samples_per_second": 7.309, |
|
"eval_steps_per_second": 1.923, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 1.1736668348312378, |
|
"eval_runtime": 5.202, |
|
"eval_samples_per_second": 7.305, |
|
"eval_steps_per_second": 1.922, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 1.1691397428512573, |
|
"eval_runtime": 5.2043, |
|
"eval_samples_per_second": 7.302, |
|
"eval_steps_per_second": 1.922, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 1.1707478761672974, |
|
"eval_runtime": 5.2058, |
|
"eval_samples_per_second": 7.3, |
|
"eval_steps_per_second": 1.921, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 1.17750084400177, |
|
"eval_runtime": 5.2084, |
|
"eval_samples_per_second": 7.296, |
|
"eval_steps_per_second": 1.92, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 1.1693485975265503, |
|
"eval_runtime": 5.2105, |
|
"eval_samples_per_second": 7.293, |
|
"eval_steps_per_second": 1.919, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 1.174012541770935, |
|
"eval_runtime": 8.4066, |
|
"eval_samples_per_second": 4.52, |
|
"eval_steps_per_second": 1.19, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 8.982558139534884e-06, |
|
"loss": 1.1203, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 1.1877881288528442, |
|
"eval_runtime": 5.1939, |
|
"eval_samples_per_second": 7.316, |
|
"eval_steps_per_second": 1.925, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 1.1876509189605713, |
|
"eval_runtime": 5.1556, |
|
"eval_samples_per_second": 7.371, |
|
"eval_steps_per_second": 1.94, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 1.1674307584762573, |
|
"eval_runtime": 5.149, |
|
"eval_samples_per_second": 7.38, |
|
"eval_steps_per_second": 1.942, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 1.167423129081726, |
|
"eval_runtime": 5.1595, |
|
"eval_samples_per_second": 7.365, |
|
"eval_steps_per_second": 1.938, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 1.177182674407959, |
|
"eval_runtime": 8.3224, |
|
"eval_samples_per_second": 4.566, |
|
"eval_steps_per_second": 1.202, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 1.1748428344726562, |
|
"eval_runtime": 5.1694, |
|
"eval_samples_per_second": 7.351, |
|
"eval_steps_per_second": 1.934, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 1.1770687103271484, |
|
"eval_runtime": 6.4645, |
|
"eval_samples_per_second": 5.878, |
|
"eval_steps_per_second": 1.547, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 1.1737899780273438, |
|
"eval_runtime": 5.1852, |
|
"eval_samples_per_second": 7.329, |
|
"eval_steps_per_second": 1.929, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 1.164935827255249, |
|
"eval_runtime": 5.1999, |
|
"eval_samples_per_second": 7.308, |
|
"eval_steps_per_second": 1.923, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_loss": 1.1611236333847046, |
|
"eval_runtime": 5.1943, |
|
"eval_samples_per_second": 7.316, |
|
"eval_steps_per_second": 1.925, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 1.16485595703125, |
|
"eval_runtime": 5.2046, |
|
"eval_samples_per_second": 7.301, |
|
"eval_steps_per_second": 1.921, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 1.1628483533859253, |
|
"eval_runtime": 5.1977, |
|
"eval_samples_per_second": 7.311, |
|
"eval_steps_per_second": 1.924, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_loss": 1.1594696044921875, |
|
"eval_runtime": 5.1698, |
|
"eval_samples_per_second": 7.35, |
|
"eval_steps_per_second": 1.934, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 1.1594704389572144, |
|
"eval_runtime": 5.1839, |
|
"eval_samples_per_second": 7.33, |
|
"eval_steps_per_second": 1.929, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 1.1618760824203491, |
|
"eval_runtime": 5.1971, |
|
"eval_samples_per_second": 7.312, |
|
"eval_steps_per_second": 1.924, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 1.1532135009765625, |
|
"eval_runtime": 5.1859, |
|
"eval_samples_per_second": 7.327, |
|
"eval_steps_per_second": 1.928, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 1.148254156112671, |
|
"eval_runtime": 5.1949, |
|
"eval_samples_per_second": 7.315, |
|
"eval_steps_per_second": 1.925, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 1.1526851654052734, |
|
"eval_runtime": 5.2153, |
|
"eval_samples_per_second": 7.286, |
|
"eval_steps_per_second": 1.917, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_loss": 1.1553150415420532, |
|
"eval_runtime": 7.2932, |
|
"eval_samples_per_second": 5.21, |
|
"eval_steps_per_second": 1.371, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 1.1568715572357178, |
|
"eval_runtime": 5.2156, |
|
"eval_samples_per_second": 7.286, |
|
"eval_steps_per_second": 1.917, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 8.837209302325582e-06, |
|
"loss": 1.0236, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 1.1593568325042725, |
|
"eval_runtime": 15.4054, |
|
"eval_samples_per_second": 2.467, |
|
"eval_steps_per_second": 0.649, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 1.1579800844192505, |
|
"eval_runtime": 5.1382, |
|
"eval_samples_per_second": 7.396, |
|
"eval_steps_per_second": 1.946, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 1.1529394388198853, |
|
"eval_runtime": 5.1435, |
|
"eval_samples_per_second": 7.388, |
|
"eval_steps_per_second": 1.944, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 1.1465649604797363, |
|
"eval_runtime": 5.1543, |
|
"eval_samples_per_second": 7.372, |
|
"eval_steps_per_second": 1.94, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 1.1523265838623047, |
|
"eval_runtime": 5.1668, |
|
"eval_samples_per_second": 7.355, |
|
"eval_steps_per_second": 1.935, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.1473236083984375, |
|
"eval_runtime": 5.1929, |
|
"eval_samples_per_second": 7.318, |
|
"eval_steps_per_second": 1.926, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 1.1426836252212524, |
|
"eval_runtime": 5.1805, |
|
"eval_samples_per_second": 7.335, |
|
"eval_steps_per_second": 1.93, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 1.1456120014190674, |
|
"eval_runtime": 5.1879, |
|
"eval_samples_per_second": 7.325, |
|
"eval_steps_per_second": 1.928, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_loss": 1.1546478271484375, |
|
"eval_runtime": 6.9177, |
|
"eval_samples_per_second": 5.493, |
|
"eval_steps_per_second": 1.446, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 1.150542140007019, |
|
"eval_runtime": 5.2006, |
|
"eval_samples_per_second": 7.307, |
|
"eval_steps_per_second": 1.923, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 1.1451619863510132, |
|
"eval_runtime": 5.209, |
|
"eval_samples_per_second": 7.295, |
|
"eval_steps_per_second": 1.92, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 1.1448664665222168, |
|
"eval_runtime": 5.1984, |
|
"eval_samples_per_second": 7.31, |
|
"eval_steps_per_second": 1.924, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 1.142190933227539, |
|
"eval_runtime": 5.2014, |
|
"eval_samples_per_second": 7.306, |
|
"eval_steps_per_second": 1.923, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 1.1360749006271362, |
|
"eval_runtime": 5.213, |
|
"eval_samples_per_second": 7.289, |
|
"eval_steps_per_second": 1.918, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 1.1361669301986694, |
|
"eval_runtime": 5.225, |
|
"eval_samples_per_second": 7.273, |
|
"eval_steps_per_second": 1.914, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 1.152858018875122, |
|
"eval_runtime": 5.2072, |
|
"eval_samples_per_second": 7.298, |
|
"eval_steps_per_second": 1.92, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 1.1414388418197632, |
|
"eval_runtime": 5.2264, |
|
"eval_samples_per_second": 7.271, |
|
"eval_steps_per_second": 1.913, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_loss": 1.1355433464050293, |
|
"eval_runtime": 5.2274, |
|
"eval_samples_per_second": 7.269, |
|
"eval_steps_per_second": 1.913, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 1.140533447265625, |
|
"eval_runtime": 5.2091, |
|
"eval_samples_per_second": 7.295, |
|
"eval_steps_per_second": 1.92, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 1.1398884057998657, |
|
"eval_runtime": 5.2089, |
|
"eval_samples_per_second": 7.295, |
|
"eval_steps_per_second": 1.92, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 8.69186046511628e-06, |
|
"loss": 1.005, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 1.1313834190368652, |
|
"eval_runtime": 7.9714, |
|
"eval_samples_per_second": 4.767, |
|
"eval_steps_per_second": 1.254, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 1.1292839050292969, |
|
"eval_runtime": 5.1391, |
|
"eval_samples_per_second": 7.394, |
|
"eval_steps_per_second": 1.946, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_loss": 1.1339421272277832, |
|
"eval_runtime": 5.1448, |
|
"eval_samples_per_second": 7.386, |
|
"eval_steps_per_second": 1.944, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 1.1322623491287231, |
|
"eval_runtime": 5.1737, |
|
"eval_samples_per_second": 7.345, |
|
"eval_steps_per_second": 1.933, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 1.1225138902664185, |
|
"eval_runtime": 5.1822, |
|
"eval_samples_per_second": 7.333, |
|
"eval_steps_per_second": 1.93, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 1.117431640625, |
|
"eval_runtime": 5.1739, |
|
"eval_samples_per_second": 7.345, |
|
"eval_steps_per_second": 1.933, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 1.1242172718048096, |
|
"eval_runtime": 7.2847, |
|
"eval_samples_per_second": 5.216, |
|
"eval_steps_per_second": 1.373, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_loss": 1.1318007707595825, |
|
"eval_runtime": 5.4547, |
|
"eval_samples_per_second": 6.966, |
|
"eval_steps_per_second": 1.833, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 1.1215720176696777, |
|
"eval_runtime": 5.1989, |
|
"eval_samples_per_second": 7.309, |
|
"eval_steps_per_second": 1.923, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 1.1199665069580078, |
|
"eval_runtime": 5.1934, |
|
"eval_samples_per_second": 7.317, |
|
"eval_steps_per_second": 1.926, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 1.1283185482025146, |
|
"eval_runtime": 5.2047, |
|
"eval_samples_per_second": 7.301, |
|
"eval_steps_per_second": 1.921, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 1.1272627115249634, |
|
"eval_runtime": 5.2028, |
|
"eval_samples_per_second": 7.304, |
|
"eval_steps_per_second": 1.922, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_loss": 1.116969347000122, |
|
"eval_runtime": 5.2048, |
|
"eval_samples_per_second": 7.301, |
|
"eval_steps_per_second": 1.921, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_loss": 1.1125539541244507, |
|
"eval_runtime": 5.2168, |
|
"eval_samples_per_second": 7.284, |
|
"eval_steps_per_second": 1.917, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_loss": 1.1195616722106934, |
|
"eval_runtime": 5.1986, |
|
"eval_samples_per_second": 7.31, |
|
"eval_steps_per_second": 1.924, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 1.1192817687988281, |
|
"eval_runtime": 5.2203, |
|
"eval_samples_per_second": 7.279, |
|
"eval_steps_per_second": 1.916, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_loss": 1.1130948066711426, |
|
"eval_runtime": 9.4606, |
|
"eval_samples_per_second": 4.017, |
|
"eval_steps_per_second": 1.057, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 1.1146584749221802, |
|
"eval_runtime": 5.2013, |
|
"eval_samples_per_second": 7.306, |
|
"eval_steps_per_second": 1.923, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_loss": 1.118652582168579, |
|
"eval_runtime": 5.201, |
|
"eval_samples_per_second": 7.306, |
|
"eval_steps_per_second": 1.923, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 1.1133984327316284, |
|
"eval_runtime": 5.2035, |
|
"eval_samples_per_second": 7.303, |
|
"eval_steps_per_second": 1.922, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.546511627906978e-06, |
|
"loss": 1.0003, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 1.1122275590896606, |
|
"eval_runtime": 5.5704, |
|
"eval_samples_per_second": 6.822, |
|
"eval_steps_per_second": 1.795, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 1.1242709159851074, |
|
"eval_runtime": 5.1423, |
|
"eval_samples_per_second": 7.39, |
|
"eval_steps_per_second": 1.945, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 1.1163734197616577, |
|
"eval_runtime": 5.159, |
|
"eval_samples_per_second": 7.366, |
|
"eval_steps_per_second": 1.938, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 1.1083950996398926, |
|
"eval_runtime": 5.1716, |
|
"eval_samples_per_second": 7.348, |
|
"eval_steps_per_second": 1.934, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_loss": 1.1112630367279053, |
|
"eval_runtime": 5.1839, |
|
"eval_samples_per_second": 7.33, |
|
"eval_steps_per_second": 1.929, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 1.1141904592514038, |
|
"eval_runtime": 5.1752, |
|
"eval_samples_per_second": 7.343, |
|
"eval_steps_per_second": 1.932, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 1.109470248222351, |
|
"eval_runtime": 5.1964, |
|
"eval_samples_per_second": 7.313, |
|
"eval_steps_per_second": 1.924, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_loss": 1.114129662513733, |
|
"eval_runtime": 5.1874, |
|
"eval_samples_per_second": 7.325, |
|
"eval_steps_per_second": 1.928, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_loss": 1.1438744068145752, |
|
"eval_runtime": 6.8038, |
|
"eval_samples_per_second": 5.585, |
|
"eval_steps_per_second": 1.47, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_loss": 1.1356358528137207, |
|
"eval_runtime": 5.186, |
|
"eval_samples_per_second": 7.327, |
|
"eval_steps_per_second": 1.928, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 1.1280732154846191, |
|
"eval_runtime": 5.1897, |
|
"eval_samples_per_second": 7.322, |
|
"eval_steps_per_second": 1.927, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_loss": 1.130995750427246, |
|
"eval_runtime": 5.1866, |
|
"eval_samples_per_second": 7.327, |
|
"eval_steps_per_second": 1.928, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"eval_loss": 1.1382439136505127, |
|
"eval_runtime": 5.2111, |
|
"eval_samples_per_second": 7.292, |
|
"eval_steps_per_second": 1.919, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 1.142386555671692, |
|
"eval_runtime": 5.1981, |
|
"eval_samples_per_second": 7.31, |
|
"eval_steps_per_second": 1.924, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_loss": 1.126651406288147, |
|
"eval_runtime": 5.2225, |
|
"eval_samples_per_second": 7.276, |
|
"eval_steps_per_second": 1.915, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_loss": 1.1165131330490112, |
|
"eval_runtime": 5.2221, |
|
"eval_samples_per_second": 7.277, |
|
"eval_steps_per_second": 1.915, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_loss": 1.1212615966796875, |
|
"eval_runtime": 5.2824, |
|
"eval_samples_per_second": 7.194, |
|
"eval_steps_per_second": 1.893, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_loss": 1.134791374206543, |
|
"eval_runtime": 5.2245, |
|
"eval_samples_per_second": 7.273, |
|
"eval_steps_per_second": 1.914, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 1.1270241737365723, |
|
"eval_runtime": 5.3762, |
|
"eval_samples_per_second": 7.068, |
|
"eval_steps_per_second": 1.86, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_loss": 1.130293846130371, |
|
"eval_runtime": 5.2263, |
|
"eval_samples_per_second": 7.271, |
|
"eval_steps_per_second": 1.913, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 8.401162790697675e-06, |
|
"loss": 0.9125, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 1.1280182600021362, |
|
"eval_runtime": 5.2073, |
|
"eval_samples_per_second": 7.297, |
|
"eval_steps_per_second": 1.92, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_loss": 1.1197805404663086, |
|
"eval_runtime": 5.1419, |
|
"eval_samples_per_second": 7.39, |
|
"eval_steps_per_second": 1.945, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 1.1133369207382202, |
|
"eval_runtime": 5.1481, |
|
"eval_samples_per_second": 7.381, |
|
"eval_steps_per_second": 1.942, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_loss": 1.1289596557617188, |
|
"eval_runtime": 5.1768, |
|
"eval_samples_per_second": 7.34, |
|
"eval_steps_per_second": 1.932, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_loss": 1.1368350982666016, |
|
"eval_runtime": 5.1857, |
|
"eval_samples_per_second": 7.328, |
|
"eval_steps_per_second": 1.928, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_loss": 1.1226954460144043, |
|
"eval_runtime": 5.4937, |
|
"eval_samples_per_second": 6.917, |
|
"eval_steps_per_second": 1.82, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 1.1116739511489868, |
|
"eval_runtime": 5.1807, |
|
"eval_samples_per_second": 7.335, |
|
"eval_steps_per_second": 1.93, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 1.1072471141815186, |
|
"eval_runtime": 5.89, |
|
"eval_samples_per_second": 6.452, |
|
"eval_steps_per_second": 1.698, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_loss": 1.1166975498199463, |
|
"eval_runtime": 5.2072, |
|
"eval_samples_per_second": 7.298, |
|
"eval_steps_per_second": 1.92, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 1.119249939918518, |
|
"eval_runtime": 5.7107, |
|
"eval_samples_per_second": 6.654, |
|
"eval_steps_per_second": 1.751, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_loss": 1.1184831857681274, |
|
"eval_runtime": 5.2137, |
|
"eval_samples_per_second": 7.289, |
|
"eval_steps_per_second": 1.918, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"eval_loss": 1.1111912727355957, |
|
"eval_runtime": 5.9367, |
|
"eval_samples_per_second": 6.401, |
|
"eval_steps_per_second": 1.684, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 1.105357050895691, |
|
"eval_runtime": 5.2105, |
|
"eval_samples_per_second": 7.293, |
|
"eval_steps_per_second": 1.919, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_loss": 1.1098227500915527, |
|
"eval_runtime": 5.2189, |
|
"eval_samples_per_second": 7.281, |
|
"eval_steps_per_second": 1.916, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_loss": 1.1147123575210571, |
|
"eval_runtime": 5.2081, |
|
"eval_samples_per_second": 7.296, |
|
"eval_steps_per_second": 1.92, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_loss": 1.1088005304336548, |
|
"eval_runtime": 7.6087, |
|
"eval_samples_per_second": 4.994, |
|
"eval_steps_per_second": 1.314, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_loss": 1.1164624691009521, |
|
"eval_runtime": 5.2125, |
|
"eval_samples_per_second": 7.29, |
|
"eval_steps_per_second": 1.918, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 1.121812105178833, |
|
"eval_runtime": 5.2191, |
|
"eval_samples_per_second": 7.281, |
|
"eval_steps_per_second": 1.916, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_loss": 1.1162291765213013, |
|
"eval_runtime": 5.2203, |
|
"eval_samples_per_second": 7.279, |
|
"eval_steps_per_second": 1.916, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_loss": 1.1079984903335571, |
|
"eval_runtime": 5.2071, |
|
"eval_samples_per_second": 7.298, |
|
"eval_steps_per_second": 1.92, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 8.255813953488374e-06, |
|
"loss": 0.8748, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"eval_loss": 1.105452060699463, |
|
"eval_runtime": 5.2094, |
|
"eval_samples_per_second": 7.294, |
|
"eval_steps_per_second": 1.92, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 1.106619119644165, |
|
"eval_runtime": 5.1432, |
|
"eval_samples_per_second": 7.388, |
|
"eval_steps_per_second": 1.944, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_loss": 1.106793761253357, |
|
"eval_runtime": 5.1604, |
|
"eval_samples_per_second": 7.364, |
|
"eval_steps_per_second": 1.938, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_loss": 1.1173338890075684, |
|
"eval_runtime": 5.1605, |
|
"eval_samples_per_second": 7.364, |
|
"eval_steps_per_second": 1.938, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_loss": 1.114292025566101, |
|
"eval_runtime": 5.1855, |
|
"eval_samples_per_second": 7.328, |
|
"eval_steps_per_second": 1.928, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_loss": 1.1030842065811157, |
|
"eval_runtime": 6.5892, |
|
"eval_samples_per_second": 5.767, |
|
"eval_steps_per_second": 1.518, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"eval_loss": 1.1062043905258179, |
|
"eval_runtime": 5.1841, |
|
"eval_samples_per_second": 7.33, |
|
"eval_steps_per_second": 1.929, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_loss": 1.1063551902770996, |
|
"eval_runtime": 5.1918, |
|
"eval_samples_per_second": 7.319, |
|
"eval_steps_per_second": 1.926, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 1.1138606071472168, |
|
"eval_runtime": 5.2119, |
|
"eval_samples_per_second": 7.291, |
|
"eval_steps_per_second": 1.919, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_loss": 1.110581874847412, |
|
"eval_runtime": 5.1939, |
|
"eval_samples_per_second": 7.316, |
|
"eval_steps_per_second": 1.925, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_loss": 1.100100040435791, |
|
"eval_runtime": 5.1966, |
|
"eval_samples_per_second": 7.312, |
|
"eval_steps_per_second": 1.924, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 1.0952939987182617, |
|
"eval_runtime": 5.1974, |
|
"eval_samples_per_second": 7.311, |
|
"eval_steps_per_second": 1.924, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_loss": 1.0998646020889282, |
|
"eval_runtime": 5.19, |
|
"eval_samples_per_second": 7.322, |
|
"eval_steps_per_second": 1.927, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_loss": 1.1082687377929688, |
|
"eval_runtime": 5.4201, |
|
"eval_samples_per_second": 7.011, |
|
"eval_steps_per_second": 1.845, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_loss": 1.1146087646484375, |
|
"eval_runtime": 5.1949, |
|
"eval_samples_per_second": 7.315, |
|
"eval_steps_per_second": 1.925, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 1.1090608835220337, |
|
"eval_runtime": 5.2206, |
|
"eval_samples_per_second": 7.279, |
|
"eval_steps_per_second": 1.915, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_loss": 1.1007745265960693, |
|
"eval_runtime": 5.2062, |
|
"eval_samples_per_second": 7.299, |
|
"eval_steps_per_second": 1.921, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_loss": 1.0957188606262207, |
|
"eval_runtime": 5.2196, |
|
"eval_samples_per_second": 7.28, |
|
"eval_steps_per_second": 1.916, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_loss": 1.1012167930603027, |
|
"eval_runtime": 5.2206, |
|
"eval_samples_per_second": 7.279, |
|
"eval_steps_per_second": 1.915, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"eval_loss": 1.09498929977417, |
|
"eval_runtime": 5.2178, |
|
"eval_samples_per_second": 7.283, |
|
"eval_steps_per_second": 1.917, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 8.110465116279071e-06, |
|
"loss": 0.8599, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 1.0983973741531372, |
|
"eval_runtime": 5.203, |
|
"eval_samples_per_second": 7.303, |
|
"eval_steps_per_second": 1.922, |
|
"step": 1300 |
|
} |
|
], |
|
"max_steps": 6880, |
|
"num_train_epochs": 20, |
|
"total_flos": 5658055802880000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|