|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 16.99889502762431, |
|
"global_step": 5763, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.38, |
|
"gpu_memory": 2825061888, |
|
"learning_rate": 8.32e-06, |
|
"loss": 4.6062, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.664e-05, |
|
"loss": 2.7746, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bp": 0.021341648192077716, |
|
"eval_counts": [ |
|
342, |
|
58, |
|
18, |
|
6 |
|
], |
|
"eval_loss": 2.035790205001831, |
|
"eval_precisions": [ |
|
34.862385321100916, |
|
8.516886930983848, |
|
4.651162790697675, |
|
2.3529411764705883 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 35.7733, |
|
"eval_samples_per_second": 8.386, |
|
"eval_score": 0.16113155714674393, |
|
"eval_steps_per_second": 8.386, |
|
"eval_sys_len": 981, |
|
"eval_totals": [ |
|
981, |
|
681, |
|
387, |
|
255 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.4959999999999998e-05, |
|
"loss": 2.2201, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 3.2437898089171974e-05, |
|
"loss": 1.9599, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 3.1775477707006364e-05, |
|
"loss": 1.8228, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bp": 0.1919535866757935, |
|
"eval_counts": [ |
|
640, |
|
199, |
|
91, |
|
36 |
|
], |
|
"eval_loss": 1.740516185760498, |
|
"eval_precisions": [ |
|
35.67447045707915, |
|
13.319946452476573, |
|
7.526881720430108, |
|
3.896103896103896 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 51.5291, |
|
"eval_samples_per_second": 5.822, |
|
"eval_score": 2.0855597670386987, |
|
"eval_steps_per_second": 5.822, |
|
"eval_sys_len": 1794, |
|
"eval_totals": [ |
|
1794, |
|
1494, |
|
1209, |
|
924 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 3.111305732484076e-05, |
|
"loss": 1.7275, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 3.0450636942675155e-05, |
|
"loss": 1.614, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bp": 0.157930307305936, |
|
"eval_counts": [ |
|
662, |
|
239, |
|
127, |
|
66 |
|
], |
|
"eval_loss": 1.6653738021850586, |
|
"eval_precisions": [ |
|
39.61699581089168, |
|
17.432530999270604, |
|
11.598173515981735, |
|
8.02919708029197 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 46.1755, |
|
"eval_samples_per_second": 6.497, |
|
"eval_score": 2.515019790343611, |
|
"eval_steps_per_second": 6.497, |
|
"eval_sys_len": 1671, |
|
"eval_totals": [ |
|
1671, |
|
1371, |
|
1095, |
|
822 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.9788216560509553e-05, |
|
"loss": 1.561, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.9125796178343946e-05, |
|
"loss": 1.4029, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.8463375796178344e-05, |
|
"loss": 1.4541, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bp": 0.06814983706797134, |
|
"eval_counts": [ |
|
481, |
|
162, |
|
79, |
|
37 |
|
], |
|
"eval_loss": 1.6631227731704712, |
|
"eval_precisions": [ |
|
37.286821705426355, |
|
16.363636363636363, |
|
10.881542699724518, |
|
6.630824372759856 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 45.6186, |
|
"eval_samples_per_second": 6.576, |
|
"eval_score": 0.9871612910485801, |
|
"eval_steps_per_second": 6.576, |
|
"eval_sys_len": 1290, |
|
"eval_totals": [ |
|
1290, |
|
990, |
|
726, |
|
558 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.7800955414012737e-05, |
|
"loss": 1.4088, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.713853503184713e-05, |
|
"loss": 1.3351, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.647611464968153e-05, |
|
"loss": 1.3229, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bp": 0.23640264658354365, |
|
"eval_counts": [ |
|
633, |
|
216, |
|
105, |
|
58 |
|
], |
|
"eval_loss": 1.6731408834457397, |
|
"eval_precisions": [ |
|
32.5115562403698, |
|
13.114754098360656, |
|
7.658643326039387, |
|
5.239385727190605 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 51.3595, |
|
"eval_samples_per_second": 5.841, |
|
"eval_score": 2.703708498377427, |
|
"eval_steps_per_second": 5.841, |
|
"eval_sys_len": 1947, |
|
"eval_totals": [ |
|
1947, |
|
1647, |
|
1371, |
|
1107 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.5813694267515922e-05, |
|
"loss": 1.2429, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.515127388535032e-05, |
|
"loss": 1.2329, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bp": 0.07532276614122083, |
|
"eval_counts": [ |
|
579, |
|
202, |
|
98, |
|
55 |
|
], |
|
"eval_loss": 1.6539884805679321, |
|
"eval_precisions": [ |
|
43.665158371040725, |
|
19.68810916179337, |
|
13.01460823373174, |
|
9.499136442141623 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 46.8126, |
|
"eval_samples_per_second": 6.409, |
|
"eval_score": 1.3600028829560191, |
|
"eval_steps_per_second": 6.409, |
|
"eval_sys_len": 1326, |
|
"eval_totals": [ |
|
1326, |
|
1026, |
|
753, |
|
579 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 2034 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.4488853503184713e-05, |
|
"loss": 1.2504, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.3826433121019104e-05, |
|
"loss": 1.1421, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.31640127388535e-05, |
|
"loss": 1.1795, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bp": 0.17181721996808308, |
|
"eval_counts": [ |
|
768, |
|
262, |
|
133, |
|
70 |
|
], |
|
"eval_loss": 1.667359471321106, |
|
"eval_precisions": [ |
|
44.599303135888505, |
|
18.424753867791843, |
|
11.697449428320141, |
|
8.018327605956472 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 50.5053, |
|
"eval_samples_per_second": 5.94, |
|
"eval_score": 2.862812289607837, |
|
"eval_steps_per_second": 5.94, |
|
"eval_sys_len": 1722, |
|
"eval_totals": [ |
|
1722, |
|
1422, |
|
1137, |
|
873 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 2373 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.2501592356687895e-05, |
|
"loss": 1.0902, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.183917197452229e-05, |
|
"loss": 1.0705, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.1176751592356686e-05, |
|
"loss": 1.1128, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bp": 0.2669632643662467, |
|
"eval_counts": [ |
|
866, |
|
300, |
|
163, |
|
96 |
|
], |
|
"eval_loss": 1.708727240562439, |
|
"eval_precisions": [ |
|
42.26451927769644, |
|
17.152658662092623, |
|
11.20274914089347, |
|
8.226221079691516 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 53.4181, |
|
"eval_samples_per_second": 5.616, |
|
"eval_score": 4.291998839505449, |
|
"eval_steps_per_second": 5.616, |
|
"eval_sys_len": 2049, |
|
"eval_totals": [ |
|
2049, |
|
1749, |
|
1455, |
|
1167 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 2712 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 2.051433121019108e-05, |
|
"loss": 1.0162, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.9851910828025477e-05, |
|
"loss": 1.0183, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bp": 0.09731210069014802, |
|
"eval_counts": [ |
|
678, |
|
233, |
|
102, |
|
45 |
|
], |
|
"eval_loss": 1.7135441303253174, |
|
"eval_precisions": [ |
|
47.47899159663866, |
|
20.656028368794328, |
|
12.23021582733813, |
|
7.142857142857143 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 50.1778, |
|
"eval_samples_per_second": 5.979, |
|
"eval_score": 1.664870454299152, |
|
"eval_steps_per_second": 5.979, |
|
"eval_sys_len": 1428, |
|
"eval_totals": [ |
|
1428, |
|
1128, |
|
834, |
|
630 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 3051 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.918949044585987e-05, |
|
"loss": 1.0367, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.8527070063694264e-05, |
|
"loss": 0.9645, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.786464968152866e-05, |
|
"loss": 0.9616, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bp": 0.22930577411313655, |
|
"eval_counts": [ |
|
768, |
|
280, |
|
145, |
|
80 |
|
], |
|
"eval_loss": 1.736754298210144, |
|
"eval_precisions": [ |
|
39.93759750390016, |
|
17.25200246457178, |
|
10.837070254110612, |
|
7.428040854224698 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 57.956, |
|
"eval_samples_per_second": 5.176, |
|
"eval_score": 3.518980787396955, |
|
"eval_steps_per_second": 5.176, |
|
"eval_sys_len": 1923, |
|
"eval_totals": [ |
|
1923, |
|
1623, |
|
1338, |
|
1077 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.7202229299363055e-05, |
|
"loss": 0.9403, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.6539808917197452e-05, |
|
"loss": 0.9059, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.5877388535031846e-05, |
|
"loss": 0.9249, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bp": 0.1751321349922995, |
|
"eval_counts": [ |
|
748, |
|
240, |
|
115, |
|
63 |
|
], |
|
"eval_loss": 1.782728672027588, |
|
"eval_precisions": [ |
|
43.13725490196079, |
|
16.736401673640167, |
|
10.008703220191471, |
|
7.11864406779661 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 54.5903, |
|
"eval_samples_per_second": 5.495, |
|
"eval_score": 2.6374744638290037, |
|
"eval_steps_per_second": 5.495, |
|
"eval_sys_len": 1734, |
|
"eval_totals": [ |
|
1734, |
|
1434, |
|
1149, |
|
885 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 3729 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.5214968152866242e-05, |
|
"loss": 0.8587, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.4552547770700635e-05, |
|
"loss": 0.8739, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bp": 0.1555153512571023, |
|
"eval_counts": [ |
|
739, |
|
267, |
|
125, |
|
60 |
|
], |
|
"eval_loss": 1.8148356676101685, |
|
"eval_precisions": [ |
|
44.46450060168472, |
|
19.60352422907489, |
|
11.671335200746965, |
|
7.462686567164179 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 53.3032, |
|
"eval_samples_per_second": 5.628, |
|
"eval_score": 2.581452241674501, |
|
"eval_steps_per_second": 5.628, |
|
"eval_sys_len": 1662, |
|
"eval_totals": [ |
|
1662, |
|
1362, |
|
1071, |
|
804 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 4068 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.3890127388535031e-05, |
|
"loss": 0.8413, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.3227707006369426e-05, |
|
"loss": 0.8195, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 12.84, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.2565286624203822e-05, |
|
"loss": 0.823, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bp": 0.2187397058134024, |
|
"eval_counts": [ |
|
843, |
|
326, |
|
173, |
|
91 |
|
], |
|
"eval_loss": 1.8146471977233887, |
|
"eval_precisions": [ |
|
44.67408585055644, |
|
20.5419029615627, |
|
13.442113442113442, |
|
9.027777777777779 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 55.2439, |
|
"eval_samples_per_second": 5.43, |
|
"eval_score": 3.995892671984357, |
|
"eval_steps_per_second": 5.43, |
|
"eval_sys_len": 1887, |
|
"eval_totals": [ |
|
1887, |
|
1587, |
|
1287, |
|
1008 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 4407 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.1902866242038214e-05, |
|
"loss": 0.7992, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.124044585987261e-05, |
|
"loss": 0.7702, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 1.0578025477707005e-05, |
|
"loss": 0.7824, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bp": 0.16524048903893263, |
|
"eval_counts": [ |
|
719, |
|
244, |
|
108, |
|
52 |
|
], |
|
"eval_loss": 1.8748054504394531, |
|
"eval_precisions": [ |
|
42.34393404004712, |
|
17.453505007153076, |
|
9.72972972972973, |
|
6.081871345029239 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 54.7238, |
|
"eval_samples_per_second": 5.482, |
|
"eval_score": 2.389568242739576, |
|
"eval_steps_per_second": 5.482, |
|
"eval_sys_len": 1698, |
|
"eval_totals": [ |
|
1698, |
|
1398, |
|
1110, |
|
855 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 4746 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 9.9156050955414e-06, |
|
"loss": 0.7425, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 9.253184713375794e-06, |
|
"loss": 0.7501, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bp": 0.1953640836862138, |
|
"eval_counts": [ |
|
762, |
|
263, |
|
131, |
|
74 |
|
], |
|
"eval_loss": 1.9026106595993042, |
|
"eval_precisions": [ |
|
42.19269102990033, |
|
17.46347941567065, |
|
10.835401157981803, |
|
7.781282860147213 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 56.8759, |
|
"eval_samples_per_second": 5.275, |
|
"eval_score": 3.0843295492719487, |
|
"eval_steps_per_second": 5.275, |
|
"eval_sys_len": 1806, |
|
"eval_totals": [ |
|
1806, |
|
1506, |
|
1209, |
|
951 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 8.59076433121019e-06, |
|
"loss": 0.7315, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 7.928343949044585e-06, |
|
"loss": 0.7011, |
|
"step": 5248 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 7.265923566878981e-06, |
|
"loss": 0.7139, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bp": 0.23551335586741148, |
|
"eval_counts": [ |
|
816, |
|
277, |
|
129, |
|
72 |
|
], |
|
"eval_loss": 1.9286922216415405, |
|
"eval_precisions": [ |
|
41.97530864197531, |
|
16.849148418491485, |
|
9.57683741648107, |
|
6.70391061452514 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 58.3566, |
|
"eval_samples_per_second": 5.141, |
|
"eval_score": 3.4379225352028846, |
|
"eval_steps_per_second": 5.141, |
|
"eval_sys_len": 1944, |
|
"eval_totals": [ |
|
1944, |
|
1644, |
|
1347, |
|
1074 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 5424 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 6.6035031847133755e-06, |
|
"loss": 0.689, |
|
"step": 5504 |
|
}, |
|
{ |
|
"epoch": 16.61, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 5.94108280254777e-06, |
|
"loss": 0.6788, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"gpu_memory": 2903643648, |
|
"learning_rate": 5.278662420382165e-06, |
|
"loss": 0.7053, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bp": 0.2934278208519596, |
|
"eval_counts": [ |
|
886, |
|
340, |
|
171, |
|
99 |
|
], |
|
"eval_loss": 1.9354726076126099, |
|
"eval_precisions": [ |
|
41.47940074906367, |
|
18.51851851851852, |
|
11.089494163424124, |
|
7.746478873239437 |
|
], |
|
"eval_ref_len": 4755, |
|
"eval_runtime": 60.6492, |
|
"eval_samples_per_second": 4.946, |
|
"eval_score": 4.702891790634525, |
|
"eval_steps_per_second": 4.946, |
|
"eval_sys_len": 2136, |
|
"eval_totals": [ |
|
2136, |
|
1836, |
|
1542, |
|
1278 |
|
], |
|
"gpu_memory": 2903643648, |
|
"step": 5763 |
|
} |
|
], |
|
"max_steps": 6780, |
|
"num_train_epochs": 20, |
|
"total_flos": 1765580040806400.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|