|
{ |
|
"best_metric": 0.7640588283538818, |
|
"best_model_checkpoint": "./model_20230703_attempt1/checkpoint-400", |
|
"epoch": 4.481792717086835, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 2.5488, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 2.3131, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 2.1798, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 1.6567, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.000147, |
|
"loss": 1.2086, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00017699999999999997, |
|
"loss": 0.8952, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00020699999999999996, |
|
"loss": 1.0646, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.000237, |
|
"loss": 0.7725, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.000267, |
|
"loss": 0.8192, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 0.7403, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00029764397905759156, |
|
"loss": 0.6473, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0002950261780104712, |
|
"loss": 0.7524, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00029240837696335075, |
|
"loss": 0.6775, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00028979057591623034, |
|
"loss": 0.5884, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00028717277486910994, |
|
"loss": 0.7117, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00028455497382198953, |
|
"loss": 0.5925, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00028193717277486907, |
|
"loss": 0.6675, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00027931937172774867, |
|
"loss": 0.6126, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00027670157068062826, |
|
"loss": 0.557, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00027408376963350786, |
|
"loss": 0.5362, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 0.8287817239761353, |
|
"eval_runtime": 34.4542, |
|
"eval_samples_per_second": 11.61, |
|
"eval_steps_per_second": 1.451, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0002714659685863874, |
|
"loss": 0.4951, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.000268848167539267, |
|
"loss": 0.4904, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.0002662303664921466, |
|
"loss": 0.4544, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.0002636125654450262, |
|
"loss": 0.5705, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0002609947643979057, |
|
"loss": 0.5654, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0002583769633507853, |
|
"loss": 0.445, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0002557591623036649, |
|
"loss": 0.3859, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.0002531413612565445, |
|
"loss": 0.5015, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.00025052356020942404, |
|
"loss": 0.4177, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.00024790575916230364, |
|
"loss": 0.4322, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00024528795811518323, |
|
"loss": 0.3028, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.00024267015706806283, |
|
"loss": 0.3839, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.0002400523560209424, |
|
"loss": 0.3121, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.00023743455497382196, |
|
"loss": 0.5043, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.00023481675392670153, |
|
"loss": 0.3569, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.00023219895287958115, |
|
"loss": 0.2791, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.00022958115183246072, |
|
"loss": 0.2648, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.00022696335078534028, |
|
"loss": 0.2998, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.00022434554973821985, |
|
"loss": 0.3416, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.00022172774869109947, |
|
"loss": 0.3044, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_loss": 0.7640588283538818, |
|
"eval_runtime": 34.4837, |
|
"eval_samples_per_second": 11.6, |
|
"eval_steps_per_second": 1.45, |
|
"step": 400 |
|
} |
|
], |
|
"max_steps": 1246, |
|
"num_train_epochs": 14, |
|
"total_flos": 1.32826870923264e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|