{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.5045728266982867,
  "global_step": 420000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 4.8208841872978234e-05,
      "loss": 0.0755,
      "step": 10000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6417683745956466e-05,
      "loss": 0.0721,
      "step": 20000
    },
    {
      "epoch": 0.07,
      "eval_loss": 0.06494459509849548,
      "eval_runtime": 754.7473,
      "eval_samples_per_second": 82.19,
      "eval_steps_per_second": 20.549,
      "step": 20000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.462652561893469e-05,
      "loss": 0.0704,
      "step": 30000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2835367491912923e-05,
      "loss": 0.0701,
      "step": 40000
    },
    {
      "epoch": 0.14,
      "eval_loss": 0.06424280256032944,
      "eval_runtime": 676.9024,
      "eval_samples_per_second": 91.642,
      "eval_steps_per_second": 22.912,
      "step": 40000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.104420936489115e-05,
      "loss": 0.0713,
      "step": 50000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.925305123786938e-05,
      "loss": 0.0712,
      "step": 60000
    },
    {
      "epoch": 0.21,
      "eval_loss": 0.0624563954770565,
      "eval_runtime": 678.8069,
      "eval_samples_per_second": 91.385,
      "eval_steps_per_second": 22.847,
      "step": 60000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.746189311084761e-05,
      "loss": 0.0698,
      "step": 70000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5670734983825844e-05,
      "loss": 0.0667,
      "step": 80000
    },
    {
      "epoch": 0.29,
      "eval_loss": 0.06348983943462372,
      "eval_runtime": 675.6836,
      "eval_samples_per_second": 91.808,
      "eval_steps_per_second": 22.953,
      "step": 80000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3879576856804076e-05,
      "loss": 0.0657,
      "step": 90000
    },
    {
      "epoch": 0.36,
      "learning_rate": 3.20884187297823e-05,
      "loss": 0.0665,
      "step": 100000
    },
    {
      "epoch": 0.36,
      "eval_loss": 0.06186612322926521,
      "eval_runtime": 676.1915,
      "eval_samples_per_second": 91.739,
      "eval_steps_per_second": 22.936,
      "step": 100000
    },
    {
      "epoch": 0.39,
      "learning_rate": 3.0297260602760537e-05,
      "loss": 0.0657,
      "step": 110000
    },
    {
      "epoch": 0.43,
      "learning_rate": 2.8506102475738766e-05,
      "loss": 0.0653,
      "step": 120000
    },
    {
      "epoch": 0.43,
      "eval_loss": 0.061661478132009506,
      "eval_runtime": 677.4648,
      "eval_samples_per_second": 91.566,
      "eval_steps_per_second": 22.893,
      "step": 120000
    },
    {
      "epoch": 0.47,
      "learning_rate": 2.6714944348716997e-05,
      "loss": 0.0658,
      "step": 130000
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.4923786221695226e-05,
      "loss": 0.0656,
      "step": 140000
    },
    {
      "epoch": 0.5,
      "eval_loss": 0.06098250672221184,
      "eval_runtime": 677.3485,
      "eval_samples_per_second": 91.582,
      "eval_steps_per_second": 22.897,
      "step": 140000
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.3132628094673455e-05,
      "loss": 0.0649,
      "step": 150000
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.1341469967651687e-05,
      "loss": 0.0648,
      "step": 160000
    },
    {
      "epoch": 0.57,
      "eval_loss": 0.06077565252780914,
      "eval_runtime": 677.1708,
      "eval_samples_per_second": 91.606,
      "eval_steps_per_second": 22.903,
      "step": 160000
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.9550311840629915e-05,
      "loss": 0.0642,
      "step": 170000
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.7759153713608147e-05,
      "loss": 0.0649,
      "step": 180000
    },
    {
      "epoch": 0.64,
      "eval_loss": 0.05941811203956604,
      "eval_runtime": 677.9109,
      "eval_samples_per_second": 91.506,
      "eval_steps_per_second": 22.878,
      "step": 180000
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.5967995586586376e-05,
      "loss": 0.0652,
      "step": 190000
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.4176837459564608e-05,
      "loss": 0.0638,
      "step": 200000
    },
    {
      "epoch": 0.72,
      "eval_loss": 0.06042506918311119,
      "eval_runtime": 678.1922,
      "eval_samples_per_second": 91.468,
      "eval_steps_per_second": 22.868,
      "step": 200000
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.2385679332542836e-05,
      "loss": 0.0621,
      "step": 210000
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.0594521205521066e-05,
      "loss": 0.0643,
      "step": 220000
    },
    {
      "epoch": 0.79,
      "eval_loss": 0.059569716453552246,
      "eval_runtime": 688.4187,
      "eval_samples_per_second": 90.109,
      "eval_steps_per_second": 22.528,
      "step": 220000
    },
    {
      "epoch": 0.82,
      "learning_rate": 8.803363078499297e-06,
      "loss": 0.0623,
      "step": 230000
    },
    {
      "epoch": 0.86,
      "learning_rate": 7.012204951477527e-06,
      "loss": 0.0646,
      "step": 240000
    },
    {
      "epoch": 0.86,
      "eval_loss": 0.059120796620845795,
      "eval_runtime": 715.8659,
      "eval_samples_per_second": 86.654,
      "eval_steps_per_second": 21.665,
      "step": 240000
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.5522104682445576e-05,
      "loss": 0.0634,
      "step": 250000
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.53429888697434e-05,
      "loss": 0.0648,
      "step": 260000
    },
    {
      "epoch": 0.93,
      "eval_loss": 0.06051425263285637,
      "eval_runtime": 694.96,
      "eval_samples_per_second": 89.261,
      "eval_steps_per_second": 22.316,
      "step": 260000
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.516387305704122e-05,
      "loss": 0.0644,
      "step": 270000
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.4984757244339046e-05,
      "loss": 0.0655,
      "step": 280000
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.06069951504468918,
      "eval_runtime": 679.0968,
      "eval_samples_per_second": 91.346,
      "eval_steps_per_second": 22.838,
      "step": 280000
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.4805641431636866e-05,
      "loss": 0.0652,
      "step": 290000
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.462652561893469e-05,
      "loss": 0.0631,
      "step": 300000
    },
    {
      "epoch": 1.07,
      "eval_loss": 0.06102127209305763,
      "eval_runtime": 705.9715,
      "eval_samples_per_second": 87.869,
      "eval_steps_per_second": 21.968,
      "step": 300000
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.444740980623252e-05,
      "loss": 0.064,
      "step": 310000
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.426829399353034e-05,
      "loss": 0.0643,
      "step": 320000
    },
    {
      "epoch": 1.15,
      "eval_loss": 0.05971848592162132,
      "eval_runtime": 698.4495,
      "eval_samples_per_second": 88.815,
      "eval_steps_per_second": 22.205,
      "step": 320000
    },
    {
      "epoch": 1.18,
      "learning_rate": 4.408917818082816e-05,
      "loss": 0.0638,
      "step": 330000
    },
    {
      "epoch": 1.22,
      "learning_rate": 4.391006236812599e-05,
      "loss": 0.064,
      "step": 340000
    },
    {
      "epoch": 1.22,
      "eval_loss": 0.05945688858628273,
      "eval_runtime": 700.9514,
      "eval_samples_per_second": 88.498,
      "eval_steps_per_second": 22.126,
      "step": 340000
    },
    {
      "epoch": 1.25,
      "learning_rate": 4.373094655542381e-05,
      "loss": 0.0613,
      "step": 350000
    },
    {
      "epoch": 1.29,
      "learning_rate": 4.355183074272163e-05,
      "loss": 0.0635,
      "step": 360000
    },
    {
      "epoch": 1.29,
      "eval_loss": 0.06002034246921539,
      "eval_runtime": 697.4531,
      "eval_samples_per_second": 88.942,
      "eval_steps_per_second": 22.237,
      "step": 360000
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.337271493001945e-05,
      "loss": 0.0635,
      "step": 370000
    },
    {
      "epoch": 1.36,
      "learning_rate": 4.319359911731728e-05,
      "loss": 0.0627,
      "step": 380000
    },
    {
      "epoch": 1.36,
      "eval_loss": 0.0593414306640625,
      "eval_runtime": 720.1404,
      "eval_samples_per_second": 86.14,
      "eval_steps_per_second": 21.536,
      "step": 380000
    },
    {
      "epoch": 1.4,
      "learning_rate": 4.30144833046151e-05,
      "loss": 0.0629,
      "step": 390000
    },
    {
      "epoch": 1.43,
      "learning_rate": 4.2835367491912923e-05,
      "loss": 0.0642,
      "step": 400000
    },
    {
      "epoch": 1.43,
      "eval_loss": 0.059706129133701324,
      "eval_runtime": 706.6838,
      "eval_samples_per_second": 87.78,
      "eval_steps_per_second": 21.946,
      "step": 400000
    },
    {
      "epoch": 1.47,
      "learning_rate": 4.265625167921074e-05,
      "loss": 0.062,
      "step": 410000
    },
    {
      "epoch": 1.5,
      "learning_rate": 4.247713586650857e-05,
      "loss": 0.0616,
      "step": 420000
    },
    {
      "epoch": 1.5,
      "eval_loss": 0.058674536645412445,
      "eval_runtime": 740.0142,
      "eval_samples_per_second": 83.827,
      "eval_steps_per_second": 20.958,
      "step": 420000
    }
  ],
  "max_steps": 2791490,
  "num_train_epochs": 10,
  "total_flos": 2.8075475590415155e+17,
  "trial_name": null,
  "trial_params": null
}