{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.7267080745341614,
  "eval_steps": 500,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "learning_rate": 4.998766400914329e-05,
      "loss": 0.6951,
      "step": 2
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.995066821070679e-05,
      "loss": 0.5589,
      "step": 4
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.9889049115077005e-05,
      "loss": 0.5485,
      "step": 6
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.980286753286195e-05,
      "loss": 0.5392,
      "step": 8
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.9692208514878444e-05,
      "loss": 0.5347,
      "step": 10
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.9557181268217227e-05,
      "loss": 0.5261,
      "step": 12
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.939791904846869e-05,
      "loss": 0.5298,
      "step": 14
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.9214579028215776e-05,
      "loss": 0.536,
      "step": 16
    },
    {
      "epoch": 0.45,
      "learning_rate": 4.900734214192358e-05,
      "loss": 0.5012,
      "step": 18
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.877641290737884e-05,
      "loss": 0.5342,
      "step": 20
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.852201922385564e-05,
      "loss": 0.5324,
      "step": 22
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.8244412147206284e-05,
      "loss": 0.4863,
      "step": 24
    },
    {
      "epoch": 0.65,
      "learning_rate": 4.794386564209953e-05,
      "loss": 0.4774,
      "step": 26
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.762067631165049e-05,
      "loss": 0.4957,
      "step": 28
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.72751631047092e-05,
      "loss": 0.4531,
      "step": 30
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.72751631047092e-05,
      "loss": 0.5678,
      "step": 32
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.72751631047092e-05,
      "loss": 0.5456,
      "step": 34
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.690766700109659e-05,
      "loss": 0.541,
      "step": 36
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.65185506750986e-05,
      "loss": 0.5035,
      "step": 38
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.610819813755038e-05,
      "loss": 0.4901,
      "step": 40
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.567701435686404e-05,
      "loss": 0.3193,
      "step": 42
    },
    {
      "epoch": 1.09,
      "learning_rate": 4.522542485937369e-05,
      "loss": 0.3019,
      "step": 44
    },
    {
      "epoch": 1.14,
      "learning_rate": 4.4753875309392266e-05,
      "loss": 0.2869,
      "step": 46
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.426283106939474e-05,
      "loss": 0.2998,
      "step": 48
    },
    {
      "epoch": 1.24,
      "learning_rate": 4.375277674076149e-05,
      "loss": 0.2784,
      "step": 50
    },
    {
      "epoch": 1.29,
      "learning_rate": 4.3224215685535294e-05,
      "loss": 0.3551,
      "step": 52
    },
    {
      "epoch": 1.34,
      "learning_rate": 4.267766952966369e-05,
      "loss": 0.3224,
      "step": 54
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.211367764821722e-05,
      "loss": 0.2901,
      "step": 56
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.1532796633091296e-05,
      "loss": 0.2776,
      "step": 58
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.093559974371725e-05,
      "loss": 0.2993,
      "step": 60
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.0322676341324415e-05,
      "loss": 0.2978,
      "step": 62
    },
    {
      "epoch": 1.59,
      "learning_rate": 3.969463130731183e-05,
      "loss": 0.2895,
      "step": 64
    },
    {
      "epoch": 1.64,
      "learning_rate": 3.905208444630327e-05,
      "loss": 0.3016,
      "step": 66
    },
    {
      "epoch": 1.69,
      "learning_rate": 3.8395669874474915e-05,
      "loss": 0.2613,
      "step": 68
    },
    {
      "epoch": 1.74,
      "learning_rate": 3.7726035393759285e-05,
      "loss": 0.2993,
      "step": 70
    },
    {
      "epoch": 1.79,
      "learning_rate": 3.704384185254288e-05,
      "loss": 0.2853,
      "step": 72
    },
    {
      "epoch": 1.84,
      "learning_rate": 3.634976249348867e-05,
      "loss": 0.2578,
      "step": 74
    },
    {
      "epoch": 1.89,
      "learning_rate": 3.564448228912682e-05,
      "loss": 0.2845,
      "step": 76
    },
    {
      "epoch": 1.94,
      "learning_rate": 3.4928697265869515e-05,
      "loss": 0.2931,
      "step": 78
    },
    {
      "epoch": 1.99,
      "learning_rate": 3.4203113817116957e-05,
      "loss": 0.3093,
      "step": 80
    },
    {
      "epoch": 2.04,
      "learning_rate": 3.346844800613229e-05,
      "loss": 0.1741,
      "step": 82
    },
    {
      "epoch": 2.09,
      "learning_rate": 3.272542485937369e-05,
      "loss": 0.1583,
      "step": 84
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.1974777650980735e-05,
      "loss": 0.135,
      "step": 86
    },
    {
      "epoch": 2.19,
      "learning_rate": 3.121724717912138e-05,
      "loss": 0.1423,
      "step": 88
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.045358103491357e-05,
      "loss": 0.1372,
      "step": 90
    },
    {
      "epoch": 2.29,
      "learning_rate": 2.9684532864643122e-05,
      "loss": 0.1249,
      "step": 92
    },
    {
      "epoch": 2.34,
      "learning_rate": 2.8910861626005776e-05,
      "loss": 0.1222,
      "step": 94
    },
    {
      "epoch": 2.39,
      "learning_rate": 2.8133330839107608e-05,
      "loss": 0.1607,
      "step": 96
    },
    {
      "epoch": 2.43,
      "learning_rate": 2.7352707832962865e-05,
      "loss": 0.1427,
      "step": 98
    },
    {
      "epoch": 2.48,
      "learning_rate": 2.656976298823284e-05,
      "loss": 0.12,
      "step": 100
    },
    {
      "epoch": 2.53,
      "learning_rate": 2.578526897695321e-05,
      "loss": 0.1309,
      "step": 102
    },
    {
      "epoch": 2.58,
      "learning_rate": 2.5e-05,
      "loss": 0.1332,
      "step": 104
    },
    {
      "epoch": 2.63,
      "learning_rate": 2.4214731023046793e-05,
      "loss": 0.1256,
      "step": 106
    },
    {
      "epoch": 2.68,
      "learning_rate": 2.3430237011767167e-05,
      "loss": 0.1258,
      "step": 108
    },
    {
      "epoch": 2.73,
      "learning_rate": 2.2647292167037144e-05,
      "loss": 0.1498,
      "step": 110
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.186666916089239e-05,
      "loss": 0.1417,
      "step": 112
    },
    {
      "epoch": 2.83,
      "learning_rate": 2.1089138373994223e-05,
      "loss": 0.1287,
      "step": 114
    },
    {
      "epoch": 2.88,
      "learning_rate": 2.031546713535688e-05,
      "loss": 0.1152,
      "step": 116
    },
    {
      "epoch": 2.93,
      "learning_rate": 1.9546418965086442e-05,
      "loss": 0.1352,
      "step": 118
    },
    {
      "epoch": 2.98,
      "learning_rate": 1.8782752820878634e-05,
      "loss": 0.1149,
      "step": 120
    },
    {
      "epoch": 3.03,
      "learning_rate": 1.802522234901927e-05,
      "loss": 0.0871,
      "step": 122
    },
    {
      "epoch": 3.08,
      "learning_rate": 1.7274575140626318e-05,
      "loss": 0.0679,
      "step": 124
    },
    {
      "epoch": 3.13,
      "learning_rate": 1.6531551993867717e-05,
      "loss": 0.0616,
      "step": 126
    },
    {
      "epoch": 3.18,
      "learning_rate": 1.5796886182883053e-05,
      "loss": 0.0677,
      "step": 128
    },
    {
      "epoch": 3.23,
      "learning_rate": 1.5071302734130489e-05,
      "loss": 0.0636,
      "step": 130
    },
    {
      "epoch": 3.28,
      "learning_rate": 1.4355517710873184e-05,
      "loss": 0.0612,
      "step": 132
    },
    {
      "epoch": 3.33,
      "learning_rate": 1.3650237506511331e-05,
      "loss": 0.0574,
      "step": 134
    },
    {
      "epoch": 3.38,
      "learning_rate": 1.2956158147457115e-05,
      "loss": 0.0467,
      "step": 136
    },
    {
      "epoch": 3.43,
      "learning_rate": 1.2273964606240718e-05,
      "loss": 0.0611,
      "step": 138
    },
    {
      "epoch": 3.48,
      "learning_rate": 1.1604330125525079e-05,
      "loss": 0.0655,
      "step": 140
    },
    {
      "epoch": 3.53,
      "learning_rate": 1.0947915553696742e-05,
      "loss": 0.069,
      "step": 142
    },
    {
      "epoch": 3.58,
      "learning_rate": 1.0305368692688174e-05,
      "loss": 0.0573,
      "step": 144
    },
    {
      "epoch": 3.63,
      "learning_rate": 9.677323658675594e-06,
      "loss": 0.047,
      "step": 146
    },
    {
      "epoch": 3.68,
      "learning_rate": 9.064400256282757e-06,
      "loss": 0.0443,
      "step": 148
    },
    {
      "epoch": 3.73,
      "learning_rate": 8.467203366908707e-06,
      "loss": 0.0595,
      "step": 150
    }
  ],
  "logging_steps": 2,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 25,
  "total_flos": 2.021958544038822e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}