{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.5197355996222852,
  "global_step": 125,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 0.0004,
      "loss": 2.6298,
      "step": 2
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0008,
      "loss": 2.6348,
      "step": 4
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0009999024041442454,
      "loss": 2.6855,
      "step": 6
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0009991218658821608,
      "loss": 2.6779,
      "step": 8
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0009975620080758321,
      "loss": 2.7265,
      "step": 10
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.000995225266258058,
      "loss": 2.7332,
      "step": 12
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0009921152889737985,
      "loss": 2.7308,
      "step": 14
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0009882369320834067,
      "loss": 2.7706,
      "step": 16
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0009835962511807786,
      "loss": 2.7455,
      "step": 18
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0009782004921382612,
      "loss": 2.7819,
      "step": 20
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0009720580797930845,
      "loss": 2.7122,
      "step": 22
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0009651786047929772,
      "loss": 2.7141,
      "step": 24
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0009575728086215092,
      "loss": 2.694,
      "step": 26
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00094925256682654,
      "loss": 2.6981,
      "step": 28
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0009402308704779598,
      "loss": 2.6848,
      "step": 30
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0009305218058836777,
      "loss": 2.6498,
      "step": 32
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0009201405325955221,
      "loss": 2.7059,
      "step": 34
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0009091032597394012,
      "loss": 2.7016,
      "step": 36
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0008974272207066767,
      "loss": 2.6599,
      "step": 38
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0008851306462462688,
      "loss": 2.6922,
      "step": 40
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0008722327359995064,
      "loss": 2.6473,
      "step": 42
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0008587536285221655,
      "loss": 2.7236,
      "step": 44
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.000844714369840506,
      "loss": 2.6924,
      "step": 46
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0008301368805903988,
      "loss": 2.6428,
      "step": 48
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0008150439217908557,
      "loss": 2.6161,
      "step": 50
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0007994590593054001,
      "loss": 2.6721,
      "step": 52
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.0007834066270467691,
      "loss": 2.6506,
      "step": 54
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.0007669116889823954,
      "loss": 2.6787,
      "step": 56
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.00075,
      "loss": 2.6172,
      "step": 58
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0007326979656943906,
      "loss": 2.6071,
      "step": 60
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.0007150326011382603,
      "loss": 2.6513,
      "step": 62
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0006970314887013584,
      "loss": 2.679,
      "step": 64
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0006787227349838946,
      "loss": 2.6879,
      "step": 66
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.0006601349269314187,
      "loss": 2.6459,
      "step": 68
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0006412970871996995,
      "loss": 2.6268,
      "step": 70
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0006222386288392914,
      "loss": 2.615,
      "step": 72
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.0006029893093705492,
      "loss": 2.6541,
      "step": 74
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.0005835791843207916,
      "loss": 2.6697,
      "step": 76
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.0005640385602961634,
      "loss": 2.6017,
      "step": 78
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.0005443979476614675,
      "loss": 2.658,
      "step": 80
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.0005246880129018515,
      "loss": 2.5922,
      "step": 82
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0005049395307407329,
      "loss": 3.5292,
      "step": 84
    },
    {
      "epoch": 1.05,
      "learning_rate": 0.0004851833360887201,
      "loss": 2.5377,
      "step": 86
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.0004654502758985611,
      "loss": 2.6096,
      "step": 88
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.00044577116100128736,
      "loss": 2.5112,
      "step": 90
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.00042617671799875947,
      "loss": 2.5469,
      "step": 92
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.0004066975412877255,
      "loss": 2.5364,
      "step": 94
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.00038736404529030255,
      "loss": 2.5132,
      "step": 96
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.0003682064169654663,
      "loss": 2.5339,
      "step": 98
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.0003492545686756986,
      "loss": 2.5136,
      "step": 100
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.00033053809148238423,
      "loss": 2.5043,
      "step": 102
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.00031208620894288106,
      "loss": 2.5174,
      "step": 104
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.0002939277314814041,
      "loss": 2.479,
      "step": 106
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.0002760910114049686,
      "loss": 2.5554,
      "step": 108
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.00025860389863462764,
      "loss": 2.5007,
      "step": 110
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.00024149369722112717,
      "loss": 2.493,
      "step": 112
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.00022478712271287087,
      "loss": 2.4715,
      "step": 114
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.00020851026044276406,
      "loss": 2.5359,
      "step": 116
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.00019268852479906146,
      "loss": 2.5447,
      "step": 118
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.00017734661954381753,
      "loss": 2.5141,
      "step": 120
    },
    {
      "epoch": 1.48,
      "learning_rate": 0.00016250849924089484,
      "loss": 2.558,
      "step": 122
    },
    {
      "epoch": 1.51,
      "learning_rate": 0.00014819733185375534,
      "loss": 2.5372,
      "step": 124
    }
  ],
  "max_steps": 164,
  "num_train_epochs": 2,
  "total_flos": 1.1869674526642012e+18,
  "trial_name": null,
  "trial_params": null
}