|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 13746, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8181289102284306e-05, |
|
"loss": 2.1559, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.63625782045686e-05, |
|
"loss": 1.9084, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4543867306852907e-05, |
|
"loss": 1.8072, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.272515640913721e-05, |
|
"loss": 1.7445, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.090644551142151e-05, |
|
"loss": 1.6868, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9087734613705804e-05, |
|
"loss": 1.659, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.726902371599011e-05, |
|
"loss": 1.6168, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.545031281827441e-05, |
|
"loss": 1.5922, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.363160192055871e-05, |
|
"loss": 1.5692, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.181289102284301e-05, |
|
"loss": 1.5414, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.9994180125127312e-05, |
|
"loss": 1.5159, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.817546922741161e-05, |
|
"loss": 1.5054, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6356758329695913e-05, |
|
"loss": 1.4857, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4538047431980213e-05, |
|
"loss": 1.4778, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.2719336534264517e-05, |
|
"loss": 1.4614, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.0900625636548813e-05, |
|
"loss": 1.4475, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9081914738833114e-05, |
|
"loss": 1.4312, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7263203841117417e-05, |
|
"loss": 1.4272, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.5444492943401718e-05, |
|
"loss": 1.4061, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.362578204568602e-05, |
|
"loss": 1.3994, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.180707114797032e-05, |
|
"loss": 1.3973, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.98836025025462e-06, |
|
"loss": 1.3753, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.16964935253892e-06, |
|
"loss": 1.3756, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.3509384548232216e-06, |
|
"loss": 1.3687, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.532227557107523e-06, |
|
"loss": 1.361, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.713516659391823e-06, |
|
"loss": 1.3533, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.948057616761241e-07, |
|
"loss": 1.3555, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 13746, |
|
"total_flos": 8.69185921169664e+16, |
|
"train_loss": 1.5310567042443597, |
|
"train_runtime": 46665.3959, |
|
"train_samples_per_second": 7.069, |
|
"train_steps_per_second": 0.295 |
|
} |
|
], |
|
"max_steps": 13746, |
|
"num_train_epochs": 3, |
|
"total_flos": 8.69185921169664e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|