dalio-1.3b-test / trainer_state.json
AlekseyKorshuk's picture
End of training
3951ee6
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"global_step": 26,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 0,
"loss": 2.6133,
"step": 1
},
{
"epoch": 0.08,
"eval_accuracy": 0.06519634211941905,
"eval_loss": 2.625,
"eval_runtime": 2.6754,
"eval_samples_per_second": 29.154,
"eval_steps_per_second": 1.121,
"step": 1
},
{
"epoch": 0.15,
"learning_rate": 0,
"loss": 2.6199,
"step": 2
},
{
"epoch": 0.15,
"eval_accuracy": 0.06519634211941905,
"eval_loss": 2.625,
"eval_runtime": 2.9539,
"eval_samples_per_second": 26.406,
"eval_steps_per_second": 1.016,
"step": 2
},
{
"epoch": 0.23,
"learning_rate": 0.0,
"loss": 2.7202,
"step": 3
},
{
"epoch": 0.23,
"eval_accuracy": 0.06575219652142729,
"eval_loss": 2.611328125,
"eval_runtime": 2.94,
"eval_samples_per_second": 26.53,
"eval_steps_per_second": 1.02,
"step": 3
},
{
"epoch": 0.31,
"learning_rate": 5e-05,
"loss": 2.6177,
"step": 4
},
{
"epoch": 0.31,
"eval_accuracy": 0.06575219652142729,
"eval_loss": 2.611328125,
"eval_runtime": 2.9241,
"eval_samples_per_second": 26.675,
"eval_steps_per_second": 1.026,
"step": 4
},
{
"epoch": 0.38,
"learning_rate": 5e-05,
"loss": 2.5422,
"step": 5
},
{
"epoch": 0.38,
"eval_accuracy": 0.06611081226465843,
"eval_loss": 2.5703125,
"eval_runtime": 2.6904,
"eval_samples_per_second": 28.992,
"eval_steps_per_second": 1.115,
"step": 5
},
{
"epoch": 0.46,
"learning_rate": 5e-05,
"loss": 2.5627,
"step": 6
},
{
"epoch": 0.46,
"eval_accuracy": 0.06623632777478931,
"eval_loss": 2.556640625,
"eval_runtime": 2.9307,
"eval_samples_per_second": 26.615,
"eval_steps_per_second": 1.024,
"step": 6
},
{
"epoch": 0.54,
"learning_rate": 5e-05,
"loss": 2.5784,
"step": 7
},
{
"epoch": 0.54,
"eval_accuracy": 0.06639770485924332,
"eval_loss": 2.546875,
"eval_runtime": 2.9555,
"eval_samples_per_second": 26.391,
"eval_steps_per_second": 1.015,
"step": 7
},
{
"epoch": 0.62,
"learning_rate": 5e-05,
"loss": 2.5264,
"step": 8
},
{
"epoch": 0.62,
"eval_accuracy": 0.06630805092343554,
"eval_loss": 2.537109375,
"eval_runtime": 2.6791,
"eval_samples_per_second": 29.114,
"eval_steps_per_second": 1.12,
"step": 8
},
{
"epoch": 0.69,
"learning_rate": 5e-05,
"loss": 2.3396,
"step": 9
},
{
"epoch": 0.69,
"eval_accuracy": 0.06695355926125157,
"eval_loss": 2.533203125,
"eval_runtime": 1.929,
"eval_samples_per_second": 40.435,
"eval_steps_per_second": 1.555,
"step": 9
},
{
"epoch": 0.77,
"learning_rate": 5e-05,
"loss": 2.4297,
"step": 10
},
{
"epoch": 0.77,
"eval_accuracy": 0.06734803657880581,
"eval_loss": 2.52734375,
"eval_runtime": 2.4146,
"eval_samples_per_second": 32.304,
"eval_steps_per_second": 1.242,
"step": 10
},
{
"epoch": 0.85,
"learning_rate": 5e-05,
"loss": 2.3914,
"step": 11
},
{
"epoch": 0.85,
"eval_accuracy": 0.06722252106867492,
"eval_loss": 2.5234375,
"eval_runtime": 2.4302,
"eval_samples_per_second": 32.096,
"eval_steps_per_second": 1.234,
"step": 11
},
{
"epoch": 0.92,
"learning_rate": 5e-05,
"loss": 2.429,
"step": 12
},
{
"epoch": 0.92,
"eval_accuracy": 0.06709700555854402,
"eval_loss": 2.51953125,
"eval_runtime": 2.9318,
"eval_samples_per_second": 26.605,
"eval_steps_per_second": 1.023,
"step": 12
},
{
"epoch": 1.0,
"learning_rate": 5e-05,
"loss": 2.3055,
"step": 13
},
{
"epoch": 1.0,
"eval_accuracy": 0.06720459028151336,
"eval_loss": 2.51171875,
"eval_runtime": 2.9231,
"eval_samples_per_second": 26.684,
"eval_steps_per_second": 1.026,
"step": 13
},
{
"epoch": 1.08,
"learning_rate": 5e-05,
"loss": 1.7162,
"step": 14
},
{
"epoch": 1.08,
"eval_accuracy": 0.0671866594943518,
"eval_loss": 2.521484375,
"eval_runtime": 2.9212,
"eval_samples_per_second": 26.702,
"eval_steps_per_second": 1.027,
"step": 14
},
{
"epoch": 1.15,
"learning_rate": 5e-05,
"loss": 1.7264,
"step": 15
},
{
"epoch": 1.15,
"eval_accuracy": 0.06774251389636005,
"eval_loss": 2.546875,
"eval_runtime": 2.6863,
"eval_samples_per_second": 29.036,
"eval_steps_per_second": 1.117,
"step": 15
},
{
"epoch": 1.23,
"learning_rate": 5e-05,
"loss": 1.7559,
"step": 16
},
{
"epoch": 1.23,
"eval_accuracy": 0.06707907477138246,
"eval_loss": 2.587890625,
"eval_runtime": 2.9302,
"eval_samples_per_second": 26.619,
"eval_steps_per_second": 1.024,
"step": 16
},
{
"epoch": 1.31,
"learning_rate": 5e-05,
"loss": 1.7899,
"step": 17
},
{
"epoch": 1.31,
"eval_accuracy": 0.0667383898153129,
"eval_loss": 2.611328125,
"eval_runtime": 2.6864,
"eval_samples_per_second": 29.035,
"eval_steps_per_second": 1.117,
"step": 17
},
{
"epoch": 1.38,
"learning_rate": 5e-05,
"loss": 1.6465,
"step": 18
},
{
"epoch": 1.38,
"eval_accuracy": 0.06664873587950511,
"eval_loss": 2.619140625,
"eval_runtime": 2.9278,
"eval_samples_per_second": 26.641,
"eval_steps_per_second": 1.025,
"step": 18
},
{
"epoch": 1.46,
"learning_rate": 5e-05,
"loss": 1.5955,
"step": 19
},
{
"epoch": 1.46,
"eval_accuracy": 0.0670611439842209,
"eval_loss": 2.607421875,
"eval_runtime": 2.4278,
"eval_samples_per_second": 32.128,
"eval_steps_per_second": 1.236,
"step": 19
},
{
"epoch": 1.54,
"learning_rate": 5e-05,
"loss": 1.5389,
"step": 20
},
{
"epoch": 1.54,
"eval_accuracy": 0.06720459028151336,
"eval_loss": 2.595703125,
"eval_runtime": 2.6825,
"eval_samples_per_second": 29.077,
"eval_steps_per_second": 1.118,
"step": 20
},
{
"epoch": 1.62,
"learning_rate": 5e-05,
"loss": 1.5356,
"step": 21
},
{
"epoch": 1.62,
"eval_accuracy": 0.06697149004841313,
"eval_loss": 2.5859375,
"eval_runtime": 2.6798,
"eval_samples_per_second": 29.107,
"eval_steps_per_second": 1.119,
"step": 21
},
{
"epoch": 1.69,
"learning_rate": 5e-05,
"loss": 1.386,
"step": 22
},
{
"epoch": 1.69,
"eval_accuracy": 0.06722252106867492,
"eval_loss": 2.58203125,
"eval_runtime": 2.9465,
"eval_samples_per_second": 26.472,
"eval_steps_per_second": 1.018,
"step": 22
},
{
"epoch": 1.77,
"learning_rate": 5e-05,
"loss": 1.7698,
"step": 23
},
{
"epoch": 1.77,
"eval_accuracy": 0.06695355926125157,
"eval_loss": 2.57421875,
"eval_runtime": 2.1897,
"eval_samples_per_second": 35.622,
"eval_steps_per_second": 1.37,
"step": 23
},
{
"epoch": 1.85,
"learning_rate": 5e-05,
"loss": 1.3923,
"step": 24
},
{
"epoch": 1.85,
"eval_accuracy": 0.06686390532544378,
"eval_loss": 2.580078125,
"eval_runtime": 2.6567,
"eval_samples_per_second": 29.36,
"eval_steps_per_second": 1.129,
"step": 24
},
{
"epoch": 1.92,
"learning_rate": 5e-05,
"loss": 1.4723,
"step": 25
},
{
"epoch": 1.92,
"eval_accuracy": 0.06722252106867492,
"eval_loss": 2.58984375,
"eval_runtime": 2.6764,
"eval_samples_per_second": 29.144,
"eval_steps_per_second": 1.121,
"step": 25
},
{
"epoch": 2.0,
"learning_rate": 5e-05,
"loss": 1.5653,
"step": 26
},
{
"epoch": 2.0,
"eval_accuracy": 0.06715079792002869,
"eval_loss": 2.603515625,
"eval_runtime": 2.4205,
"eval_samples_per_second": 32.225,
"eval_steps_per_second": 1.239,
"step": 26
},
{
"epoch": 2.0,
"step": 26,
"total_flos": 2728683110400.0,
"train_loss": 2.0602510892427883,
"train_runtime": 142.5326,
"train_samples_per_second": 5.837,
"train_steps_per_second": 0.182
}
],
"max_steps": 26,
"num_train_epochs": 2,
"total_flos": 2728683110400.0,
"trial_name": null,
"trial_params": null
}