GPT-Neo-2.7B-AID / trainer_state.json
Henk717's picture
Initial Upload
e4b98e6
raw history blame
No virus
12.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9994739610731194,
"global_step": 950,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"eval_loss": 2.40625,
"eval_runtime": 1.9288,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 15
},
{
"epoch": 0.03,
"eval_loss": 2.35546875,
"eval_runtime": 1.9286,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 30
},
{
"epoch": 0.05,
"eval_loss": 2.341796875,
"eval_runtime": 1.9286,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 45
},
{
"epoch": 0.06,
"eval_loss": 2.33203125,
"eval_runtime": 1.9285,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 60
},
{
"epoch": 0.08,
"eval_loss": 2.32421875,
"eval_runtime": 1.9288,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 75
},
{
"epoch": 0.09,
"eval_loss": 2.318359375,
"eval_runtime": 1.9291,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 90
},
{
"epoch": 0.11,
"eval_loss": 2.314453125,
"eval_runtime": 1.9288,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 105
},
{
"epoch": 0.13,
"eval_loss": 2.3125,
"eval_runtime": 1.9287,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 120
},
{
"epoch": 0.14,
"eval_loss": 2.3125,
"eval_runtime": 1.9289,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 135
},
{
"epoch": 0.16,
"eval_loss": 2.306640625,
"eval_runtime": 1.9287,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 150
},
{
"epoch": 0.17,
"eval_loss": 2.30078125,
"eval_runtime": 1.9284,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 165
},
{
"epoch": 0.19,
"eval_loss": 2.30078125,
"eval_runtime": 1.9291,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 180
},
{
"epoch": 0.21,
"eval_loss": 2.302734375,
"eval_runtime": 1.9292,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 195
},
{
"epoch": 0.22,
"eval_loss": 2.294921875,
"eval_runtime": 1.9287,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 210
},
{
"epoch": 0.24,
"eval_loss": 2.287109375,
"eval_runtime": 1.9291,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 225
},
{
"epoch": 0.25,
"eval_loss": 2.283203125,
"eval_runtime": 1.9283,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 240
},
{
"epoch": 0.27,
"eval_loss": 2.279296875,
"eval_runtime": 1.9288,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 255
},
{
"epoch": 0.28,
"eval_loss": 2.27734375,
"eval_runtime": 1.929,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 270
},
{
"epoch": 0.3,
"eval_loss": 2.248046875,
"eval_runtime": 1.9292,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 285
},
{
"epoch": 0.32,
"eval_loss": 2.2265625,
"eval_runtime": 1.9288,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 300
},
{
"epoch": 0.33,
"eval_loss": 2.2265625,
"eval_runtime": 1.9288,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 315
},
{
"epoch": 0.35,
"eval_loss": 2.2265625,
"eval_runtime": 1.9287,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 330
},
{
"epoch": 0.36,
"eval_loss": 2.2265625,
"eval_runtime": 1.9316,
"eval_samples_per_second": 1.035,
"eval_steps_per_second": 0.518,
"step": 345
},
{
"epoch": 0.38,
"eval_loss": 2.228515625,
"eval_runtime": 1.9288,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 360
},
{
"epoch": 0.39,
"eval_loss": 2.2265625,
"eval_runtime": 1.9286,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 375
},
{
"epoch": 0.41,
"eval_loss": 2.224609375,
"eval_runtime": 1.9289,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 390
},
{
"epoch": 0.43,
"eval_loss": 2.22265625,
"eval_runtime": 1.9294,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 405
},
{
"epoch": 0.44,
"eval_loss": 2.220703125,
"eval_runtime": 1.9284,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 420
},
{
"epoch": 0.46,
"eval_loss": 2.22265625,
"eval_runtime": 1.9289,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 435
},
{
"epoch": 0.47,
"eval_loss": 2.22265625,
"eval_runtime": 1.9295,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 450
},
{
"epoch": 0.49,
"eval_loss": 2.220703125,
"eval_runtime": 1.9284,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 465
},
{
"epoch": 0.5,
"eval_loss": 2.220703125,
"eval_runtime": 1.9291,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 480
},
{
"epoch": 0.52,
"eval_loss": 2.21875,
"eval_runtime": 1.9289,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 495
},
{
"epoch": 0.53,
"learning_rate": 5e-06,
"loss": 2.374,
"step": 500
},
{
"epoch": 0.54,
"eval_loss": 2.203125,
"eval_runtime": 1.937,
"eval_samples_per_second": 1.033,
"eval_steps_per_second": 0.516,
"step": 510
},
{
"epoch": 0.55,
"eval_loss": 2.1875,
"eval_runtime": 1.9291,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 525
},
{
"epoch": 0.57,
"eval_loss": 2.185546875,
"eval_runtime": 1.9291,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 540
},
{
"epoch": 0.58,
"eval_loss": 2.140625,
"eval_runtime": 1.9294,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 555
},
{
"epoch": 0.6,
"eval_loss": 2.134765625,
"eval_runtime": 1.9289,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 570
},
{
"epoch": 0.62,
"eval_loss": 2.134765625,
"eval_runtime": 1.9285,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 585
},
{
"epoch": 0.63,
"eval_loss": 2.134765625,
"eval_runtime": 1.929,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 600
},
{
"epoch": 0.65,
"eval_loss": 2.1328125,
"eval_runtime": 1.929,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 615
},
{
"epoch": 0.66,
"eval_loss": 2.1328125,
"eval_runtime": 1.9284,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 630
},
{
"epoch": 0.68,
"eval_loss": 2.130859375,
"eval_runtime": 1.929,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 645
},
{
"epoch": 0.69,
"eval_loss": 2.12890625,
"eval_runtime": 1.9293,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 660
},
{
"epoch": 0.71,
"eval_loss": 2.12890625,
"eval_runtime": 1.9283,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 675
},
{
"epoch": 0.73,
"eval_loss": 2.1328125,
"eval_runtime": 1.9291,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 690
},
{
"epoch": 0.74,
"eval_loss": 2.1328125,
"eval_runtime": 1.9288,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 705
},
{
"epoch": 0.76,
"eval_loss": 2.130859375,
"eval_runtime": 1.9294,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 720
},
{
"epoch": 0.77,
"eval_loss": 2.134765625,
"eval_runtime": 1.929,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 735
},
{
"epoch": 0.79,
"eval_loss": 2.1328125,
"eval_runtime": 1.9295,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 750
},
{
"epoch": 0.8,
"eval_loss": 2.134765625,
"eval_runtime": 1.9289,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 765
},
{
"epoch": 0.82,
"eval_loss": 2.134765625,
"eval_runtime": 1.9291,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 780
},
{
"epoch": 0.84,
"eval_loss": 2.134765625,
"eval_runtime": 1.9289,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 795
},
{
"epoch": 0.85,
"eval_loss": 2.1328125,
"eval_runtime": 1.9291,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 810
},
{
"epoch": 0.87,
"eval_loss": 2.12890625,
"eval_runtime": 1.9286,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.519,
"step": 825
},
{
"epoch": 0.88,
"eval_loss": 2.119140625,
"eval_runtime": 1.9289,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 840
},
{
"epoch": 0.9,
"eval_loss": 2.12109375,
"eval_runtime": 1.9287,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 855
},
{
"epoch": 0.92,
"eval_loss": 2.1171875,
"eval_runtime": 1.9289,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 870
},
{
"epoch": 0.93,
"eval_loss": 2.119140625,
"eval_runtime": 1.9291,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 885
},
{
"epoch": 0.95,
"eval_loss": 2.123046875,
"eval_runtime": 1.9294,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 900
},
{
"epoch": 0.96,
"eval_loss": 2.12109375,
"eval_runtime": 1.9292,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 915
},
{
"epoch": 0.98,
"eval_loss": 2.119140625,
"eval_runtime": 1.9287,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 930
},
{
"epoch": 0.99,
"eval_loss": 2.12109375,
"eval_runtime": 1.9291,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 0.518,
"step": 945
},
{
"epoch": 1.0,
"step": 950,
"total_flos": 1.2380121253648794e+17,
"train_loss": 2.3266365131578945,
"train_runtime": 29836.7817,
"train_samples_per_second": 0.255,
"train_steps_per_second": 0.032
}
],
"max_steps": 950,
"num_train_epochs": 1,
"total_flos": 1.2380121253648794e+17,
"trial_name": null,
"trial_params": null
}