longt5-stable-diffusion-prompt / trainer_state.json
vahn9995's picture
Upload 14 files
68d75c2
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.993779160186625,
"eval_steps": 30,
"global_step": 749,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 0.0002,
"loss": 3.4262,
"step": 9
},
{
"epoch": 0.17,
"learning_rate": 0.0002,
"loss": 2.6518,
"step": 18
},
{
"epoch": 0.25,
"learning_rate": 0.0002,
"loss": 2.453,
"step": 27
},
{
"epoch": 0.28,
"eval_loss": 2.0444083213806152,
"eval_runtime": 62.6984,
"eval_samples_per_second": 20.511,
"eval_steps_per_second": 2.568,
"step": 30
},
{
"epoch": 0.34,
"learning_rate": 0.0002,
"loss": 2.3345,
"step": 36
},
{
"epoch": 0.42,
"learning_rate": 0.0002,
"loss": 2.3033,
"step": 45
},
{
"epoch": 0.5,
"learning_rate": 0.0002,
"loss": 2.2692,
"step": 54
},
{
"epoch": 0.56,
"eval_loss": 1.8969556093215942,
"eval_runtime": 68.3363,
"eval_samples_per_second": 18.819,
"eval_steps_per_second": 2.356,
"step": 60
},
{
"epoch": 0.59,
"learning_rate": 0.0002,
"loss": 2.2294,
"step": 63
},
{
"epoch": 0.67,
"learning_rate": 0.0002,
"loss": 2.235,
"step": 72
},
{
"epoch": 0.76,
"learning_rate": 0.0002,
"loss": 2.1576,
"step": 81
},
{
"epoch": 0.84,
"learning_rate": 0.0002,
"loss": 2.1485,
"step": 90
},
{
"epoch": 0.84,
"eval_loss": 1.8373124599456787,
"eval_runtime": 69.5775,
"eval_samples_per_second": 18.483,
"eval_steps_per_second": 2.314,
"step": 90
},
{
"epoch": 0.92,
"learning_rate": 0.0002,
"loss": 2.1657,
"step": 99
},
{
"epoch": 1.01,
"learning_rate": 0.0002,
"loss": 2.1049,
"step": 108
},
{
"epoch": 1.09,
"learning_rate": 0.0002,
"loss": 2.0469,
"step": 117
},
{
"epoch": 1.12,
"eval_loss": 1.8033325672149658,
"eval_runtime": 69.7392,
"eval_samples_per_second": 18.44,
"eval_steps_per_second": 2.309,
"step": 120
},
{
"epoch": 1.18,
"learning_rate": 0.0002,
"loss": 2.003,
"step": 126
},
{
"epoch": 1.26,
"learning_rate": 0.0002,
"loss": 1.9928,
"step": 135
},
{
"epoch": 1.34,
"learning_rate": 0.0002,
"loss": 1.9954,
"step": 144
},
{
"epoch": 1.4,
"eval_loss": 1.7761518955230713,
"eval_runtime": 68.9976,
"eval_samples_per_second": 18.638,
"eval_steps_per_second": 2.333,
"step": 150
},
{
"epoch": 1.43,
"learning_rate": 0.0002,
"loss": 1.9571,
"step": 153
},
{
"epoch": 1.51,
"learning_rate": 0.0002,
"loss": 2.0011,
"step": 162
},
{
"epoch": 1.6,
"learning_rate": 0.0002,
"loss": 1.983,
"step": 171
},
{
"epoch": 1.68,
"learning_rate": 0.0002,
"loss": 1.9778,
"step": 180
},
{
"epoch": 1.68,
"eval_loss": 1.7592827081680298,
"eval_runtime": 71.1717,
"eval_samples_per_second": 18.069,
"eval_steps_per_second": 2.262,
"step": 180
},
{
"epoch": 1.76,
"learning_rate": 0.0002,
"loss": 1.9624,
"step": 189
},
{
"epoch": 1.85,
"learning_rate": 0.0002,
"loss": 1.9384,
"step": 198
},
{
"epoch": 1.93,
"learning_rate": 0.0002,
"loss": 1.9536,
"step": 207
},
{
"epoch": 1.96,
"eval_loss": 1.7472261190414429,
"eval_runtime": 68.6137,
"eval_samples_per_second": 18.743,
"eval_steps_per_second": 2.346,
"step": 210
},
{
"epoch": 2.02,
"learning_rate": 0.0002,
"loss": 1.9249,
"step": 216
},
{
"epoch": 2.1,
"learning_rate": 0.0002,
"loss": 1.8604,
"step": 225
},
{
"epoch": 2.18,
"learning_rate": 0.0002,
"loss": 1.8524,
"step": 234
},
{
"epoch": 2.24,
"eval_loss": 1.730584740638733,
"eval_runtime": 68.1311,
"eval_samples_per_second": 18.875,
"eval_steps_per_second": 2.363,
"step": 240
},
{
"epoch": 2.27,
"learning_rate": 0.0002,
"loss": 1.8442,
"step": 243
},
{
"epoch": 2.35,
"learning_rate": 0.0002,
"loss": 1.8647,
"step": 252
},
{
"epoch": 2.44,
"learning_rate": 0.0002,
"loss": 1.8218,
"step": 261
},
{
"epoch": 2.52,
"learning_rate": 0.0002,
"loss": 1.8438,
"step": 270
},
{
"epoch": 2.52,
"eval_loss": 1.7255065441131592,
"eval_runtime": 67.2395,
"eval_samples_per_second": 19.126,
"eval_steps_per_second": 2.394,
"step": 270
},
{
"epoch": 2.6,
"learning_rate": 0.0002,
"loss": 1.8491,
"step": 279
},
{
"epoch": 2.69,
"learning_rate": 0.0002,
"loss": 1.8304,
"step": 288
},
{
"epoch": 2.77,
"learning_rate": 0.0002,
"loss": 1.8436,
"step": 297
},
{
"epoch": 2.8,
"eval_loss": 1.7139911651611328,
"eval_runtime": 70.3499,
"eval_samples_per_second": 18.28,
"eval_steps_per_second": 2.289,
"step": 300
},
{
"epoch": 2.86,
"learning_rate": 0.0002,
"loss": 1.8039,
"step": 306
},
{
"epoch": 2.94,
"learning_rate": 0.0002,
"loss": 1.8255,
"step": 315
},
{
"epoch": 3.02,
"learning_rate": 0.0002,
"loss": 1.7765,
"step": 324
},
{
"epoch": 3.08,
"eval_loss": 1.7048661708831787,
"eval_runtime": 70.1678,
"eval_samples_per_second": 18.328,
"eval_steps_per_second": 2.295,
"step": 330
},
{
"epoch": 3.11,
"learning_rate": 0.0002,
"loss": 1.7546,
"step": 333
},
{
"epoch": 3.19,
"learning_rate": 0.0002,
"loss": 1.7389,
"step": 342
},
{
"epoch": 3.28,
"learning_rate": 0.0002,
"loss": 1.7248,
"step": 351
},
{
"epoch": 3.36,
"learning_rate": 0.0002,
"loss": 1.7537,
"step": 360
},
{
"epoch": 3.36,
"eval_loss": 1.7056528329849243,
"eval_runtime": 70.6453,
"eval_samples_per_second": 18.204,
"eval_steps_per_second": 2.279,
"step": 360
},
{
"epoch": 3.44,
"learning_rate": 0.0002,
"loss": 1.7234,
"step": 369
},
{
"epoch": 3.53,
"learning_rate": 0.0002,
"loss": 1.7208,
"step": 378
},
{
"epoch": 3.61,
"learning_rate": 0.0002,
"loss": 1.7328,
"step": 387
},
{
"epoch": 3.64,
"eval_loss": 1.6977263689041138,
"eval_runtime": 61.7073,
"eval_samples_per_second": 20.84,
"eval_steps_per_second": 2.609,
"step": 390
},
{
"epoch": 3.7,
"learning_rate": 0.0002,
"loss": 1.7231,
"step": 396
},
{
"epoch": 3.78,
"learning_rate": 0.0002,
"loss": 1.7424,
"step": 405
},
{
"epoch": 3.87,
"learning_rate": 0.0002,
"loss": 1.723,
"step": 414
},
{
"epoch": 3.92,
"eval_loss": 1.6972527503967285,
"eval_runtime": 69.7095,
"eval_samples_per_second": 18.448,
"eval_steps_per_second": 2.31,
"step": 420
},
{
"epoch": 3.95,
"learning_rate": 0.0002,
"loss": 1.6888,
"step": 423
},
{
"epoch": 4.04,
"learning_rate": 0.0002,
"loss": 1.6847,
"step": 432
},
{
"epoch": 4.12,
"learning_rate": 0.0002,
"loss": 1.6702,
"step": 441
},
{
"epoch": 4.2,
"learning_rate": 0.0002,
"loss": 1.6592,
"step": 450
},
{
"epoch": 4.2,
"eval_loss": 1.7057932615280151,
"eval_runtime": 71.3257,
"eval_samples_per_second": 18.03,
"eval_steps_per_second": 2.257,
"step": 450
},
{
"epoch": 4.29,
"learning_rate": 0.0002,
"loss": 1.6158,
"step": 459
},
{
"epoch": 4.37,
"learning_rate": 0.0002,
"loss": 1.6684,
"step": 468
},
{
"epoch": 4.46,
"learning_rate": 0.0002,
"loss": 1.6563,
"step": 477
},
{
"epoch": 4.48,
"eval_loss": 1.703405499458313,
"eval_runtime": 67.5168,
"eval_samples_per_second": 19.047,
"eval_steps_per_second": 2.385,
"step": 480
},
{
"epoch": 4.54,
"learning_rate": 0.0002,
"loss": 1.6111,
"step": 486
},
{
"epoch": 4.62,
"learning_rate": 0.0002,
"loss": 1.6514,
"step": 495
},
{
"epoch": 4.71,
"learning_rate": 0.0002,
"loss": 1.6443,
"step": 504
},
{
"epoch": 4.76,
"eval_loss": 1.6969022750854492,
"eval_runtime": 68.7108,
"eval_samples_per_second": 18.716,
"eval_steps_per_second": 2.343,
"step": 510
},
{
"epoch": 4.79,
"learning_rate": 0.0002,
"loss": 1.6255,
"step": 513
},
{
"epoch": 4.88,
"learning_rate": 0.0002,
"loss": 1.6563,
"step": 522
},
{
"epoch": 4.96,
"learning_rate": 0.0002,
"loss": 1.617,
"step": 531
},
{
"epoch": 5.04,
"learning_rate": 0.0002,
"loss": 1.5782,
"step": 540
},
{
"epoch": 5.04,
"eval_loss": 1.6952643394470215,
"eval_runtime": 68.1803,
"eval_samples_per_second": 18.862,
"eval_steps_per_second": 2.361,
"step": 540
},
{
"epoch": 5.13,
"learning_rate": 0.0002,
"loss": 1.5834,
"step": 549
},
{
"epoch": 5.21,
"learning_rate": 0.0002,
"loss": 1.5956,
"step": 558
},
{
"epoch": 5.3,
"learning_rate": 0.0002,
"loss": 1.509,
"step": 567
},
{
"epoch": 5.32,
"eval_loss": 1.7135779857635498,
"eval_runtime": 68.335,
"eval_samples_per_second": 18.819,
"eval_steps_per_second": 2.356,
"step": 570
},
{
"epoch": 5.38,
"learning_rate": 0.0002,
"loss": 1.5999,
"step": 576
},
{
"epoch": 5.46,
"learning_rate": 0.0002,
"loss": 1.5743,
"step": 585
},
{
"epoch": 5.55,
"learning_rate": 0.0002,
"loss": 1.5516,
"step": 594
},
{
"epoch": 5.6,
"eval_loss": 1.7064013481140137,
"eval_runtime": 69.1824,
"eval_samples_per_second": 18.589,
"eval_steps_per_second": 2.327,
"step": 600
},
{
"epoch": 5.63,
"learning_rate": 0.0002,
"loss": 1.5623,
"step": 603
},
{
"epoch": 5.72,
"learning_rate": 0.0002,
"loss": 1.5476,
"step": 612
},
{
"epoch": 5.8,
"learning_rate": 0.0002,
"loss": 1.5587,
"step": 621
},
{
"epoch": 5.88,
"learning_rate": 0.0002,
"loss": 1.558,
"step": 630
},
{
"epoch": 5.88,
"eval_loss": 1.7045198678970337,
"eval_runtime": 69.0953,
"eval_samples_per_second": 18.612,
"eval_steps_per_second": 2.33,
"step": 630
},
{
"epoch": 5.97,
"learning_rate": 0.0002,
"loss": 1.5317,
"step": 639
},
{
"epoch": 6.05,
"learning_rate": 0.0002,
"loss": 1.5192,
"step": 648
},
{
"epoch": 6.14,
"learning_rate": 0.0002,
"loss": 1.5016,
"step": 657
},
{
"epoch": 6.16,
"eval_loss": 1.718214750289917,
"eval_runtime": 68.4195,
"eval_samples_per_second": 18.796,
"eval_steps_per_second": 2.353,
"step": 660
},
{
"epoch": 6.22,
"learning_rate": 0.0002,
"loss": 1.5243,
"step": 666
},
{
"epoch": 6.3,
"learning_rate": 0.0002,
"loss": 1.4374,
"step": 675
},
{
"epoch": 6.39,
"learning_rate": 0.0002,
"loss": 1.5288,
"step": 684
},
{
"epoch": 6.44,
"eval_loss": 1.7111074924468994,
"eval_runtime": 68.9961,
"eval_samples_per_second": 18.639,
"eval_steps_per_second": 2.333,
"step": 690
},
{
"epoch": 6.47,
"learning_rate": 0.0002,
"loss": 1.4874,
"step": 693
},
{
"epoch": 6.56,
"learning_rate": 0.0002,
"loss": 1.4725,
"step": 702
},
{
"epoch": 6.64,
"learning_rate": 0.0002,
"loss": 1.5042,
"step": 711
},
{
"epoch": 6.72,
"learning_rate": 0.0002,
"loss": 1.4665,
"step": 720
},
{
"epoch": 6.72,
"eval_loss": 1.7030370235443115,
"eval_runtime": 67.7732,
"eval_samples_per_second": 18.975,
"eval_steps_per_second": 2.376,
"step": 720
},
{
"epoch": 6.81,
"learning_rate": 0.0002,
"loss": 1.4698,
"step": 729
},
{
"epoch": 6.89,
"learning_rate": 0.0002,
"loss": 1.5119,
"step": 738
},
{
"epoch": 6.98,
"learning_rate": 0.0002,
"loss": 1.4489,
"step": 747
},
{
"epoch": 6.99,
"step": 749,
"total_flos": 5.022814063873229e+16,
"train_loss": 0.7637141177428262,
"train_runtime": 5381.4192,
"train_samples_per_second": 6.69,
"train_steps_per_second": 0.139
}
],
"logging_steps": 9,
"max_steps": 749,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 43,
"total_flos": 5.022814063873229e+16,
"trial_name": null,
"trial_params": null
}