{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.238095238095237,
"eval_steps": 500,
"global_step": 160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19047619047619047,
"grad_norm": 0.9942206144332886,
"learning_rate": 0.00019750000000000003,
"loss": 9.5613,
"step": 2
},
{
"epoch": 0.38095238095238093,
"grad_norm": 1.2790788412094116,
"learning_rate": 0.000195,
"loss": 9.2339,
"step": 4
},
{
"epoch": 0.5714285714285714,
"grad_norm": 1.9939367771148682,
"learning_rate": 0.00019250000000000002,
"loss": 8.7953,
"step": 6
},
{
"epoch": 0.7619047619047619,
"grad_norm": 2.010485887527466,
"learning_rate": 0.00019,
"loss": 8.2168,
"step": 8
},
{
"epoch": 0.9523809523809523,
"grad_norm": 1.352328896522522,
"learning_rate": 0.0001875,
"loss": 7.8941,
"step": 10
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.9626594185829163,
"learning_rate": 0.00018500000000000002,
"loss": 7.6817,
"step": 12
},
{
"epoch": 1.3333333333333333,
"grad_norm": 1.1568268537521362,
"learning_rate": 0.0001825,
"loss": 7.5131,
"step": 14
},
{
"epoch": 1.5238095238095237,
"grad_norm": 1.0264520645141602,
"learning_rate": 0.00018,
"loss": 7.4247,
"step": 16
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.9865540862083435,
"learning_rate": 0.0001775,
"loss": 7.4369,
"step": 18
},
{
"epoch": 1.9047619047619047,
"grad_norm": 1.0182702541351318,
"learning_rate": 0.000175,
"loss": 7.3787,
"step": 20
},
{
"epoch": 2.0952380952380953,
"grad_norm": 0.7922359108924866,
"learning_rate": 0.00017250000000000002,
"loss": 7.373,
"step": 22
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.7033187747001648,
"learning_rate": 0.00017,
"loss": 7.3096,
"step": 24
},
{
"epoch": 2.4761904761904763,
"grad_norm": 2.9758119583129883,
"learning_rate": 0.0001675,
"loss": 7.1991,
"step": 26
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.7531760931015015,
"learning_rate": 0.000165,
"loss": 7.2661,
"step": 28
},
{
"epoch": 2.857142857142857,
"grad_norm": 1.3790533542633057,
"learning_rate": 0.00016250000000000002,
"loss": 7.2782,
"step": 30
},
{
"epoch": 3.0476190476190474,
"grad_norm": 0.6538093686103821,
"learning_rate": 0.00016,
"loss": 7.2109,
"step": 32
},
{
"epoch": 3.238095238095238,
"grad_norm": 0.6145215630531311,
"learning_rate": 0.0001575,
"loss": 7.2192,
"step": 34
},
{
"epoch": 3.4285714285714284,
"grad_norm": 0.4128475785255432,
"learning_rate": 0.000155,
"loss": 7.2892,
"step": 36
},
{
"epoch": 3.619047619047619,
"grad_norm": 1.0160013437271118,
"learning_rate": 0.0001525,
"loss": 7.2049,
"step": 38
},
{
"epoch": 3.8095238095238093,
"grad_norm": 0.5834835171699524,
"learning_rate": 0.00015000000000000001,
"loss": 7.1672,
"step": 40
},
{
"epoch": 4.0,
"grad_norm": 0.4894554615020752,
"learning_rate": 0.0001475,
"loss": 7.1269,
"step": 42
},
{
"epoch": 4.190476190476191,
"grad_norm": 0.593618631362915,
"learning_rate": 0.000145,
"loss": 7.0175,
"step": 44
},
{
"epoch": 4.380952380952381,
"grad_norm": 1.6190487146377563,
"learning_rate": 0.00014250000000000002,
"loss": 7.2919,
"step": 46
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.755859911441803,
"learning_rate": 0.00014,
"loss": 7.1624,
"step": 48
},
{
"epoch": 4.761904761904762,
"grad_norm": 0.46613645553588867,
"learning_rate": 0.0001375,
"loss": 7.2233,
"step": 50
},
{
"epoch": 4.9523809523809526,
"grad_norm": 0.5973020792007446,
"learning_rate": 0.00013500000000000003,
"loss": 7.1642,
"step": 52
},
{
"epoch": 5.142857142857143,
"grad_norm": 0.97837233543396,
"learning_rate": 0.0001325,
"loss": 7.1172,
"step": 54
},
{
"epoch": 5.333333333333333,
"grad_norm": 0.9348046183586121,
"learning_rate": 0.00013000000000000002,
"loss": 7.1564,
"step": 56
},
{
"epoch": 5.523809523809524,
"grad_norm": 0.6632198691368103,
"learning_rate": 0.0001275,
"loss": 7.0821,
"step": 58
},
{
"epoch": 5.714285714285714,
"grad_norm": 0.7776179909706116,
"learning_rate": 0.000125,
"loss": 7.2272,
"step": 60
},
{
"epoch": 5.904761904761905,
"grad_norm": 0.6282438039779663,
"learning_rate": 0.00012250000000000002,
"loss": 7.0926,
"step": 62
},
{
"epoch": 6.095238095238095,
"grad_norm": 0.6008353233337402,
"learning_rate": 0.00012,
"loss": 7.1073,
"step": 64
},
{
"epoch": 6.285714285714286,
"grad_norm": 0.8796420097351074,
"learning_rate": 0.00011750000000000001,
"loss": 7.1737,
"step": 66
},
{
"epoch": 6.476190476190476,
"grad_norm": 0.6400454640388489,
"learning_rate": 0.00011499999999999999,
"loss": 7.0924,
"step": 68
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.5479526519775391,
"learning_rate": 0.00011250000000000001,
"loss": 7.1275,
"step": 70
},
{
"epoch": 6.857142857142857,
"grad_norm": 0.5992618203163147,
"learning_rate": 0.00011000000000000002,
"loss": 7.0599,
"step": 72
},
{
"epoch": 7.0476190476190474,
"grad_norm": 0.5336684584617615,
"learning_rate": 0.0001075,
"loss": 7.0206,
"step": 74
},
{
"epoch": 7.238095238095238,
"grad_norm": 0.3991040289402008,
"learning_rate": 0.000105,
"loss": 7.0123,
"step": 76
},
{
"epoch": 7.428571428571429,
"grad_norm": 1.032917857170105,
"learning_rate": 0.0001025,
"loss": 7.0267,
"step": 78
},
{
"epoch": 7.619047619047619,
"grad_norm": 0.5554404854774475,
"learning_rate": 0.0001,
"loss": 7.0203,
"step": 80
},
{
"epoch": 7.809523809523809,
"grad_norm": 0.7755109667778015,
"learning_rate": 9.75e-05,
"loss": 7.1445,
"step": 82
},
{
"epoch": 8.0,
"grad_norm": 1.8295842409133911,
"learning_rate": 9.5e-05,
"loss": 7.0002,
"step": 84
},
{
"epoch": 8.19047619047619,
"grad_norm": 1.4985620975494385,
"learning_rate": 9.250000000000001e-05,
"loss": 7.0613,
"step": 86
},
{
"epoch": 8.380952380952381,
"grad_norm": 1.0733778476715088,
"learning_rate": 9e-05,
"loss": 7.0594,
"step": 88
},
{
"epoch": 8.571428571428571,
"grad_norm": 0.7009026408195496,
"learning_rate": 8.75e-05,
"loss": 6.9432,
"step": 90
},
{
"epoch": 8.761904761904763,
"grad_norm": 1.195196509361267,
"learning_rate": 8.5e-05,
"loss": 6.9266,
"step": 92
},
{
"epoch": 8.952380952380953,
"grad_norm": 2.6835684776306152,
"learning_rate": 8.25e-05,
"loss": 6.9855,
"step": 94
},
{
"epoch": 9.142857142857142,
"grad_norm": 0.7434377670288086,
"learning_rate": 8e-05,
"loss": 6.7975,
"step": 96
},
{
"epoch": 9.333333333333334,
"grad_norm": 0.5993837118148804,
"learning_rate": 7.75e-05,
"loss": 7.0476,
"step": 98
},
{
"epoch": 9.523809523809524,
"grad_norm": 0.4656153619289398,
"learning_rate": 7.500000000000001e-05,
"loss": 6.9894,
"step": 100
},
{
"epoch": 9.714285714285714,
"grad_norm": 0.7926774621009827,
"learning_rate": 7.25e-05,
"loss": 6.9854,
"step": 102
},
{
"epoch": 9.904761904761905,
"grad_norm": 1.0828678607940674,
"learning_rate": 7e-05,
"loss": 6.9185,
"step": 104
},
{
"epoch": 10.095238095238095,
"grad_norm": 0.6923830509185791,
"learning_rate": 6.750000000000001e-05,
"loss": 6.9804,
"step": 106
},
{
"epoch": 10.285714285714286,
"grad_norm": 0.5546735525131226,
"learning_rate": 6.500000000000001e-05,
"loss": 6.9273,
"step": 108
},
{
"epoch": 10.476190476190476,
"grad_norm": 0.8265076875686646,
"learning_rate": 6.25e-05,
"loss": 6.9087,
"step": 110
},
{
"epoch": 10.666666666666666,
"grad_norm": 0.3945198655128479,
"learning_rate": 6e-05,
"loss": 6.9375,
"step": 112
},
{
"epoch": 10.857142857142858,
"grad_norm": 0.5948878526687622,
"learning_rate": 5.7499999999999995e-05,
"loss": 6.8764,
"step": 114
},
{
"epoch": 11.047619047619047,
"grad_norm": 0.7741471529006958,
"learning_rate": 5.500000000000001e-05,
"loss": 6.7551,
"step": 116
},
{
"epoch": 11.238095238095237,
"grad_norm": 0.32554784417152405,
"learning_rate": 5.25e-05,
"loss": 6.8862,
"step": 118
},
{
"epoch": 11.428571428571429,
"grad_norm": 0.5033702850341797,
"learning_rate": 5e-05,
"loss": 6.7297,
"step": 120
},
{
"epoch": 11.619047619047619,
"grad_norm": 0.5291158556938171,
"learning_rate": 4.75e-05,
"loss": 6.9826,
"step": 122
},
{
"epoch": 11.80952380952381,
"grad_norm": 0.39498385787010193,
"learning_rate": 4.5e-05,
"loss": 6.837,
"step": 124
},
{
"epoch": 12.0,
"grad_norm": 0.4402136206626892,
"learning_rate": 4.25e-05,
"loss": 7.0434,
"step": 126
},
{
"epoch": 12.19047619047619,
"grad_norm": 0.6476764678955078,
"learning_rate": 4e-05,
"loss": 6.8524,
"step": 128
},
{
"epoch": 12.380952380952381,
"grad_norm": 0.330609530210495,
"learning_rate": 3.7500000000000003e-05,
"loss": 6.8742,
"step": 130
},
{
"epoch": 12.571428571428571,
"grad_norm": 0.5420040488243103,
"learning_rate": 3.5e-05,
"loss": 6.7931,
"step": 132
},
{
"epoch": 12.761904761904763,
"grad_norm": 0.3482373356819153,
"learning_rate": 3.2500000000000004e-05,
"loss": 6.883,
"step": 134
},
{
"epoch": 12.952380952380953,
"grad_norm": 0.3476051092147827,
"learning_rate": 3e-05,
"loss": 6.9857,
"step": 136
},
{
"epoch": 13.142857142857142,
"grad_norm": 0.43590274453163147,
"learning_rate": 2.8749999999999997e-05,
"loss": 8.116,
"step": 138
},
{
"epoch": 13.333333333333334,
"grad_norm": 0.2993098497390747,
"learning_rate": 2.625e-05,
"loss": 6.657,
"step": 140
},
{
"epoch": 13.523809523809524,
"grad_norm": 0.3477262556552887,
"learning_rate": 2.375e-05,
"loss": 6.9781,
"step": 142
},
{
"epoch": 13.714285714285714,
"grad_norm": 0.47370073199272156,
"learning_rate": 2.125e-05,
"loss": 6.9277,
"step": 144
},
{
"epoch": 13.904761904761905,
"grad_norm": 0.3924289345741272,
"learning_rate": 1.8750000000000002e-05,
"loss": 6.8967,
"step": 146
},
{
"epoch": 14.095238095238095,
"grad_norm": 0.5621922612190247,
"learning_rate": 1.6250000000000002e-05,
"loss": 6.7197,
"step": 148
},
{
"epoch": 14.285714285714286,
"grad_norm": 0.3454875349998474,
"learning_rate": 1.3750000000000002e-05,
"loss": 6.9314,
"step": 150
},
{
"epoch": 14.476190476190476,
"grad_norm": 0.3146642744541168,
"learning_rate": 1.125e-05,
"loss": 6.9142,
"step": 152
},
{
"epoch": 14.666666666666666,
"grad_norm": 0.3762160837650299,
"learning_rate": 8.75e-06,
"loss": 6.8759,
"step": 154
},
{
"epoch": 14.857142857142858,
"grad_norm": 0.33906954526901245,
"learning_rate": 6.25e-06,
"loss": 6.8712,
"step": 156
},
{
"epoch": 15.047619047619047,
"grad_norm": 0.3414846360683441,
"learning_rate": 3.75e-06,
"loss": 6.737,
"step": 158
},
{
"epoch": 15.238095238095237,
"grad_norm": 0.4463809132575989,
"learning_rate": 1.25e-06,
"loss": 6.9144,
"step": 160
},
{
"epoch": 15.238095238095237,
"step": 160,
"total_flos": 800861569170024.0,
"train_loss": 7.170098584890366,
"train_runtime": 677.1666,
"train_samples_per_second": 3.969,
"train_steps_per_second": 0.236
}
],
"logging_steps": 2,
"max_steps": 160,
"num_input_tokens_seen": 0,
"num_train_epochs": 16,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 800861569170024.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}