T-music's picture
Upload folder using huggingface_hub
9f73c38
{
"best_metric": 0.7834821428571429,
"best_model_checkpoint": "/content/10epcoh1/checkpoint-600",
"epoch": 10.0,
"global_step": 750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13,
"learning_rate": 1.3333333333333333e-05,
"loss": 2.1275,
"step": 10
},
{
"epoch": 0.27,
"learning_rate": 2.6666666666666667e-05,
"loss": 1.8422,
"step": 20
},
{
"epoch": 0.4,
"learning_rate": 4e-05,
"loss": 1.5182,
"step": 30
},
{
"epoch": 0.53,
"learning_rate": 5.333333333333333e-05,
"loss": 1.2899,
"step": 40
},
{
"epoch": 0.67,
"learning_rate": 6.666666666666667e-05,
"loss": 1.1166,
"step": 50
},
{
"epoch": 0.8,
"learning_rate": 8e-05,
"loss": 0.9772,
"step": 60
},
{
"epoch": 0.93,
"learning_rate": 9.333333333333334e-05,
"loss": 1.0362,
"step": 70
},
{
"epoch": 1.0,
"eval_accuracy": 0.6339285714285714,
"eval_loss": 1.1306579113006592,
"eval_runtime": 15.6391,
"eval_samples_per_second": 28.646,
"eval_steps_per_second": 3.581,
"step": 75
},
{
"epoch": 1.07,
"learning_rate": 9.925925925925926e-05,
"loss": 1.1734,
"step": 80
},
{
"epoch": 1.2,
"learning_rate": 9.777777777777778e-05,
"loss": 0.9443,
"step": 90
},
{
"epoch": 1.33,
"learning_rate": 9.62962962962963e-05,
"loss": 0.8425,
"step": 100
},
{
"epoch": 1.47,
"learning_rate": 9.481481481481483e-05,
"loss": 0.9121,
"step": 110
},
{
"epoch": 1.6,
"learning_rate": 9.333333333333334e-05,
"loss": 0.7943,
"step": 120
},
{
"epoch": 1.73,
"learning_rate": 9.185185185185186e-05,
"loss": 0.8326,
"step": 130
},
{
"epoch": 1.87,
"learning_rate": 9.037037037037038e-05,
"loss": 0.8621,
"step": 140
},
{
"epoch": 2.0,
"learning_rate": 8.888888888888889e-05,
"loss": 0.7188,
"step": 150
},
{
"epoch": 2.0,
"eval_accuracy": 0.7366071428571429,
"eval_loss": 0.7066014409065247,
"eval_runtime": 15.9611,
"eval_samples_per_second": 28.068,
"eval_steps_per_second": 3.509,
"step": 150
},
{
"epoch": 2.13,
"learning_rate": 8.740740740740741e-05,
"loss": 0.4786,
"step": 160
},
{
"epoch": 2.27,
"learning_rate": 8.592592592592593e-05,
"loss": 0.5607,
"step": 170
},
{
"epoch": 2.4,
"learning_rate": 8.444444444444444e-05,
"loss": 0.5072,
"step": 180
},
{
"epoch": 2.53,
"learning_rate": 8.296296296296296e-05,
"loss": 0.5531,
"step": 190
},
{
"epoch": 2.67,
"learning_rate": 8.148148148148148e-05,
"loss": 0.4221,
"step": 200
},
{
"epoch": 2.8,
"learning_rate": 8e-05,
"loss": 0.5585,
"step": 210
},
{
"epoch": 2.93,
"learning_rate": 7.851851851851852e-05,
"loss": 0.6219,
"step": 220
},
{
"epoch": 3.0,
"eval_accuracy": 0.7299107142857143,
"eval_loss": 0.7877911925315857,
"eval_runtime": 17.0375,
"eval_samples_per_second": 26.295,
"eval_steps_per_second": 3.287,
"step": 225
},
{
"epoch": 3.07,
"learning_rate": 7.703703703703704e-05,
"loss": 0.5331,
"step": 230
},
{
"epoch": 3.2,
"learning_rate": 7.555555555555556e-05,
"loss": 0.3028,
"step": 240
},
{
"epoch": 3.33,
"learning_rate": 7.407407407407407e-05,
"loss": 0.3088,
"step": 250
},
{
"epoch": 3.47,
"learning_rate": 7.25925925925926e-05,
"loss": 0.3205,
"step": 260
},
{
"epoch": 3.6,
"learning_rate": 7.111111111111112e-05,
"loss": 0.2972,
"step": 270
},
{
"epoch": 3.73,
"learning_rate": 6.962962962962964e-05,
"loss": 0.3062,
"step": 280
},
{
"epoch": 3.87,
"learning_rate": 6.814814814814815e-05,
"loss": 0.3865,
"step": 290
},
{
"epoch": 4.0,
"learning_rate": 6.666666666666667e-05,
"loss": 0.3372,
"step": 300
},
{
"epoch": 4.0,
"eval_accuracy": 0.7767857142857143,
"eval_loss": 0.6498553156852722,
"eval_runtime": 15.0316,
"eval_samples_per_second": 29.804,
"eval_steps_per_second": 3.725,
"step": 300
},
{
"epoch": 4.13,
"learning_rate": 6.51851851851852e-05,
"loss": 0.1711,
"step": 310
},
{
"epoch": 4.27,
"learning_rate": 6.37037037037037e-05,
"loss": 0.2188,
"step": 320
},
{
"epoch": 4.4,
"learning_rate": 6.222222222222222e-05,
"loss": 0.3254,
"step": 330
},
{
"epoch": 4.53,
"learning_rate": 6.074074074074074e-05,
"loss": 0.2683,
"step": 340
},
{
"epoch": 4.67,
"learning_rate": 5.925925925925926e-05,
"loss": 0.2208,
"step": 350
},
{
"epoch": 4.8,
"learning_rate": 5.7777777777777776e-05,
"loss": 0.1729,
"step": 360
},
{
"epoch": 4.93,
"learning_rate": 5.62962962962963e-05,
"loss": 0.2024,
"step": 370
},
{
"epoch": 5.0,
"eval_accuracy": 0.765625,
"eval_loss": 0.7129976153373718,
"eval_runtime": 15.0955,
"eval_samples_per_second": 29.678,
"eval_steps_per_second": 3.71,
"step": 375
},
{
"epoch": 5.07,
"learning_rate": 5.4814814814814817e-05,
"loss": 0.2077,
"step": 380
},
{
"epoch": 5.2,
"learning_rate": 5.333333333333333e-05,
"loss": 0.17,
"step": 390
},
{
"epoch": 5.33,
"learning_rate": 5.185185185185185e-05,
"loss": 0.1838,
"step": 400
},
{
"epoch": 5.47,
"learning_rate": 5.0370370370370366e-05,
"loss": 0.2272,
"step": 410
},
{
"epoch": 5.6,
"learning_rate": 4.888888888888889e-05,
"loss": 0.185,
"step": 420
},
{
"epoch": 5.73,
"learning_rate": 4.740740740740741e-05,
"loss": 0.1832,
"step": 430
},
{
"epoch": 5.87,
"learning_rate": 4.592592592592593e-05,
"loss": 0.1449,
"step": 440
},
{
"epoch": 6.0,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.169,
"step": 450
},
{
"epoch": 6.0,
"eval_accuracy": 0.7790178571428571,
"eval_loss": 0.756848156452179,
"eval_runtime": 14.9826,
"eval_samples_per_second": 29.901,
"eval_steps_per_second": 3.738,
"step": 450
},
{
"epoch": 6.13,
"learning_rate": 4.296296296296296e-05,
"loss": 0.1311,
"step": 460
},
{
"epoch": 6.27,
"learning_rate": 4.148148148148148e-05,
"loss": 0.1196,
"step": 470
},
{
"epoch": 6.4,
"learning_rate": 4e-05,
"loss": 0.12,
"step": 480
},
{
"epoch": 6.53,
"learning_rate": 3.851851851851852e-05,
"loss": 0.1471,
"step": 490
},
{
"epoch": 6.67,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.1104,
"step": 500
},
{
"epoch": 6.8,
"learning_rate": 3.555555555555556e-05,
"loss": 0.1529,
"step": 510
},
{
"epoch": 6.93,
"learning_rate": 3.4074074074074077e-05,
"loss": 0.1375,
"step": 520
},
{
"epoch": 7.0,
"eval_accuracy": 0.7723214285714286,
"eval_loss": 0.8526908159255981,
"eval_runtime": 15.1218,
"eval_samples_per_second": 29.626,
"eval_steps_per_second": 3.703,
"step": 525
},
{
"epoch": 7.07,
"learning_rate": 3.25925925925926e-05,
"loss": 0.1935,
"step": 530
},
{
"epoch": 7.2,
"learning_rate": 3.111111111111111e-05,
"loss": 0.131,
"step": 540
},
{
"epoch": 7.33,
"learning_rate": 2.962962962962963e-05,
"loss": 0.0954,
"step": 550
},
{
"epoch": 7.47,
"learning_rate": 2.814814814814815e-05,
"loss": 0.0971,
"step": 560
},
{
"epoch": 7.6,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.1179,
"step": 570
},
{
"epoch": 7.73,
"learning_rate": 2.5185185185185183e-05,
"loss": 0.1074,
"step": 580
},
{
"epoch": 7.87,
"learning_rate": 2.3703703703703707e-05,
"loss": 0.1349,
"step": 590
},
{
"epoch": 8.0,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.0893,
"step": 600
},
{
"epoch": 8.0,
"eval_accuracy": 0.7834821428571429,
"eval_loss": 0.9136894941329956,
"eval_runtime": 14.9574,
"eval_samples_per_second": 29.952,
"eval_steps_per_second": 3.744,
"step": 600
},
{
"epoch": 8.13,
"learning_rate": 2.074074074074074e-05,
"loss": 0.0898,
"step": 610
},
{
"epoch": 8.27,
"learning_rate": 1.925925925925926e-05,
"loss": 0.1095,
"step": 620
},
{
"epoch": 8.4,
"learning_rate": 1.777777777777778e-05,
"loss": 0.0696,
"step": 630
},
{
"epoch": 8.53,
"learning_rate": 1.62962962962963e-05,
"loss": 0.1215,
"step": 640
},
{
"epoch": 8.67,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.1103,
"step": 650
},
{
"epoch": 8.8,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.1112,
"step": 660
},
{
"epoch": 8.93,
"learning_rate": 1.1851851851851853e-05,
"loss": 0.0804,
"step": 670
},
{
"epoch": 9.0,
"eval_accuracy": 0.7834821428571429,
"eval_loss": 0.9393314719200134,
"eval_runtime": 15.2041,
"eval_samples_per_second": 29.466,
"eval_steps_per_second": 3.683,
"step": 675
},
{
"epoch": 9.07,
"learning_rate": 1.037037037037037e-05,
"loss": 0.1229,
"step": 680
},
{
"epoch": 9.2,
"learning_rate": 8.88888888888889e-06,
"loss": 0.078,
"step": 690
},
{
"epoch": 9.33,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.068,
"step": 700
},
{
"epoch": 9.47,
"learning_rate": 5.925925925925927e-06,
"loss": 0.1024,
"step": 710
},
{
"epoch": 9.6,
"learning_rate": 4.444444444444445e-06,
"loss": 0.0794,
"step": 720
},
{
"epoch": 9.73,
"learning_rate": 2.9629629629629633e-06,
"loss": 0.0874,
"step": 730
},
{
"epoch": 9.87,
"learning_rate": 1.4814814814814817e-06,
"loss": 0.0562,
"step": 740
},
{
"epoch": 10.0,
"learning_rate": 0.0,
"loss": 0.11,
"step": 750
},
{
"epoch": 10.0,
"eval_accuracy": 0.7723214285714286,
"eval_loss": 0.9582878947257996,
"eval_runtime": 15.0752,
"eval_samples_per_second": 29.718,
"eval_steps_per_second": 3.715,
"step": 750
}
],
"max_steps": 750,
"num_train_epochs": 10,
"total_flos": 4.90627722230102e+18,
"trial_name": null,
"trial_params": null
}