{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 68.0,
"eval_steps": 500,
"global_step": 3094,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.79,
"learning_rate": 1.5555555555555555e-05,
"loss": 2.3054,
"step": 36
},
{
"epoch": 1.58,
"learning_rate": 3.155555555555556e-05,
"loss": 2.1949,
"step": 72
},
{
"epoch": 2.37,
"learning_rate": 4.755555555555556e-05,
"loss": 1.978,
"step": 108
},
{
"epoch": 3.16,
"learning_rate": 6.355555555555556e-05,
"loss": 1.9161,
"step": 144
},
{
"epoch": 3.96,
"learning_rate": 7.955555555555556e-05,
"loss": 1.8945,
"step": 180
},
{
"epoch": 4.75,
"learning_rate": 9.555555555555557e-05,
"loss": 1.8682,
"step": 216
},
{
"epoch": 5.54,
"learning_rate": 0.00011155555555555556,
"loss": 1.8492,
"step": 252
},
{
"epoch": 6.33,
"learning_rate": 0.00012755555555555556,
"loss": 1.8337,
"step": 288
},
{
"epoch": 7.12,
"learning_rate": 0.0001431111111111111,
"loss": 1.8144,
"step": 324
},
{
"epoch": 7.91,
"learning_rate": 0.00015911111111111112,
"loss": 1.7907,
"step": 360
},
{
"epoch": 8.7,
"learning_rate": 0.00017511111111111113,
"loss": 1.7382,
"step": 396
},
{
"epoch": 9.49,
"learning_rate": 0.00019111111111111114,
"loss": 1.6991,
"step": 432
},
{
"epoch": 10.29,
"learning_rate": 0.00019925925925925927,
"loss": 1.6405,
"step": 468
},
{
"epoch": 11.08,
"learning_rate": 0.00019748148148148148,
"loss": 1.5929,
"step": 504
},
{
"epoch": 11.87,
"learning_rate": 0.0001957037037037037,
"loss": 1.5414,
"step": 540
},
{
"epoch": 12.66,
"learning_rate": 0.00019392592592592592,
"loss": 1.447,
"step": 576
},
{
"epoch": 13.45,
"learning_rate": 0.00019214814814814816,
"loss": 1.3947,
"step": 612
},
{
"epoch": 14.24,
"learning_rate": 0.00019037037037037037,
"loss": 1.3655,
"step": 648
},
{
"epoch": 15.03,
"learning_rate": 0.0001885925925925926,
"loss": 1.2873,
"step": 684
},
{
"epoch": 15.82,
"learning_rate": 0.0001868148148148148,
"loss": 1.2198,
"step": 720
},
{
"epoch": 16.62,
"learning_rate": 0.00018503703703703705,
"loss": 1.1512,
"step": 756
},
{
"epoch": 17.41,
"learning_rate": 0.00018325925925925926,
"loss": 1.1286,
"step": 792
},
{
"epoch": 18.2,
"learning_rate": 0.0001814814814814815,
"loss": 1.0709,
"step": 828
},
{
"epoch": 18.99,
"learning_rate": 0.0001797037037037037,
"loss": 1.0442,
"step": 864
},
{
"epoch": 19.78,
"learning_rate": 0.00017792592592592594,
"loss": 0.971,
"step": 900
},
{
"epoch": 20.57,
"learning_rate": 0.00017614814814814815,
"loss": 0.9412,
"step": 936
},
{
"epoch": 21.36,
"learning_rate": 0.00017437037037037039,
"loss": 0.9084,
"step": 972
},
{
"epoch": 22.15,
"learning_rate": 0.0001725925925925926,
"loss": 0.885,
"step": 1008
},
{
"epoch": 22.95,
"learning_rate": 0.00017081481481481483,
"loss": 0.844,
"step": 1044
},
{
"epoch": 23.74,
"learning_rate": 0.00016903703703703704,
"loss": 0.7975,
"step": 1080
},
{
"epoch": 24.53,
"learning_rate": 0.00016725925925925928,
"loss": 0.7786,
"step": 1116
},
{
"epoch": 25.32,
"learning_rate": 0.00016548148148148149,
"loss": 0.7465,
"step": 1152
},
{
"epoch": 26.11,
"learning_rate": 0.00016370370370370372,
"loss": 0.7311,
"step": 1188
},
{
"epoch": 26.9,
"learning_rate": 0.00016192592592592593,
"loss": 0.6955,
"step": 1224
},
{
"epoch": 27.69,
"learning_rate": 0.00016014814814814817,
"loss": 0.6656,
"step": 1260
},
{
"epoch": 28.48,
"learning_rate": 0.00015837037037037038,
"loss": 0.6507,
"step": 1296
},
{
"epoch": 29.27,
"learning_rate": 0.0001565925925925926,
"loss": 0.6336,
"step": 1332
},
{
"epoch": 30.07,
"learning_rate": 0.00015481481481481482,
"loss": 0.6142,
"step": 1368
},
{
"epoch": 30.86,
"learning_rate": 0.00015303703703703706,
"loss": 0.5865,
"step": 1404
},
{
"epoch": 31.65,
"learning_rate": 0.00015125925925925927,
"loss": 0.5649,
"step": 1440
},
{
"epoch": 32.44,
"learning_rate": 0.00014948148148148148,
"loss": 0.5511,
"step": 1476
},
{
"epoch": 33.23,
"learning_rate": 0.0001477037037037037,
"loss": 0.5329,
"step": 1512
},
{
"epoch": 34.02,
"learning_rate": 0.00014592592592592592,
"loss": 0.5265,
"step": 1548
},
{
"epoch": 34.81,
"learning_rate": 0.00014414814814814816,
"loss": 0.4931,
"step": 1584
},
{
"epoch": 35.6,
"learning_rate": 0.00014237037037037037,
"loss": 0.4904,
"step": 1620
},
{
"epoch": 36.4,
"learning_rate": 0.0001405925925925926,
"loss": 0.466,
"step": 1656
},
{
"epoch": 37.19,
"learning_rate": 0.0001388148148148148,
"loss": 0.4585,
"step": 1692
},
{
"epoch": 37.98,
"learning_rate": 0.00013703703703703705,
"loss": 0.4545,
"step": 1728
},
{
"epoch": 38.77,
"learning_rate": 0.00013525925925925926,
"loss": 0.4244,
"step": 1764
},
{
"epoch": 39.56,
"learning_rate": 0.0001334814814814815,
"loss": 0.4143,
"step": 1800
},
{
"epoch": 40.35,
"learning_rate": 0.0001317037037037037,
"loss": 0.4118,
"step": 1836
},
{
"epoch": 41.14,
"learning_rate": 0.00012992592592592594,
"loss": 0.3963,
"step": 1872
},
{
"epoch": 41.93,
"learning_rate": 0.00012814814814814815,
"loss": 0.3901,
"step": 1908
},
{
"epoch": 42.73,
"learning_rate": 0.00012637037037037038,
"loss": 0.3697,
"step": 1944
},
{
"epoch": 43.52,
"learning_rate": 0.0001245925925925926,
"loss": 0.3595,
"step": 1980
},
{
"epoch": 44.31,
"learning_rate": 0.00012281481481481483,
"loss": 0.3609,
"step": 2016
},
{
"epoch": 45.1,
"learning_rate": 0.00012103703703703704,
"loss": 0.3457,
"step": 2052
},
{
"epoch": 45.89,
"learning_rate": 0.00011925925925925927,
"loss": 0.3342,
"step": 2088
},
{
"epoch": 46.68,
"learning_rate": 0.00011748148148148148,
"loss": 0.3213,
"step": 2124
},
{
"epoch": 47.47,
"learning_rate": 0.00011570370370370372,
"loss": 0.3183,
"step": 2160
},
{
"epoch": 48.26,
"learning_rate": 0.00011392592592592593,
"loss": 0.3067,
"step": 2196
},
{
"epoch": 49.05,
"learning_rate": 0.00011214814814814815,
"loss": 0.3062,
"step": 2232
},
{
"epoch": 49.85,
"learning_rate": 0.00011037037037037037,
"loss": 0.291,
"step": 2268
},
{
"epoch": 50.64,
"learning_rate": 0.0001085925925925926,
"loss": 0.2837,
"step": 2304
},
{
"epoch": 51.43,
"learning_rate": 0.0001068148148148148,
"loss": 0.2768,
"step": 2340
},
{
"epoch": 52.22,
"learning_rate": 0.00010503703703703704,
"loss": 0.2733,
"step": 2376
},
{
"epoch": 53.01,
"learning_rate": 0.00010325925925925925,
"loss": 0.2622,
"step": 2412
},
{
"epoch": 53.8,
"learning_rate": 0.00010148148148148149,
"loss": 0.254,
"step": 2448
},
{
"epoch": 54.59,
"learning_rate": 9.970370370370371e-05,
"loss": 0.247,
"step": 2484
},
{
"epoch": 55.38,
"learning_rate": 9.792592592592593e-05,
"loss": 0.2415,
"step": 2520
},
{
"epoch": 56.18,
"learning_rate": 9.614814814814816e-05,
"loss": 0.2391,
"step": 2556
},
{
"epoch": 56.97,
"learning_rate": 9.437037037037038e-05,
"loss": 0.2318,
"step": 2592
},
{
"epoch": 57.76,
"learning_rate": 9.25925925925926e-05,
"loss": 0.221,
"step": 2628
},
{
"epoch": 58.55,
"learning_rate": 9.081481481481482e-05,
"loss": 0.2179,
"step": 2664
},
{
"epoch": 59.34,
"learning_rate": 8.903703703703705e-05,
"loss": 0.2145,
"step": 2700
},
{
"epoch": 60.13,
"learning_rate": 8.725925925925927e-05,
"loss": 0.209,
"step": 2736
},
{
"epoch": 60.92,
"learning_rate": 8.548148148148148e-05,
"loss": 0.205,
"step": 2772
},
{
"epoch": 61.71,
"learning_rate": 8.37037037037037e-05,
"loss": 0.197,
"step": 2808
},
{
"epoch": 62.51,
"learning_rate": 8.192592592592592e-05,
"loss": 0.1948,
"step": 2844
},
{
"epoch": 63.3,
"learning_rate": 8.014814814814815e-05,
"loss": 0.188,
"step": 2880
},
{
"epoch": 64.09,
"learning_rate": 7.837037037037037e-05,
"loss": 0.188,
"step": 2916
},
{
"epoch": 64.88,
"learning_rate": 7.659259259259259e-05,
"loss": 0.1785,
"step": 2952
},
{
"epoch": 65.67,
"learning_rate": 7.481481481481481e-05,
"loss": 0.1758,
"step": 2988
},
{
"epoch": 66.46,
"learning_rate": 7.303703703703704e-05,
"loss": 0.1703,
"step": 3024
},
{
"epoch": 67.25,
"learning_rate": 7.125925925925926e-05,
"loss": 0.1702,
"step": 3060
}
],
"logging_steps": 36,
"max_steps": 4500,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 2.6941037663276237e+18,
"trial_name": null,
"trial_params": null
}