{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.2051282051282053,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 4.9903846153846154e-05,
"loss": 5.8218,
"step": 6
},
{
"epoch": 0.04,
"learning_rate": 4.980769230769231e-05,
"loss": 5.2499,
"step": 12
},
{
"epoch": 0.06,
"learning_rate": 4.9711538461538465e-05,
"loss": 5.1236,
"step": 18
},
{
"epoch": 0.08,
"learning_rate": 4.961538461538462e-05,
"loss": 4.8363,
"step": 24
},
{
"epoch": 0.1,
"learning_rate": 4.9519230769230776e-05,
"loss": 4.5805,
"step": 30
},
{
"epoch": 0.12,
"learning_rate": 4.942307692307693e-05,
"loss": 4.5276,
"step": 36
},
{
"epoch": 0.13,
"learning_rate": 4.932692307692308e-05,
"loss": 4.3871,
"step": 42
},
{
"epoch": 0.15,
"learning_rate": 4.923076923076924e-05,
"loss": 4.2706,
"step": 48
},
{
"epoch": 0.17,
"learning_rate": 4.9134615384615384e-05,
"loss": 4.0906,
"step": 54
},
{
"epoch": 0.19,
"learning_rate": 4.9038461538461536e-05,
"loss": 4.1704,
"step": 60
},
{
"epoch": 0.21,
"learning_rate": 4.8942307692307695e-05,
"loss": 3.9014,
"step": 66
},
{
"epoch": 0.23,
"learning_rate": 4.884615384615385e-05,
"loss": 4.0338,
"step": 72
},
{
"epoch": 0.25,
"learning_rate": 4.875e-05,
"loss": 4.011,
"step": 78
},
{
"epoch": 0.27,
"learning_rate": 4.865384615384616e-05,
"loss": 3.8017,
"step": 84
},
{
"epoch": 0.29,
"learning_rate": 4.855769230769231e-05,
"loss": 3.819,
"step": 90
},
{
"epoch": 0.31,
"learning_rate": 4.846153846153846e-05,
"loss": 3.8157,
"step": 96
},
{
"epoch": 0.33,
"learning_rate": 4.836538461538462e-05,
"loss": 3.7675,
"step": 102
},
{
"epoch": 0.35,
"learning_rate": 4.826923076923077e-05,
"loss": 3.756,
"step": 108
},
{
"epoch": 0.37,
"learning_rate": 4.8173076923076925e-05,
"loss": 3.7439,
"step": 114
},
{
"epoch": 0.38,
"learning_rate": 4.8076923076923084e-05,
"loss": 3.761,
"step": 120
},
{
"epoch": 0.4,
"learning_rate": 4.7980769230769236e-05,
"loss": 3.7318,
"step": 126
},
{
"epoch": 0.42,
"learning_rate": 4.788461538461539e-05,
"loss": 3.745,
"step": 132
},
{
"epoch": 0.44,
"learning_rate": 4.778846153846154e-05,
"loss": 3.784,
"step": 138
},
{
"epoch": 0.46,
"learning_rate": 4.76923076923077e-05,
"loss": 3.6114,
"step": 144
},
{
"epoch": 0.48,
"learning_rate": 4.7596153846153844e-05,
"loss": 3.637,
"step": 150
},
{
"epoch": 0.5,
"learning_rate": 4.75e-05,
"loss": 3.5881,
"step": 156
},
{
"epoch": 0.52,
"learning_rate": 4.7403846153846155e-05,
"loss": 3.5336,
"step": 162
},
{
"epoch": 0.54,
"learning_rate": 4.730769230769231e-05,
"loss": 3.5313,
"step": 168
},
{
"epoch": 0.56,
"learning_rate": 4.7211538461538465e-05,
"loss": 3.6357,
"step": 174
},
{
"epoch": 0.58,
"learning_rate": 4.711538461538462e-05,
"loss": 3.5818,
"step": 180
},
{
"epoch": 0.6,
"learning_rate": 4.701923076923077e-05,
"loss": 3.5017,
"step": 186
},
{
"epoch": 0.62,
"learning_rate": 4.692307692307693e-05,
"loss": 3.5815,
"step": 192
},
{
"epoch": 0.63,
"learning_rate": 4.682692307692308e-05,
"loss": 3.611,
"step": 198
},
{
"epoch": 0.65,
"learning_rate": 4.673076923076923e-05,
"loss": 3.6096,
"step": 204
},
{
"epoch": 0.67,
"learning_rate": 4.6634615384615384e-05,
"loss": 3.5436,
"step": 210
},
{
"epoch": 0.69,
"learning_rate": 4.653846153846154e-05,
"loss": 3.5258,
"step": 216
},
{
"epoch": 0.71,
"learning_rate": 4.6442307692307695e-05,
"loss": 3.5277,
"step": 222
},
{
"epoch": 0.73,
"learning_rate": 4.634615384615385e-05,
"loss": 3.505,
"step": 228
},
{
"epoch": 0.75,
"learning_rate": 4.6250000000000006e-05,
"loss": 3.4665,
"step": 234
},
{
"epoch": 0.77,
"learning_rate": 4.615384615384616e-05,
"loss": 3.4245,
"step": 240
},
{
"epoch": 0.79,
"learning_rate": 4.605769230769231e-05,
"loss": 3.4547,
"step": 246
},
{
"epoch": 0.81,
"learning_rate": 4.596153846153846e-05,
"loss": 3.4382,
"step": 252
},
{
"epoch": 0.83,
"learning_rate": 4.5865384615384614e-05,
"loss": 3.482,
"step": 258
},
{
"epoch": 0.85,
"learning_rate": 4.576923076923077e-05,
"loss": 3.4795,
"step": 264
},
{
"epoch": 0.87,
"learning_rate": 4.5673076923076925e-05,
"loss": 3.5018,
"step": 270
},
{
"epoch": 0.88,
"learning_rate": 4.557692307692308e-05,
"loss": 3.448,
"step": 276
},
{
"epoch": 0.9,
"learning_rate": 4.548076923076923e-05,
"loss": 3.4206,
"step": 282
},
{
"epoch": 0.92,
"learning_rate": 4.538461538461539e-05,
"loss": 3.528,
"step": 288
},
{
"epoch": 0.94,
"learning_rate": 4.528846153846154e-05,
"loss": 3.475,
"step": 294
},
{
"epoch": 0.96,
"learning_rate": 4.519230769230769e-05,
"loss": 3.4142,
"step": 300
},
{
"epoch": 0.98,
"learning_rate": 4.509615384615385e-05,
"loss": 3.4156,
"step": 306
},
{
"epoch": 1.0,
"learning_rate": 4.5e-05,
"loss": 3.3274,
"step": 312
},
{
"epoch": 1.02,
"learning_rate": 4.4903846153846155e-05,
"loss": 3.3594,
"step": 318
},
{
"epoch": 1.04,
"learning_rate": 4.4807692307692314e-05,
"loss": 3.2517,
"step": 324
},
{
"epoch": 1.06,
"learning_rate": 4.4711538461538466e-05,
"loss": 3.2934,
"step": 330
},
{
"epoch": 1.08,
"learning_rate": 4.461538461538462e-05,
"loss": 3.2986,
"step": 336
},
{
"epoch": 1.1,
"learning_rate": 4.451923076923077e-05,
"loss": 3.2655,
"step": 342
},
{
"epoch": 1.12,
"learning_rate": 4.442307692307692e-05,
"loss": 3.4058,
"step": 348
},
{
"epoch": 1.13,
"learning_rate": 4.4326923076923074e-05,
"loss": 3.265,
"step": 354
},
{
"epoch": 1.15,
"learning_rate": 4.423076923076923e-05,
"loss": 3.3208,
"step": 360
},
{
"epoch": 1.17,
"learning_rate": 4.4134615384615385e-05,
"loss": 3.238,
"step": 366
},
{
"epoch": 1.19,
"learning_rate": 4.403846153846154e-05,
"loss": 3.2153,
"step": 372
},
{
"epoch": 1.21,
"learning_rate": 4.3942307692307695e-05,
"loss": 3.264,
"step": 378
},
{
"epoch": 1.23,
"learning_rate": 4.384615384615385e-05,
"loss": 3.2911,
"step": 384
},
{
"epoch": 1.25,
"learning_rate": 4.375e-05,
"loss": 3.3027,
"step": 390
},
{
"epoch": 1.27,
"learning_rate": 4.365384615384616e-05,
"loss": 3.2589,
"step": 396
},
{
"epoch": 1.29,
"learning_rate": 4.355769230769231e-05,
"loss": 3.3683,
"step": 402
},
{
"epoch": 1.31,
"learning_rate": 4.346153846153846e-05,
"loss": 3.2849,
"step": 408
},
{
"epoch": 1.33,
"learning_rate": 4.336538461538462e-05,
"loss": 3.2397,
"step": 414
},
{
"epoch": 1.35,
"learning_rate": 4.326923076923077e-05,
"loss": 3.2128,
"step": 420
},
{
"epoch": 1.37,
"learning_rate": 4.3173076923076925e-05,
"loss": 3.1944,
"step": 426
},
{
"epoch": 1.38,
"learning_rate": 4.3076923076923084e-05,
"loss": 3.1837,
"step": 432
},
{
"epoch": 1.4,
"learning_rate": 4.2980769230769236e-05,
"loss": 3.1793,
"step": 438
},
{
"epoch": 1.42,
"learning_rate": 4.288461538461538e-05,
"loss": 3.1447,
"step": 444
},
{
"epoch": 1.44,
"learning_rate": 4.278846153846154e-05,
"loss": 3.1028,
"step": 450
},
{
"epoch": 1.46,
"learning_rate": 4.269230769230769e-05,
"loss": 3.2471,
"step": 456
},
{
"epoch": 1.48,
"learning_rate": 4.2596153846153844e-05,
"loss": 3.1855,
"step": 462
},
{
"epoch": 1.5,
"learning_rate": 4.25e-05,
"loss": 3.1817,
"step": 468
},
{
"epoch": 1.52,
"learning_rate": 4.2403846153846155e-05,
"loss": 3.214,
"step": 474
},
{
"epoch": 1.54,
"learning_rate": 4.230769230769231e-05,
"loss": 2.997,
"step": 480
},
{
"epoch": 1.56,
"learning_rate": 4.2211538461538466e-05,
"loss": 3.2059,
"step": 486
},
{
"epoch": 1.58,
"learning_rate": 4.211538461538462e-05,
"loss": 3.1517,
"step": 492
},
{
"epoch": 1.6,
"learning_rate": 4.201923076923077e-05,
"loss": 3.2236,
"step": 498
},
{
"epoch": 1.62,
"learning_rate": 4.192307692307693e-05,
"loss": 3.1939,
"step": 504
},
{
"epoch": 1.63,
"learning_rate": 4.182692307692308e-05,
"loss": 3.1143,
"step": 510
},
{
"epoch": 1.65,
"learning_rate": 4.173076923076923e-05,
"loss": 3.2261,
"step": 516
},
{
"epoch": 1.67,
"learning_rate": 4.163461538461539e-05,
"loss": 3.2095,
"step": 522
},
{
"epoch": 1.69,
"learning_rate": 4.1538461538461544e-05,
"loss": 3.2498,
"step": 528
},
{
"epoch": 1.71,
"learning_rate": 4.1442307692307696e-05,
"loss": 3.1493,
"step": 534
},
{
"epoch": 1.73,
"learning_rate": 4.134615384615385e-05,
"loss": 3.0919,
"step": 540
},
{
"epoch": 1.75,
"learning_rate": 4.125e-05,
"loss": 3.2617,
"step": 546
},
{
"epoch": 1.77,
"learning_rate": 4.115384615384615e-05,
"loss": 3.1447,
"step": 552
},
{
"epoch": 1.79,
"learning_rate": 4.105769230769231e-05,
"loss": 3.2025,
"step": 558
},
{
"epoch": 1.81,
"learning_rate": 4.096153846153846e-05,
"loss": 3.1829,
"step": 564
},
{
"epoch": 1.83,
"learning_rate": 4.0865384615384615e-05,
"loss": 3.2216,
"step": 570
},
{
"epoch": 1.85,
"learning_rate": 4.0769230769230773e-05,
"loss": 3.1391,
"step": 576
},
{
"epoch": 1.87,
"learning_rate": 4.0673076923076926e-05,
"loss": 3.1344,
"step": 582
},
{
"epoch": 1.88,
"learning_rate": 4.057692307692308e-05,
"loss": 3.1699,
"step": 588
},
{
"epoch": 1.9,
"learning_rate": 4.0480769230769236e-05,
"loss": 3.0138,
"step": 594
},
{
"epoch": 1.92,
"learning_rate": 4.038461538461539e-05,
"loss": 3.1157,
"step": 600
},
{
"epoch": 1.94,
"learning_rate": 4.028846153846154e-05,
"loss": 3.1173,
"step": 606
},
{
"epoch": 1.96,
"learning_rate": 4.019230769230769e-05,
"loss": 3.2019,
"step": 612
},
{
"epoch": 1.98,
"learning_rate": 4.009615384615385e-05,
"loss": 3.0801,
"step": 618
},
{
"epoch": 2.0,
"learning_rate": 4e-05,
"loss": 3.0888,
"step": 624
},
{
"epoch": 2.02,
"learning_rate": 3.9903846153846155e-05,
"loss": 3.0332,
"step": 630
},
{
"epoch": 2.04,
"learning_rate": 3.980769230769231e-05,
"loss": 2.9418,
"step": 636
},
{
"epoch": 2.06,
"learning_rate": 3.971153846153846e-05,
"loss": 2.933,
"step": 642
},
{
"epoch": 2.08,
"learning_rate": 3.961538461538462e-05,
"loss": 3.0467,
"step": 648
},
{
"epoch": 2.1,
"learning_rate": 3.951923076923077e-05,
"loss": 3.0991,
"step": 654
},
{
"epoch": 2.12,
"learning_rate": 3.942307692307692e-05,
"loss": 2.913,
"step": 660
},
{
"epoch": 2.13,
"learning_rate": 3.932692307692308e-05,
"loss": 3.0531,
"step": 666
},
{
"epoch": 2.15,
"learning_rate": 3.923076923076923e-05,
"loss": 2.9838,
"step": 672
},
{
"epoch": 2.17,
"learning_rate": 3.9134615384615385e-05,
"loss": 2.9406,
"step": 678
},
{
"epoch": 2.19,
"learning_rate": 3.903846153846154e-05,
"loss": 3.046,
"step": 684
},
{
"epoch": 2.21,
"learning_rate": 3.8942307692307696e-05,
"loss": 3.006,
"step": 690
},
{
"epoch": 2.23,
"learning_rate": 3.884615384615385e-05,
"loss": 2.9774,
"step": 696
},
{
"epoch": 2.25,
"learning_rate": 3.875e-05,
"loss": 2.9937,
"step": 702
},
{
"epoch": 2.27,
"learning_rate": 3.865384615384616e-05,
"loss": 2.9737,
"step": 708
},
{
"epoch": 2.29,
"learning_rate": 3.855769230769231e-05,
"loss": 2.9772,
"step": 714
},
{
"epoch": 2.31,
"learning_rate": 3.846153846153846e-05,
"loss": 2.8405,
"step": 720
},
{
"epoch": 2.33,
"learning_rate": 3.836538461538462e-05,
"loss": 3.0856,
"step": 726
},
{
"epoch": 2.35,
"learning_rate": 3.826923076923077e-05,
"loss": 2.9947,
"step": 732
},
{
"epoch": 2.37,
"learning_rate": 3.8173076923076926e-05,
"loss": 2.9251,
"step": 738
},
{
"epoch": 2.38,
"learning_rate": 3.807692307692308e-05,
"loss": 2.9613,
"step": 744
},
{
"epoch": 2.4,
"learning_rate": 3.798076923076923e-05,
"loss": 2.9402,
"step": 750
},
{
"epoch": 2.42,
"learning_rate": 3.788461538461538e-05,
"loss": 2.8823,
"step": 756
},
{
"epoch": 2.44,
"learning_rate": 3.778846153846154e-05,
"loss": 2.8906,
"step": 762
},
{
"epoch": 2.46,
"learning_rate": 3.769230769230769e-05,
"loss": 2.9475,
"step": 768
},
{
"epoch": 2.48,
"learning_rate": 3.7596153846153845e-05,
"loss": 2.9999,
"step": 774
},
{
"epoch": 2.5,
"learning_rate": 3.7500000000000003e-05,
"loss": 2.9503,
"step": 780
},
{
"epoch": 2.52,
"learning_rate": 3.7403846153846156e-05,
"loss": 3.0112,
"step": 786
},
{
"epoch": 2.54,
"learning_rate": 3.730769230769231e-05,
"loss": 3.0096,
"step": 792
},
{
"epoch": 2.56,
"learning_rate": 3.7211538461538466e-05,
"loss": 2.9804,
"step": 798
},
{
"epoch": 2.58,
"learning_rate": 3.711538461538462e-05,
"loss": 2.9203,
"step": 804
},
{
"epoch": 2.6,
"learning_rate": 3.701923076923077e-05,
"loss": 2.939,
"step": 810
},
{
"epoch": 2.62,
"learning_rate": 3.692307692307693e-05,
"loss": 2.8455,
"step": 816
},
{
"epoch": 2.63,
"learning_rate": 3.682692307692308e-05,
"loss": 2.9651,
"step": 822
},
{
"epoch": 2.65,
"learning_rate": 3.673076923076923e-05,
"loss": 2.9528,
"step": 828
},
{
"epoch": 2.67,
"learning_rate": 3.6634615384615385e-05,
"loss": 2.8042,
"step": 834
},
{
"epoch": 2.69,
"learning_rate": 3.653846153846154e-05,
"loss": 2.8311,
"step": 840
},
{
"epoch": 2.71,
"learning_rate": 3.644230769230769e-05,
"loss": 2.8888,
"step": 846
},
{
"epoch": 2.73,
"learning_rate": 3.634615384615385e-05,
"loss": 2.9151,
"step": 852
},
{
"epoch": 2.75,
"learning_rate": 3.625e-05,
"loss": 2.9463,
"step": 858
},
{
"epoch": 2.77,
"learning_rate": 3.615384615384615e-05,
"loss": 2.957,
"step": 864
},
{
"epoch": 2.79,
"learning_rate": 3.605769230769231e-05,
"loss": 2.9473,
"step": 870
},
{
"epoch": 2.81,
"learning_rate": 3.596153846153846e-05,
"loss": 2.9994,
"step": 876
},
{
"epoch": 2.83,
"learning_rate": 3.5865384615384615e-05,
"loss": 3.0486,
"step": 882
},
{
"epoch": 2.85,
"learning_rate": 3.5769230769230774e-05,
"loss": 2.9487,
"step": 888
},
{
"epoch": 2.87,
"learning_rate": 3.5673076923076926e-05,
"loss": 3.0173,
"step": 894
},
{
"epoch": 2.88,
"learning_rate": 3.557692307692308e-05,
"loss": 2.8656,
"step": 900
},
{
"epoch": 2.9,
"learning_rate": 3.548076923076924e-05,
"loss": 2.8834,
"step": 906
},
{
"epoch": 2.92,
"learning_rate": 3.538461538461539e-05,
"loss": 2.9829,
"step": 912
},
{
"epoch": 2.94,
"learning_rate": 3.528846153846154e-05,
"loss": 3.001,
"step": 918
},
{
"epoch": 2.96,
"learning_rate": 3.51923076923077e-05,
"loss": 2.9618,
"step": 924
},
{
"epoch": 2.98,
"learning_rate": 3.5096153846153845e-05,
"loss": 2.7964,
"step": 930
},
{
"epoch": 3.0,
"learning_rate": 3.5e-05,
"loss": 2.89,
"step": 936
},
{
"epoch": 3.02,
"learning_rate": 3.4903846153846156e-05,
"loss": 2.7191,
"step": 942
},
{
"epoch": 3.04,
"learning_rate": 3.480769230769231e-05,
"loss": 2.7875,
"step": 948
},
{
"epoch": 3.06,
"learning_rate": 3.471153846153846e-05,
"loss": 2.8498,
"step": 954
},
{
"epoch": 3.08,
"learning_rate": 3.461538461538462e-05,
"loss": 2.7858,
"step": 960
},
{
"epoch": 3.1,
"learning_rate": 3.451923076923077e-05,
"loss": 2.7435,
"step": 966
},
{
"epoch": 3.12,
"learning_rate": 3.442307692307692e-05,
"loss": 2.8449,
"step": 972
},
{
"epoch": 3.13,
"learning_rate": 3.432692307692308e-05,
"loss": 2.7817,
"step": 978
},
{
"epoch": 3.15,
"learning_rate": 3.4230769230769234e-05,
"loss": 2.8797,
"step": 984
},
{
"epoch": 3.17,
"learning_rate": 3.4134615384615386e-05,
"loss": 2.7719,
"step": 990
},
{
"epoch": 3.19,
"learning_rate": 3.4038461538461544e-05,
"loss": 2.7919,
"step": 996
}
],
"logging_steps": 6,
"max_steps": 3120,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 402675517440000.0,
"trial_name": null,
"trial_params": null
}