snu-nia-12's picture
Upload 9 files
025ea45
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.4851953222194576,
"global_step": 3900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.6399999999999995e-05,
"loss": 12.4264,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 5.6399999999999995e-05,
"loss": 7.1545,
"step": 100
},
{
"epoch": 0.01,
"eval_cer": 0.990295846526883,
"eval_loss": 4.435502529144287,
"eval_runtime": 2212.4333,
"eval_samples_per_second": 21.798,
"eval_steps_per_second": 1.363,
"eval_wer": 0.9803344576185385,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 8.639999999999999e-05,
"loss": 3.2621,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 0.0001164,
"loss": 2.9431,
"step": 200
},
{
"epoch": 0.02,
"eval_cer": 0.990295846526883,
"eval_loss": 3.032039165496826,
"eval_runtime": 2093.0103,
"eval_samples_per_second": 23.041,
"eval_steps_per_second": 1.441,
"eval_wer": 0.9803344576185385,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 0.00014639999999999998,
"loss": 2.9236,
"step": 250
},
{
"epoch": 0.04,
"learning_rate": 0.00017639999999999998,
"loss": 2.9188,
"step": 300
},
{
"epoch": 0.04,
"eval_cer": 0.990295846526883,
"eval_loss": 2.976602077484131,
"eval_runtime": 2082.4254,
"eval_samples_per_second": 23.159,
"eval_steps_per_second": 1.448,
"eval_wer": 0.9803344576185385,
"step": 300
},
{
"epoch": 0.04,
"learning_rate": 0.00020639999999999998,
"loss": 2.8775,
"step": 350
},
{
"epoch": 0.05,
"learning_rate": 0.0002364,
"loss": 2.7809,
"step": 400
},
{
"epoch": 0.05,
"eval_cer": 0.9696764691640024,
"eval_loss": 2.7085533142089844,
"eval_runtime": 2082.5137,
"eval_samples_per_second": 23.158,
"eval_steps_per_second": 1.448,
"eval_wer": 0.9589848218162464,
"step": 400
},
{
"epoch": 0.06,
"learning_rate": 0.00026639999999999997,
"loss": 2.5919,
"step": 450
},
{
"epoch": 0.06,
"learning_rate": 0.0002964,
"loss": 2.3631,
"step": 500
},
{
"epoch": 0.06,
"eval_cer": 0.862518823322377,
"eval_loss": 1.9892584085464478,
"eval_runtime": 2074.1857,
"eval_samples_per_second": 23.251,
"eval_steps_per_second": 1.454,
"eval_wer": 0.9145354269893812,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 0.0002994410095705937,
"loss": 1.5803,
"step": 550
},
{
"epoch": 0.07,
"learning_rate": 0.0002988057931735411,
"loss": 0.9821,
"step": 600
},
{
"epoch": 0.07,
"eval_cer": 0.1523255882620419,
"eval_loss": 0.5998358726501465,
"eval_runtime": 2075.5254,
"eval_samples_per_second": 23.236,
"eval_steps_per_second": 1.453,
"eval_wer": 0.3576434163952664,
"step": 600
},
{
"epoch": 0.08,
"learning_rate": 0.0002981705767764885,
"loss": 0.7758,
"step": 650
},
{
"epoch": 0.09,
"learning_rate": 0.0002975353603794359,
"loss": 0.6585,
"step": 700
},
{
"epoch": 0.09,
"eval_cer": 0.10982663055738294,
"eval_loss": 0.4177716076374054,
"eval_runtime": 2075.6462,
"eval_samples_per_second": 23.234,
"eval_steps_per_second": 1.453,
"eval_wer": 0.25202041187922813,
"step": 700
},
{
"epoch": 0.09,
"learning_rate": 0.0002969001439823833,
"loss": 0.6077,
"step": 750
},
{
"epoch": 0.1,
"learning_rate": 0.0002962649275853307,
"loss": 0.5657,
"step": 800
},
{
"epoch": 0.1,
"eval_cer": 0.09587676230587619,
"eval_loss": 0.36425185203552246,
"eval_runtime": 2094.5001,
"eval_samples_per_second": 23.025,
"eval_steps_per_second": 1.439,
"eval_wer": 0.21289942193910186,
"step": 800
},
{
"epoch": 0.11,
"learning_rate": 0.00029562971118827813,
"loss": 0.5295,
"step": 850
},
{
"epoch": 0.11,
"learning_rate": 0.0002949944947912255,
"loss": 0.4998,
"step": 900
},
{
"epoch": 0.11,
"eval_cer": 0.09022533657576749,
"eval_loss": 0.3022037148475647,
"eval_runtime": 2108.0878,
"eval_samples_per_second": 22.877,
"eval_steps_per_second": 1.43,
"eval_wer": 0.20626376037718358,
"step": 900
},
{
"epoch": 0.12,
"learning_rate": 0.00029435927839417295,
"loss": 0.4887,
"step": 950
},
{
"epoch": 0.12,
"learning_rate": 0.0002937240619971203,
"loss": 0.4902,
"step": 1000
},
{
"epoch": 0.12,
"eval_cer": 0.0833139440810026,
"eval_loss": 0.2917855978012085,
"eval_runtime": 2122.6451,
"eval_samples_per_second": 22.72,
"eval_steps_per_second": 1.42,
"eval_wer": 0.1899083786482299,
"step": 1000
},
{
"epoch": 0.13,
"learning_rate": 0.0002930888456000677,
"loss": 0.4776,
"step": 1050
},
{
"epoch": 0.14,
"learning_rate": 0.00029245362920301513,
"loss": 0.468,
"step": 1100
},
{
"epoch": 0.14,
"eval_cer": 0.08389881054318646,
"eval_loss": 0.2726401686668396,
"eval_runtime": 2129.6342,
"eval_samples_per_second": 22.645,
"eval_steps_per_second": 1.416,
"eval_wer": 0.1888892520157431,
"step": 1100
},
{
"epoch": 0.14,
"learning_rate": 0.00029181841280596254,
"loss": 0.44,
"step": 1150
},
{
"epoch": 0.15,
"learning_rate": 0.00029118319640890995,
"loss": 0.4332,
"step": 1200
},
{
"epoch": 0.15,
"eval_cer": 0.07579453612837772,
"eval_loss": 0.2764703929424286,
"eval_runtime": 2387.2613,
"eval_samples_per_second": 20.201,
"eval_steps_per_second": 1.263,
"eval_wer": 0.1726946095004212,
"step": 1200
},
{
"epoch": 0.16,
"learning_rate": 0.00029054798001185736,
"loss": 0.4363,
"step": 1250
},
{
"epoch": 0.16,
"learning_rate": 0.00028991276361480477,
"loss": 0.4178,
"step": 1300
},
{
"epoch": 0.16,
"eval_cer": 0.07956404299571626,
"eval_loss": 0.2525430917739868,
"eval_runtime": 2685.8951,
"eval_samples_per_second": 17.955,
"eval_steps_per_second": 1.123,
"eval_wer": 0.18290121608219176,
"step": 1300
},
{
"epoch": 0.17,
"learning_rate": 0.0002892775472177521,
"loss": 0.4072,
"step": 1350
},
{
"epoch": 0.17,
"learning_rate": 0.0002886423308206996,
"loss": 0.4171,
"step": 1400
},
{
"epoch": 0.17,
"eval_cer": 0.07121410576965095,
"eval_loss": 0.24207398295402527,
"eval_runtime": 2387.6421,
"eval_samples_per_second": 20.198,
"eval_steps_per_second": 1.263,
"eval_wer": 0.16114939876197457,
"step": 1400
},
{
"epoch": 0.18,
"learning_rate": 0.00028800711442364694,
"loss": 0.4074,
"step": 1450
},
{
"epoch": 0.19,
"learning_rate": 0.00028737189802659435,
"loss": 0.4049,
"step": 1500
},
{
"epoch": 0.19,
"eval_cer": 0.07119875184394742,
"eval_loss": 0.23904423415660858,
"eval_runtime": 2682.696,
"eval_samples_per_second": 17.977,
"eval_steps_per_second": 1.124,
"eval_wer": 0.1614502011908041,
"step": 1500
},
{
"epoch": 0.19,
"learning_rate": 0.00028673668162954176,
"loss": 0.3901,
"step": 1550
},
{
"epoch": 0.2,
"learning_rate": 0.0002861014652324892,
"loss": 0.3963,
"step": 1600
},
{
"epoch": 0.2,
"eval_cer": 0.07037200199837249,
"eval_loss": 0.22700898349285126,
"eval_runtime": 2633.1938,
"eval_samples_per_second": 18.315,
"eval_steps_per_second": 1.145,
"eval_wer": 0.16208515443303412,
"step": 1600
},
{
"epoch": 0.21,
"learning_rate": 0.0002854662488354366,
"loss": 0.3813,
"step": 1650
},
{
"epoch": 0.21,
"learning_rate": 0.000284831032438384,
"loss": 0.3924,
"step": 1700
},
{
"epoch": 0.21,
"eval_cer": 0.07119875184394742,
"eval_loss": 0.2278161644935608,
"eval_runtime": 2675.9933,
"eval_samples_per_second": 18.022,
"eval_steps_per_second": 1.127,
"eval_wer": 0.16218386565136397,
"step": 1700
},
{
"epoch": 0.22,
"learning_rate": 0.0002841958160413314,
"loss": 0.3938,
"step": 1750
},
{
"epoch": 0.22,
"learning_rate": 0.00028356059964427876,
"loss": 0.3942,
"step": 1800
},
{
"epoch": 0.22,
"eval_cer": 0.06874165130289871,
"eval_loss": 0.22094863653182983,
"eval_runtime": 2685.0595,
"eval_samples_per_second": 17.961,
"eval_steps_per_second": 1.123,
"eval_wer": 0.1562058342331839,
"step": 1800
},
{
"epoch": 0.23,
"learning_rate": 0.0002829253832472262,
"loss": 0.3674,
"step": 1850
},
{
"epoch": 0.24,
"learning_rate": 0.0002822901668501736,
"loss": 0.3745,
"step": 1900
},
{
"epoch": 0.24,
"eval_cer": 0.0659505438242377,
"eval_loss": 0.21558238565921783,
"eval_runtime": 2439.0418,
"eval_samples_per_second": 19.773,
"eval_steps_per_second": 1.236,
"eval_wer": 0.1506740042005625,
"step": 1900
},
{
"epoch": 0.24,
"learning_rate": 0.000281654950453121,
"loss": 0.3711,
"step": 1950
},
{
"epoch": 0.25,
"learning_rate": 0.0002810197340560684,
"loss": 0.3598,
"step": 2000
},
{
"epoch": 0.25,
"eval_cer": 0.06898353468628977,
"eval_loss": 0.221131831407547,
"eval_runtime": 2663.9985,
"eval_samples_per_second": 18.103,
"eval_steps_per_second": 1.132,
"eval_wer": 0.15953867178719996,
"step": 2000
},
{
"epoch": 0.26,
"learning_rate": 0.0002803845176590158,
"loss": 0.3756,
"step": 2050
},
{
"epoch": 0.26,
"learning_rate": 0.0002797493012619632,
"loss": 0.3653,
"step": 2100
},
{
"epoch": 0.26,
"eval_cer": 0.06659281034712863,
"eval_loss": 0.2138465791940689,
"eval_runtime": 2389.088,
"eval_samples_per_second": 20.186,
"eval_steps_per_second": 1.262,
"eval_wer": 0.1510541757846708,
"step": 2100
},
{
"epoch": 0.27,
"learning_rate": 0.00027911408486491063,
"loss": 0.3751,
"step": 2150
},
{
"epoch": 0.27,
"learning_rate": 0.00027847886846785804,
"loss": 0.3576,
"step": 2200
},
{
"epoch": 0.27,
"eval_cer": 0.06642438959287295,
"eval_loss": 0.2111993283033371,
"eval_runtime": 2676.0178,
"eval_samples_per_second": 18.022,
"eval_steps_per_second": 1.127,
"eval_wer": 0.1520546273217979,
"step": 2200
},
{
"epoch": 0.28,
"learning_rate": 0.00027784365207080545,
"loss": 0.353,
"step": 2250
},
{
"epoch": 0.29,
"learning_rate": 0.00027720843567375286,
"loss": 0.3569,
"step": 2300
},
{
"epoch": 0.29,
"eval_cer": 0.06564322909592542,
"eval_loss": 0.21856307983398438,
"eval_runtime": 2642.3302,
"eval_samples_per_second": 18.251,
"eval_steps_per_second": 1.141,
"eval_wer": 0.14776135629213985,
"step": 2300
},
{
"epoch": 0.29,
"learning_rate": 0.00027657321927670027,
"loss": 0.371,
"step": 2350
},
{
"epoch": 0.3,
"learning_rate": 0.0002759380028796476,
"loss": 0.3473,
"step": 2400
},
{
"epoch": 0.3,
"eval_cer": 0.0664718686554331,
"eval_loss": 0.2098075896501541,
"eval_runtime": 2685.1293,
"eval_samples_per_second": 17.96,
"eval_steps_per_second": 1.123,
"eval_wer": 0.15094546005096968,
"step": 2400
},
{
"epoch": 0.3,
"learning_rate": 0.0002753027864825951,
"loss": 0.3574,
"step": 2450
},
{
"epoch": 0.31,
"learning_rate": 0.00027466757008554244,
"loss": 0.3735,
"step": 2500
},
{
"epoch": 0.31,
"eval_cer": 0.06522796445920521,
"eval_loss": 0.20497867465019226,
"eval_runtime": 2686.3997,
"eval_samples_per_second": 17.952,
"eval_steps_per_second": 1.122,
"eval_wer": 0.1481148491685914,
"step": 2500
},
{
"epoch": 0.32,
"learning_rate": 0.00027403235368848985,
"loss": 0.3617,
"step": 2550
},
{
"epoch": 0.32,
"learning_rate": 0.00027339713729143726,
"loss": 0.3523,
"step": 2600
},
{
"epoch": 0.32,
"eval_cer": 0.06236906349320707,
"eval_loss": 0.20248664915561676,
"eval_runtime": 2418.5335,
"eval_samples_per_second": 19.94,
"eval_steps_per_second": 1.247,
"eval_wer": 0.14132578503764698,
"step": 2600
},
{
"epoch": 0.33,
"learning_rate": 0.0002727619208943847,
"loss": 0.3466,
"step": 2650
},
{
"epoch": 0.34,
"learning_rate": 0.0002721267044973321,
"loss": 0.3443,
"step": 2700
},
{
"epoch": 0.34,
"eval_cer": 0.0631530585610537,
"eval_loss": 0.19711410999298096,
"eval_runtime": 2678.8408,
"eval_samples_per_second": 18.003,
"eval_steps_per_second": 1.125,
"eval_wer": 0.14343206900714522,
"step": 2700
},
{
"epoch": 0.34,
"learning_rate": 0.0002714914881002795,
"loss": 0.3353,
"step": 2750
},
{
"epoch": 0.35,
"learning_rate": 0.0002708562717032269,
"loss": 0.3271,
"step": 2800
},
{
"epoch": 0.35,
"eval_cer": 0.06349556921136332,
"eval_loss": 0.1974596232175827,
"eval_runtime": 2413.2013,
"eval_samples_per_second": 19.984,
"eval_steps_per_second": 1.249,
"eval_wer": 0.14444185809195217,
"step": 2800
},
{
"epoch": 0.35,
"learning_rate": 0.00027022105530617426,
"loss": 0.3411,
"step": 2850
},
{
"epoch": 0.36,
"learning_rate": 0.0002695858389091217,
"loss": 0.347,
"step": 2900
},
{
"epoch": 0.36,
"eval_cer": 0.06194080707319925,
"eval_loss": 0.19428882002830505,
"eval_runtime": 2689.3153,
"eval_samples_per_second": 17.932,
"eval_steps_per_second": 1.121,
"eval_wer": 0.14129443755615034,
"step": 2900
},
{
"epoch": 0.37,
"learning_rate": 0.0002689506225120691,
"loss": 0.3414,
"step": 2950
},
{
"epoch": 0.37,
"learning_rate": 0.0002683154061150165,
"loss": 0.3291,
"step": 3000
},
{
"epoch": 0.37,
"eval_cer": 0.06220513080954164,
"eval_loss": 0.19397367537021637,
"eval_runtime": 2678.8937,
"eval_samples_per_second": 18.002,
"eval_steps_per_second": 1.125,
"eval_wer": 0.14132445110226416,
"step": 3000
},
{
"epoch": 0.38,
"learning_rate": 0.0002676801897179639,
"loss": 0.3472,
"step": 3050
},
{
"epoch": 0.39,
"learning_rate": 0.0002670449733209113,
"loss": 0.3386,
"step": 3100
},
{
"epoch": 0.39,
"eval_cer": 0.06227859343867701,
"eval_loss": 0.18810917437076569,
"eval_runtime": 2096.7687,
"eval_samples_per_second": 23.0,
"eval_steps_per_second": 1.438,
"eval_wer": 0.143275331599662,
"step": 3100
},
{
"epoch": 0.39,
"learning_rate": 0.0002664097569238587,
"loss": 0.3561,
"step": 3150
},
{
"epoch": 0.4,
"learning_rate": 0.00026577454052680613,
"loss": 0.3346,
"step": 3200
},
{
"epoch": 0.4,
"eval_cer": 0.062040017054668245,
"eval_loss": 0.19153311848640442,
"eval_runtime": 2107.1655,
"eval_samples_per_second": 22.887,
"eval_steps_per_second": 1.431,
"eval_wer": 0.141297105426916,
"step": 3200
},
{
"epoch": 0.4,
"learning_rate": 0.00026513932412975354,
"loss": 0.3362,
"step": 3250
},
{
"epoch": 0.41,
"learning_rate": 0.0002645041077327009,
"loss": 0.3286,
"step": 3300
},
{
"epoch": 0.41,
"eval_cer": 0.06275858077759366,
"eval_loss": 0.18800656497478485,
"eval_runtime": 2108.9053,
"eval_samples_per_second": 22.868,
"eval_steps_per_second": 1.43,
"eval_wer": 0.14638073317090447,
"step": 3300
},
{
"epoch": 0.42,
"learning_rate": 0.00026386889133564836,
"loss": 0.3088,
"step": 3350
},
{
"epoch": 0.42,
"learning_rate": 0.0002632336749385957,
"loss": 0.3211,
"step": 3400
},
{
"epoch": 0.42,
"eval_cer": 0.06097185625418542,
"eval_loss": 0.1875077337026596,
"eval_runtime": 2192.9622,
"eval_samples_per_second": 21.991,
"eval_steps_per_second": 1.375,
"eval_wer": 0.1381210052803832,
"step": 3400
},
{
"epoch": 0.43,
"learning_rate": 0.0002625984585415431,
"loss": 0.3185,
"step": 3450
},
{
"epoch": 0.44,
"learning_rate": 0.00026196324214449053,
"loss": 0.3272,
"step": 3500
},
{
"epoch": 0.44,
"eval_cer": 0.060900047124741195,
"eval_loss": 0.18167421221733093,
"eval_runtime": 2110.7367,
"eval_samples_per_second": 22.848,
"eval_steps_per_second": 1.428,
"eval_wer": 0.14127176065464214,
"step": 3500
},
{
"epoch": 0.44,
"learning_rate": 0.00026132802574743794,
"loss": 0.321,
"step": 3550
},
{
"epoch": 0.45,
"learning_rate": 0.00026069280935038535,
"loss": 0.3246,
"step": 3600
},
{
"epoch": 0.45,
"eval_cer": 0.05953118559471069,
"eval_loss": 0.18253999948501587,
"eval_runtime": 2122.2554,
"eval_samples_per_second": 22.724,
"eval_steps_per_second": 1.421,
"eval_wer": 0.13588532957874988,
"step": 3600
},
{
"epoch": 0.45,
"learning_rate": 0.00026005759295333276,
"loss": 0.3266,
"step": 3650
},
{
"epoch": 0.46,
"learning_rate": 0.0002594223765562802,
"loss": 0.3239,
"step": 3700
},
{
"epoch": 0.46,
"eval_cer": 0.06006573842343529,
"eval_loss": 0.18680287897586823,
"eval_runtime": 2677.6172,
"eval_samples_per_second": 18.011,
"eval_steps_per_second": 1.126,
"eval_wer": 0.13703584884644604,
"step": 3700
},
{
"epoch": 0.47,
"learning_rate": 0.00025878716015922753,
"loss": 0.3272,
"step": 3750
},
{
"epoch": 0.47,
"learning_rate": 0.000258151943762175,
"loss": 0.32,
"step": 3800
},
{
"epoch": 0.47,
"eval_cer": 0.05884947129347375,
"eval_loss": 0.18254593014717102,
"eval_runtime": 2588.5152,
"eval_samples_per_second": 18.631,
"eval_steps_per_second": 1.165,
"eval_wer": 0.13350625582346165,
"step": 3800
},
{
"epoch": 0.48,
"learning_rate": 0.00025751672736512235,
"loss": 0.3148,
"step": 3850
},
{
"epoch": 0.49,
"learning_rate": 0.00025688151096806976,
"loss": 0.3185,
"step": 3900
},
{
"epoch": 0.49,
"eval_cer": 0.06071367408812445,
"eval_loss": 0.1804201900959015,
"eval_runtime": 2677.674,
"eval_samples_per_second": 18.01,
"eval_steps_per_second": 1.126,
"eval_wer": 0.14048273787569457,
"step": 3900
}
],
"max_steps": 24114,
"num_train_epochs": 3,
"total_flos": 3.282673713375086e+19,
"trial_name": null,
"trial_params": null
}