xlsr_mid2_ko-en / trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
141dd3c
raw
history blame contribute delete
No virus
18.5 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.993114001530222,
"eval_steps": 2000,
"global_step": 24495,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"learning_rate": 3.9999999999999996e-05,
"loss": 33.0181,
"step": 200
},
{
"epoch": 0.24,
"learning_rate": 7.999999999999999e-05,
"loss": 8.7646,
"step": 400
},
{
"epoch": 0.37,
"learning_rate": 0.00011999999999999999,
"loss": 4.6933,
"step": 600
},
{
"epoch": 0.49,
"learning_rate": 0.00015999999999999999,
"loss": 4.5796,
"step": 800
},
{
"epoch": 0.61,
"learning_rate": 0.00019999999999999998,
"loss": 4.1047,
"step": 1000
},
{
"epoch": 0.73,
"learning_rate": 0.00023999999999999998,
"loss": 2.6327,
"step": 1200
},
{
"epoch": 0.86,
"learning_rate": 0.00028,
"loss": 1.9893,
"step": 1400
},
{
"epoch": 0.98,
"learning_rate": 0.00029869536855838224,
"loss": 1.7241,
"step": 1600
},
{
"epoch": 1.1,
"learning_rate": 0.00029608610567514673,
"loss": 1.5607,
"step": 1800
},
{
"epoch": 1.22,
"learning_rate": 0.00029347684279191127,
"loss": 1.4503,
"step": 2000
},
{
"epoch": 1.22,
"eval_cer": 0.268688843618788,
"eval_loss": 1.0610458850860596,
"eval_runtime": 1586.9419,
"eval_samples_per_second": 8.25,
"eval_steps_per_second": 0.516,
"step": 2000
},
{
"epoch": 1.35,
"learning_rate": 0.00029086757990867576,
"loss": 1.3782,
"step": 2200
},
{
"epoch": 1.47,
"learning_rate": 0.0002882583170254403,
"loss": 1.3244,
"step": 2400
},
{
"epoch": 1.59,
"learning_rate": 0.0002856490541422048,
"loss": 1.2553,
"step": 2600
},
{
"epoch": 1.71,
"learning_rate": 0.00028303979125896933,
"loss": 1.2077,
"step": 2800
},
{
"epoch": 1.84,
"learning_rate": 0.0002804305283757338,
"loss": 1.1868,
"step": 3000
},
{
"epoch": 1.96,
"learning_rate": 0.00027782126549249836,
"loss": 1.1624,
"step": 3200
},
{
"epoch": 2.08,
"learning_rate": 0.00027521200260926284,
"loss": 1.1136,
"step": 3400
},
{
"epoch": 2.2,
"learning_rate": 0.0002726027397260274,
"loss": 1.077,
"step": 3600
},
{
"epoch": 2.33,
"learning_rate": 0.00026999347684279187,
"loss": 1.053,
"step": 3800
},
{
"epoch": 2.45,
"learning_rate": 0.0002673842139595564,
"loss": 1.0239,
"step": 4000
},
{
"epoch": 2.45,
"eval_cer": 0.19039349567825709,
"eval_loss": 0.6961866617202759,
"eval_runtime": 1607.479,
"eval_samples_per_second": 8.145,
"eval_steps_per_second": 0.509,
"step": 4000
},
{
"epoch": 2.57,
"learning_rate": 0.0002647749510763209,
"loss": 1.0206,
"step": 4200
},
{
"epoch": 2.69,
"learning_rate": 0.00026216568819308544,
"loss": 1.0045,
"step": 4400
},
{
"epoch": 2.82,
"learning_rate": 0.0002595564253098499,
"loss": 0.9802,
"step": 4600
},
{
"epoch": 2.94,
"learning_rate": 0.00025694716242661447,
"loss": 0.9839,
"step": 4800
},
{
"epoch": 3.06,
"learning_rate": 0.00025433789954337895,
"loss": 0.9243,
"step": 5000
},
{
"epoch": 3.18,
"learning_rate": 0.0002517286366601435,
"loss": 0.9082,
"step": 5200
},
{
"epoch": 3.31,
"learning_rate": 0.000249119373776908,
"loss": 0.9017,
"step": 5400
},
{
"epoch": 3.43,
"learning_rate": 0.0002465101108936725,
"loss": 0.8871,
"step": 5600
},
{
"epoch": 3.55,
"learning_rate": 0.00024390084801043704,
"loss": 0.9036,
"step": 5800
},
{
"epoch": 3.67,
"learning_rate": 0.00024129158512720155,
"loss": 0.8977,
"step": 6000
},
{
"epoch": 3.67,
"eval_cer": 0.16872254319465907,
"eval_loss": 0.594495415687561,
"eval_runtime": 1591.6049,
"eval_samples_per_second": 8.226,
"eval_steps_per_second": 0.515,
"step": 6000
},
{
"epoch": 3.79,
"learning_rate": 0.00023868232224396607,
"loss": 0.8843,
"step": 6200
},
{
"epoch": 3.92,
"learning_rate": 0.00023607305936073058,
"loss": 0.8757,
"step": 6400
},
{
"epoch": 4.04,
"learning_rate": 0.0002334637964774951,
"loss": 0.8573,
"step": 6600
},
{
"epoch": 4.16,
"learning_rate": 0.0002308545335942596,
"loss": 0.8126,
"step": 6800
},
{
"epoch": 4.28,
"learning_rate": 0.00022824527071102412,
"loss": 0.8192,
"step": 7000
},
{
"epoch": 4.41,
"learning_rate": 0.00022563600782778863,
"loss": 0.8061,
"step": 7200
},
{
"epoch": 4.53,
"learning_rate": 0.00022302674494455315,
"loss": 0.8123,
"step": 7400
},
{
"epoch": 4.65,
"learning_rate": 0.00022041748206131766,
"loss": 0.8046,
"step": 7600
},
{
"epoch": 4.77,
"learning_rate": 0.00021780821917808218,
"loss": 0.7979,
"step": 7800
},
{
"epoch": 4.9,
"learning_rate": 0.0002151989562948467,
"loss": 0.804,
"step": 8000
},
{
"epoch": 4.9,
"eval_cer": 0.14924661713942214,
"eval_loss": 0.5327703952789307,
"eval_runtime": 1595.6324,
"eval_samples_per_second": 8.206,
"eval_steps_per_second": 0.513,
"step": 8000
},
{
"epoch": 5.02,
"learning_rate": 0.0002125896934116112,
"loss": 0.7867,
"step": 8200
},
{
"epoch": 5.14,
"learning_rate": 0.00020998043052837572,
"loss": 0.7557,
"step": 8400
},
{
"epoch": 5.26,
"learning_rate": 0.00020737116764514023,
"loss": 0.7478,
"step": 8600
},
{
"epoch": 5.39,
"learning_rate": 0.00020476190476190475,
"loss": 0.7398,
"step": 8800
},
{
"epoch": 5.51,
"learning_rate": 0.00020215264187866926,
"loss": 0.7408,
"step": 9000
},
{
"epoch": 5.63,
"learning_rate": 0.00019954337899543377,
"loss": 0.75,
"step": 9200
},
{
"epoch": 5.75,
"learning_rate": 0.0001969341161121983,
"loss": 0.7344,
"step": 9400
},
{
"epoch": 5.88,
"learning_rate": 0.0001943248532289628,
"loss": 0.738,
"step": 9600
},
{
"epoch": 6.0,
"learning_rate": 0.00019171559034572732,
"loss": 0.7373,
"step": 9800
},
{
"epoch": 6.12,
"learning_rate": 0.00018910632746249183,
"loss": 0.698,
"step": 10000
},
{
"epoch": 6.12,
"eval_cer": 0.13653489424101573,
"eval_loss": 0.5013594031333923,
"eval_runtime": 1605.1161,
"eval_samples_per_second": 8.157,
"eval_steps_per_second": 0.51,
"step": 10000
},
{
"epoch": 6.24,
"learning_rate": 0.00018649706457925634,
"loss": 0.6943,
"step": 10200
},
{
"epoch": 6.37,
"learning_rate": 0.00018388780169602086,
"loss": 0.6997,
"step": 10400
},
{
"epoch": 6.49,
"learning_rate": 0.00018127853881278537,
"loss": 0.6929,
"step": 10600
},
{
"epoch": 6.61,
"learning_rate": 0.00017866927592954989,
"loss": 0.7003,
"step": 10800
},
{
"epoch": 6.73,
"learning_rate": 0.0001760600130463144,
"loss": 0.6863,
"step": 11000
},
{
"epoch": 6.86,
"learning_rate": 0.00017345075016307891,
"loss": 0.6883,
"step": 11200
},
{
"epoch": 6.98,
"learning_rate": 0.00017084148727984343,
"loss": 0.6787,
"step": 11400
},
{
"epoch": 7.1,
"learning_rate": 0.00016823222439660794,
"loss": 0.6518,
"step": 11600
},
{
"epoch": 7.22,
"learning_rate": 0.00016562296151337246,
"loss": 0.6494,
"step": 11800
},
{
"epoch": 7.35,
"learning_rate": 0.00016301369863013697,
"loss": 0.6426,
"step": 12000
},
{
"epoch": 7.35,
"eval_cer": 0.13216305737125092,
"eval_loss": 0.47150149941444397,
"eval_runtime": 1597.7342,
"eval_samples_per_second": 8.195,
"eval_steps_per_second": 0.513,
"step": 12000
},
{
"epoch": 7.47,
"learning_rate": 0.00016040443574690148,
"loss": 0.6457,
"step": 12200
},
{
"epoch": 7.59,
"learning_rate": 0.000157795172863666,
"loss": 0.6429,
"step": 12400
},
{
"epoch": 7.71,
"learning_rate": 0.0001551859099804305,
"loss": 0.6512,
"step": 12600
},
{
"epoch": 7.83,
"learning_rate": 0.00015257664709719503,
"loss": 0.6458,
"step": 12800
},
{
"epoch": 7.96,
"learning_rate": 0.00014996738421395954,
"loss": 0.6501,
"step": 13000
},
{
"epoch": 8.08,
"learning_rate": 0.00014735812133072405,
"loss": 0.6095,
"step": 13200
},
{
"epoch": 8.2,
"learning_rate": 0.00014474885844748857,
"loss": 0.6132,
"step": 13400
},
{
"epoch": 8.32,
"learning_rate": 0.00014213959556425308,
"loss": 0.6102,
"step": 13600
},
{
"epoch": 8.45,
"learning_rate": 0.0001395303326810176,
"loss": 0.6125,
"step": 13800
},
{
"epoch": 8.57,
"learning_rate": 0.0001369210697977821,
"loss": 0.61,
"step": 14000
},
{
"epoch": 8.57,
"eval_cer": 0.1257692459492199,
"eval_loss": 0.45295360684394836,
"eval_runtime": 1603.0798,
"eval_samples_per_second": 8.167,
"eval_steps_per_second": 0.511,
"step": 14000
},
{
"epoch": 8.69,
"learning_rate": 0.00013431180691454662,
"loss": 0.606,
"step": 14200
},
{
"epoch": 8.81,
"learning_rate": 0.00013170254403131114,
"loss": 0.5957,
"step": 14400
},
{
"epoch": 8.94,
"learning_rate": 0.00012909328114807565,
"loss": 0.5992,
"step": 14600
},
{
"epoch": 9.06,
"learning_rate": 0.00012648401826484017,
"loss": 0.5752,
"step": 14800
},
{
"epoch": 9.18,
"learning_rate": 0.00012387475538160468,
"loss": 0.5654,
"step": 15000
},
{
"epoch": 9.3,
"learning_rate": 0.00012126549249836919,
"loss": 0.5725,
"step": 15200
},
{
"epoch": 9.43,
"learning_rate": 0.00011865622961513371,
"loss": 0.5713,
"step": 15400
},
{
"epoch": 9.55,
"learning_rate": 0.00011604696673189822,
"loss": 0.5649,
"step": 15600
},
{
"epoch": 9.67,
"learning_rate": 0.00011343770384866273,
"loss": 0.5643,
"step": 15800
},
{
"epoch": 9.79,
"learning_rate": 0.00011082844096542725,
"loss": 0.5709,
"step": 16000
},
{
"epoch": 9.79,
"eval_cer": 0.1200554980402634,
"eval_loss": 0.4299587607383728,
"eval_runtime": 1609.5227,
"eval_samples_per_second": 8.135,
"eval_steps_per_second": 0.509,
"step": 16000
},
{
"epoch": 9.92,
"learning_rate": 0.00010821917808219176,
"loss": 0.5666,
"step": 16200
},
{
"epoch": 10.04,
"learning_rate": 0.00010560991519895628,
"loss": 0.5531,
"step": 16400
},
{
"epoch": 10.16,
"learning_rate": 0.00010300065231572079,
"loss": 0.5389,
"step": 16600
},
{
"epoch": 10.28,
"learning_rate": 0.0001003913894324853,
"loss": 0.5456,
"step": 16800
},
{
"epoch": 10.41,
"learning_rate": 9.778212654924982e-05,
"loss": 0.5353,
"step": 17000
},
{
"epoch": 10.53,
"learning_rate": 9.517286366601433e-05,
"loss": 0.5337,
"step": 17200
},
{
"epoch": 10.65,
"learning_rate": 9.256360078277885e-05,
"loss": 0.5296,
"step": 17400
},
{
"epoch": 10.77,
"learning_rate": 8.995433789954336e-05,
"loss": 0.5372,
"step": 17600
},
{
"epoch": 10.9,
"learning_rate": 8.734507501630787e-05,
"loss": 0.5388,
"step": 17800
},
{
"epoch": 11.02,
"learning_rate": 8.473581213307239e-05,
"loss": 0.5235,
"step": 18000
},
{
"epoch": 11.02,
"eval_cer": 0.11664607248141211,
"eval_loss": 0.4167773723602295,
"eval_runtime": 1608.2913,
"eval_samples_per_second": 8.141,
"eval_steps_per_second": 0.509,
"step": 18000
},
{
"epoch": 11.14,
"learning_rate": 8.212654924983692e-05,
"loss": 0.509,
"step": 18200
},
{
"epoch": 11.26,
"learning_rate": 7.951728636660143e-05,
"loss": 0.5116,
"step": 18400
},
{
"epoch": 11.38,
"learning_rate": 7.690802348336594e-05,
"loss": 0.4967,
"step": 18600
},
{
"epoch": 11.51,
"learning_rate": 7.429876060013046e-05,
"loss": 0.511,
"step": 18800
},
{
"epoch": 11.63,
"learning_rate": 7.168949771689497e-05,
"loss": 0.5056,
"step": 19000
},
{
"epoch": 11.75,
"learning_rate": 6.908023483365949e-05,
"loss": 0.5073,
"step": 19200
},
{
"epoch": 11.87,
"learning_rate": 6.6470971950424e-05,
"loss": 0.4968,
"step": 19400
},
{
"epoch": 12.0,
"learning_rate": 6.386170906718851e-05,
"loss": 0.5009,
"step": 19600
},
{
"epoch": 12.12,
"learning_rate": 6.125244618395303e-05,
"loss": 0.4832,
"step": 19800
},
{
"epoch": 12.24,
"learning_rate": 5.864318330071754e-05,
"loss": 0.4778,
"step": 20000
},
{
"epoch": 12.24,
"eval_cer": 0.11294822712906938,
"eval_loss": 0.40570223331451416,
"eval_runtime": 1612.661,
"eval_samples_per_second": 8.119,
"eval_steps_per_second": 0.508,
"step": 20000
},
{
"epoch": 12.36,
"learning_rate": 5.6033920417482055e-05,
"loss": 0.4775,
"step": 20200
},
{
"epoch": 12.49,
"learning_rate": 5.342465753424657e-05,
"loss": 0.4855,
"step": 20400
},
{
"epoch": 12.61,
"learning_rate": 5.081539465101108e-05,
"loss": 0.4773,
"step": 20600
},
{
"epoch": 12.73,
"learning_rate": 4.82061317677756e-05,
"loss": 0.4745,
"step": 20800
},
{
"epoch": 12.85,
"learning_rate": 4.559686888454011e-05,
"loss": 0.48,
"step": 21000
},
{
"epoch": 12.98,
"learning_rate": 4.2987606001304625e-05,
"loss": 0.463,
"step": 21200
},
{
"epoch": 13.1,
"learning_rate": 4.037834311806914e-05,
"loss": 0.4643,
"step": 21400
},
{
"epoch": 13.22,
"learning_rate": 3.776908023483365e-05,
"loss": 0.449,
"step": 21600
},
{
"epoch": 13.34,
"learning_rate": 3.515981735159817e-05,
"loss": 0.4604,
"step": 21800
},
{
"epoch": 13.47,
"learning_rate": 3.255055446836268e-05,
"loss": 0.4571,
"step": 22000
},
{
"epoch": 13.47,
"eval_cer": 0.10995473327241098,
"eval_loss": 0.3945465385913849,
"eval_runtime": 1610.0901,
"eval_samples_per_second": 8.132,
"eval_steps_per_second": 0.509,
"step": 22000
},
{
"epoch": 13.59,
"learning_rate": 2.99412915851272e-05,
"loss": 0.4539,
"step": 22200
},
{
"epoch": 13.71,
"learning_rate": 2.7332028701891712e-05,
"loss": 0.4569,
"step": 22400
},
{
"epoch": 13.83,
"learning_rate": 2.4722765818656226e-05,
"loss": 0.4533,
"step": 22600
},
{
"epoch": 13.96,
"learning_rate": 2.211350293542074e-05,
"loss": 0.4545,
"step": 22800
},
{
"epoch": 14.08,
"learning_rate": 1.9504240052185254e-05,
"loss": 0.447,
"step": 23000
},
{
"epoch": 14.2,
"learning_rate": 1.6894977168949768e-05,
"loss": 0.4443,
"step": 23200
},
{
"epoch": 14.32,
"learning_rate": 1.4285714285714284e-05,
"loss": 0.4423,
"step": 23400
},
{
"epoch": 14.45,
"learning_rate": 1.1676451402478798e-05,
"loss": 0.4434,
"step": 23600
},
{
"epoch": 14.57,
"learning_rate": 9.067188519243312e-06,
"loss": 0.438,
"step": 23800
},
{
"epoch": 14.69,
"learning_rate": 6.4579256360078264e-06,
"loss": 0.4388,
"step": 24000
},
{
"epoch": 14.69,
"eval_cer": 0.10809973860058716,
"eval_loss": 0.38906005024909973,
"eval_runtime": 1607.4288,
"eval_samples_per_second": 8.145,
"eval_steps_per_second": 0.51,
"step": 24000
},
{
"epoch": 14.81,
"learning_rate": 3.848662752772341e-06,
"loss": 0.4406,
"step": 24200
},
{
"epoch": 14.93,
"learning_rate": 1.2393998695368556e-06,
"loss": 0.449,
"step": 24400
},
{
"epoch": 14.99,
"step": 24495,
"total_flos": 1.7220710227304147e+20,
"train_loss": 1.1506779817299688,
"train_runtime": 54167.2991,
"train_samples_per_second": 28.951,
"train_steps_per_second": 0.452
}
],
"logging_steps": 200,
"max_steps": 24495,
"num_train_epochs": 15,
"save_steps": 2000,
"total_flos": 1.7220710227304147e+20,
"trial_name": null,
"trial_params": null
}