changes

Browse files

Files changed (6) hide show

model.safetensors +1 -1
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +1177 -0
training_args.bin +2 -2

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:981ad6637ebe478993821fba115789efe534de05b68ec78e58ef81a182e6060b
 size 290403936

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4a87d1bdd63729eba67ec2664e3415bd1ee14cb6c5e39d16bbc13e39f8a5ca9
 size 290403936

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ecd26f408db59df6a2f85549ae0e5abffdcf14411891223eea5a5f8dd95840ef
+size 574811514

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7c93a397e9322e49f4ed50d18f810eaf2c39ecdb2985c95d248cd7a2fa2aa47
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e88295c42365fd93a1c1de4d8be0425f3a739044af9fb1d0b4ef0f8c27295d60
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1177 @@

+{
+  "best_metric": 34.549659010739205,
+  "best_model_checkpoint": "./whisper-small-hi/checkpoint-4000",
+  "epoch": 2.546148949713558,
+  "eval_steps": 1000,
+  "global_step": 4000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.015913430935709738,
+      "grad_norm": 6.266472339630127,
+      "learning_rate": 1.44e-05,
+      "loss": 1.7883,
+      "step": 25
+    },
+    {
+      "epoch": 0.031826861871419476,
+      "grad_norm": 11.293601989746094,
+      "learning_rate": 2.94e-05,
+      "loss": 1.3114,
+      "step": 50
+    },
+    {
+      "epoch": 0.047740292807129214,
+      "grad_norm": 11.253959655761719,
+      "learning_rate": 4.4399999999999995e-05,
+      "loss": 0.7712,
+      "step": 75
+    },
+    {
+      "epoch": 0.06365372374283895,
+      "grad_norm": 6.763392448425293,
+      "learning_rate": 5.94e-05,
+      "loss": 0.4958,
+      "step": 100
+    },
+    {
+      "epoch": 0.07956715467854869,
+      "grad_norm": 7.226134300231934,
+      "learning_rate": 7.439999999999999e-05,
+      "loss": 0.4231,
+      "step": 125
+    },
+    {
+      "epoch": 0.09548058561425843,
+      "grad_norm": 8.527499198913574,
+      "learning_rate": 8.939999999999999e-05,
+      "loss": 0.3815,
+      "step": 150
+    },
+    {
+      "epoch": 0.11139401654996817,
+      "grad_norm": 6.013850212097168,
+      "learning_rate": 0.00010439999999999999,
+      "loss": 0.3633,
+      "step": 175
+    },
+    {
+      "epoch": 0.1273074474856779,
+      "grad_norm": 5.921957015991211,
+      "learning_rate": 0.0001194,
+      "loss": 0.3564,
+      "step": 200
+    },
+    {
+      "epoch": 0.14322087842138764,
+      "grad_norm": 6.438776969909668,
+      "learning_rate": 0.0001344,
+      "loss": 0.3549,
+      "step": 225
+    },
+    {
+      "epoch": 0.15913430935709738,
+      "grad_norm": 6.543764114379883,
+      "learning_rate": 0.0001494,
+      "loss": 0.3298,
+      "step": 250
+    },
+    {
+      "epoch": 0.17504774029280712,
+      "grad_norm": 5.377121448516846,
+      "learning_rate": 0.0001644,
+      "loss": 0.3251,
+      "step": 275
+    },
+    {
+      "epoch": 0.19096117122851686,
+      "grad_norm": 6.325755596160889,
+      "learning_rate": 0.00017939999999999997,
+      "loss": 0.3319,
+      "step": 300
+    },
+    {
+      "epoch": 0.2068746021642266,
+      "grad_norm": 3.889002561569214,
+      "learning_rate": 0.00019439999999999998,
+      "loss": 0.3311,
+      "step": 325
+    },
+    {
+      "epoch": 0.22278803309993633,
+      "grad_norm": 4.005517482757568,
+      "learning_rate": 0.00020939999999999997,
+      "loss": 0.3388,
+      "step": 350
+    },
+    {
+      "epoch": 0.23870146403564607,
+      "grad_norm": 3.7465219497680664,
+      "learning_rate": 0.00022439999999999998,
+      "loss": 0.3304,
+      "step": 375
+    },
+    {
+      "epoch": 0.2546148949713558,
+      "grad_norm": 3.612300157546997,
+      "learning_rate": 0.0002394,
+      "loss": 0.3253,
+      "step": 400
+    },
+    {
+      "epoch": 0.27052832590706555,
+      "grad_norm": 2.8564534187316895,
+      "learning_rate": 0.00025439999999999995,
+      "loss": 0.325,
+      "step": 425
+    },
+    {
+      "epoch": 0.2864417568427753,
+      "grad_norm": 3.6657044887542725,
+      "learning_rate": 0.0002694,
+      "loss": 0.3362,
+      "step": 450
+    },
+    {
+      "epoch": 0.302355187778485,
+      "grad_norm": 6.07236385345459,
+      "learning_rate": 0.0002844,
+      "loss": 0.3396,
+      "step": 475
+    },
+    {
+      "epoch": 0.31826861871419476,
+      "grad_norm": 3.5293257236480713,
+      "learning_rate": 0.00029939999999999996,
+      "loss": 0.3265,
+      "step": 500
+    },
+    {
+      "epoch": 0.3341820496499045,
+      "grad_norm": 2.7845981121063232,
+      "learning_rate": 0.0002979428571428571,
+      "loss": 0.3066,
+      "step": 525
+    },
+    {
+      "epoch": 0.35009548058561424,
+      "grad_norm": 3.957970380783081,
+      "learning_rate": 0.0002958,
+      "loss": 0.3076,
+      "step": 550
+    },
+    {
+      "epoch": 0.366008911521324,
+      "grad_norm": 2.7481985092163086,
+      "learning_rate": 0.00029365714285714285,
+      "loss": 0.3181,
+      "step": 575
+    },
+    {
+      "epoch": 0.3819223424570337,
+      "grad_norm": 5.5784912109375,
+      "learning_rate": 0.0002915142857142857,
+      "loss": 0.325,
+      "step": 600
+    },
+    {
+      "epoch": 0.39783577339274345,
+      "grad_norm": 2.688514232635498,
+      "learning_rate": 0.0002893714285714285,
+      "loss": 0.2907,
+      "step": 625
+    },
+    {
+      "epoch": 0.4137492043284532,
+      "grad_norm": 1.8559094667434692,
+      "learning_rate": 0.0002872285714285714,
+      "loss": 0.2837,
+      "step": 650
+    },
+    {
+      "epoch": 0.42966263526416293,
+      "grad_norm": 2.075263023376465,
+      "learning_rate": 0.00028508571428571426,
+      "loss": 0.2884,
+      "step": 675
+    },
+    {
+      "epoch": 0.44557606619987267,
+      "grad_norm": 2.253748893737793,
+      "learning_rate": 0.00028294285714285713,
+      "loss": 0.2652,
+      "step": 700
+    },
+    {
+      "epoch": 0.4614894971355824,
+      "grad_norm": 2.5322704315185547,
+      "learning_rate": 0.0002808,
+      "loss": 0.2763,
+      "step": 725
+    },
+    {
+      "epoch": 0.47740292807129214,
+      "grad_norm": 2.8500661849975586,
+      "learning_rate": 0.0002786571428571428,
+      "loss": 0.2698,
+      "step": 750
+    },
+    {
+      "epoch": 0.49331635900700194,
+      "grad_norm": 2.4770638942718506,
+      "learning_rate": 0.0002765142857142857,
+      "loss": 0.2643,
+      "step": 775
+    },
+    {
+      "epoch": 0.5092297899427116,
+      "grad_norm": 2.8346035480499268,
+      "learning_rate": 0.00027437142857142854,
+      "loss": 0.2682,
+      "step": 800
+    },
+    {
+      "epoch": 0.5251432208784214,
+      "grad_norm": 2.551896095275879,
+      "learning_rate": 0.0002722285714285714,
+      "loss": 0.2484,
+      "step": 825
+    },
+    {
+      "epoch": 0.5410566518141311,
+      "grad_norm": 2.1376454830169678,
+      "learning_rate": 0.0002700857142857143,
+      "loss": 0.2433,
+      "step": 850
+    },
+    {
+      "epoch": 0.5569700827498408,
+      "grad_norm": 2.4351694583892822,
+      "learning_rate": 0.0002679428571428571,
+      "loss": 0.248,
+      "step": 875
+    },
+    {
+      "epoch": 0.5728835136855506,
+      "grad_norm": 2.142422914505005,
+      "learning_rate": 0.00026579999999999996,
+      "loss": 0.2592,
+      "step": 900
+    },
+    {
+      "epoch": 0.5887969446212603,
+      "grad_norm": 2.287863254547119,
+      "learning_rate": 0.0002636571428571428,
+      "loss": 0.2361,
+      "step": 925
+    },
+    {
+      "epoch": 0.60471037555697,
+      "grad_norm": 1.8011589050292969,
+      "learning_rate": 0.0002615142857142857,
+      "loss": 0.2349,
+      "step": 950
+    },
+    {
+      "epoch": 0.6206238064926798,
+      "grad_norm": 2.0155577659606934,
+      "learning_rate": 0.00025937142857142856,
+      "loss": 0.2343,
+      "step": 975
+    },
+    {
+      "epoch": 0.6365372374283895,
+      "grad_norm": 1.7602595090866089,
+      "learning_rate": 0.0002572285714285714,
+      "loss": 0.2369,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6365372374283895,
+      "eval_loss": 0.24331499636173248,
+      "eval_runtime": 3161.0794,
+      "eval_samples_per_second": 2.642,
+      "eval_steps_per_second": 0.331,
+      "eval_wer": 62.18807974706697,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6524506683640993,
+      "grad_norm": 2.084760904312134,
+      "learning_rate": 0.0002550857142857143,
+      "loss": 0.2352,
+      "step": 1025
+    },
+    {
+      "epoch": 0.668364099299809,
+      "grad_norm": 1.6412831544876099,
+      "learning_rate": 0.00025294285714285716,
+      "loss": 0.2242,
+      "step": 1050
+    },
+    {
+      "epoch": 0.6842775302355187,
+      "grad_norm": 1.3746275901794434,
+      "learning_rate": 0.00025079999999999997,
+      "loss": 0.2206,
+      "step": 1075
+    },
+    {
+      "epoch": 0.7001909611712285,
+      "grad_norm": 2.017609119415283,
+      "learning_rate": 0.00024865714285714284,
+      "loss": 0.2223,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7161043921069382,
+      "grad_norm": 2.0669291019439697,
+      "learning_rate": 0.0002465142857142857,
+      "loss": 0.2233,
+      "step": 1125
+    },
+    {
+      "epoch": 0.732017823042648,
+      "grad_norm": 1.699652075767517,
+      "learning_rate": 0.00024437142857142857,
+      "loss": 0.1984,
+      "step": 1150
+    },
+    {
+      "epoch": 0.7479312539783577,
+      "grad_norm": 1.5082184076309204,
+      "learning_rate": 0.00024222857142857138,
+      "loss": 0.2143,
+      "step": 1175
+    },
+    {
+      "epoch": 0.7638446849140674,
+      "grad_norm": 1.363891363143921,
+      "learning_rate": 0.00024008571428571425,
+      "loss": 0.2067,
+      "step": 1200
+    },
+    {
+      "epoch": 0.7797581158497772,
+      "grad_norm": 1.3396174907684326,
+      "learning_rate": 0.00023794285714285712,
+      "loss": 0.2083,
+      "step": 1225
+    },
+    {
+      "epoch": 0.7956715467854869,
+      "grad_norm": 1.6803642511367798,
+      "learning_rate": 0.00023579999999999999,
+      "loss": 0.2072,
+      "step": 1250
+    },
+    {
+      "epoch": 0.8115849777211966,
+      "grad_norm": 1.8154999017715454,
+      "learning_rate": 0.00023365714285714283,
+      "loss": 0.2137,
+      "step": 1275
+    },
+    {
+      "epoch": 0.8274984086569064,
+      "grad_norm": 1.9345273971557617,
+      "learning_rate": 0.0002315142857142857,
+      "loss": 0.1854,
+      "step": 1300
+    },
+    {
+      "epoch": 0.8434118395926161,
+      "grad_norm": 1.8204659223556519,
+      "learning_rate": 0.00022937142857142856,
+      "loss": 0.1944,
+      "step": 1325
+    },
+    {
+      "epoch": 0.8593252705283259,
+      "grad_norm": 1.7036362886428833,
+      "learning_rate": 0.00022722857142857143,
+      "loss": 0.1925,
+      "step": 1350
+    },
+    {
+      "epoch": 0.8752387014640356,
+      "grad_norm": 1.5732781887054443,
+      "learning_rate": 0.0002250857142857143,
+      "loss": 0.2006,
+      "step": 1375
+    },
+    {
+      "epoch": 0.8911521323997453,
+      "grad_norm": 1.4482449293136597,
+      "learning_rate": 0.0002229428571428571,
+      "loss": 0.1814,
+      "step": 1400
+    },
+    {
+      "epoch": 0.9070655633354551,
+      "grad_norm": 1.474438190460205,
+      "learning_rate": 0.00022079999999999997,
+      "loss": 0.1876,
+      "step": 1425
+    },
+    {
+      "epoch": 0.9229789942711648,
+      "grad_norm": 1.6966629028320312,
+      "learning_rate": 0.00021865714285714284,
+      "loss": 0.186,
+      "step": 1450
+    },
+    {
+      "epoch": 0.9388924252068745,
+      "grad_norm": 1.6632884740829468,
+      "learning_rate": 0.0002165142857142857,
+      "loss": 0.1846,
+      "step": 1475
+    },
+    {
+      "epoch": 0.9548058561425843,
+      "grad_norm": 1.4217106103897095,
+      "learning_rate": 0.00021437142857142855,
+      "loss": 0.1714,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9707192870782941,
+      "grad_norm": 1.280887484550476,
+      "learning_rate": 0.00021222857142857141,
+      "loss": 0.1794,
+      "step": 1525
+    },
+    {
+      "epoch": 0.9866327180140039,
+      "grad_norm": 1.7453186511993408,
+      "learning_rate": 0.00021008571428571428,
+      "loss": 0.1788,
+      "step": 1550
+    },
+    {
+      "epoch": 1.0025461489497136,
+      "grad_norm": 1.3471609354019165,
+      "learning_rate": 0.00020794285714285712,
+      "loss": 0.1631,
+      "step": 1575
+    },
+    {
+      "epoch": 1.0184595798854232,
+      "grad_norm": 1.6721386909484863,
+      "learning_rate": 0.0002058,
+      "loss": 0.1388,
+      "step": 1600
+    },
+    {
+      "epoch": 1.034373010821133,
+      "grad_norm": 1.3706496953964233,
+      "learning_rate": 0.00020365714285714283,
+      "loss": 0.139,
+      "step": 1625
+    },
+    {
+      "epoch": 1.0502864417568427,
+      "grad_norm": 1.8108116388320923,
+      "learning_rate": 0.0002015142857142857,
+      "loss": 0.1486,
+      "step": 1650
+    },
+    {
+      "epoch": 1.0661998726925526,
+      "grad_norm": 1.5080534219741821,
+      "learning_rate": 0.00019937142857142856,
+      "loss": 0.1431,
+      "step": 1675
+    },
+    {
+      "epoch": 1.0821133036282622,
+      "grad_norm": 1.377472996711731,
+      "learning_rate": 0.00019722857142857143,
+      "loss": 0.1493,
+      "step": 1700
+    },
+    {
+      "epoch": 1.098026734563972,
+      "grad_norm": 1.4746475219726562,
+      "learning_rate": 0.00019508571428571427,
+      "loss": 0.1386,
+      "step": 1725
+    },
+    {
+      "epoch": 1.1139401654996817,
+      "grad_norm": 1.4167110919952393,
+      "learning_rate": 0.0001929428571428571,
+      "loss": 0.144,
+      "step": 1750
+    },
+    {
+      "epoch": 1.1298535964353915,
+      "grad_norm": 1.4052375555038452,
+      "learning_rate": 0.00019079999999999998,
+      "loss": 0.1479,
+      "step": 1775
+    },
+    {
+      "epoch": 1.1457670273711011,
+      "grad_norm": 1.1903959512710571,
+      "learning_rate": 0.00018865714285714284,
+      "loss": 0.1319,
+      "step": 1800
+    },
+    {
+      "epoch": 1.161680458306811,
+      "grad_norm": 1.5119489431381226,
+      "learning_rate": 0.00018651428571428568,
+      "loss": 0.1408,
+      "step": 1825
+    },
+    {
+      "epoch": 1.1775938892425206,
+      "grad_norm": 1.2824383974075317,
+      "learning_rate": 0.00018437142857142855,
+      "loss": 0.1379,
+      "step": 1850
+    },
+    {
+      "epoch": 1.1935073201782305,
+      "grad_norm": 2.1692168712615967,
+      "learning_rate": 0.00018222857142857142,
+      "loss": 0.1411,
+      "step": 1875
+    },
+    {
+      "epoch": 1.20942075111394,
+      "grad_norm": 1.2325959205627441,
+      "learning_rate": 0.00018008571428571428,
+      "loss": 0.1333,
+      "step": 1900
+    },
+    {
+      "epoch": 1.22533418204965,
+      "grad_norm": 0.9051541090011597,
+      "learning_rate": 0.00017794285714285715,
+      "loss": 0.1523,
+      "step": 1925
+    },
+    {
+      "epoch": 1.2412476129853596,
+      "grad_norm": 1.3505010604858398,
+      "learning_rate": 0.00017579999999999996,
+      "loss": 0.1363,
+      "step": 1950
+    },
+    {
+      "epoch": 1.2571610439210694,
+      "grad_norm": 1.3619211912155151,
+      "learning_rate": 0.00017365714285714283,
+      "loss": 0.1308,
+      "step": 1975
+    },
+    {
+      "epoch": 1.273074474856779,
+      "grad_norm": 1.2368829250335693,
+      "learning_rate": 0.0001715142857142857,
+      "loss": 0.1242,
+      "step": 2000
+    },
+    {
+      "epoch": 1.273074474856779,
+      "eval_loss": 0.17340172827243805,
+      "eval_runtime": 3339.9389,
+      "eval_samples_per_second": 2.501,
+      "eval_steps_per_second": 0.313,
+      "eval_wer": 49.436910454391054,
+      "step": 2000
+    },
+    {
+      "epoch": 1.288987905792489,
+      "grad_norm": 1.3111966848373413,
+      "learning_rate": 0.00016937142857142856,
+      "loss": 0.1318,
+      "step": 2025
+    },
+    {
+      "epoch": 1.3049013367281985,
+      "grad_norm": 1.3677690029144287,
+      "learning_rate": 0.0001672285714285714,
+      "loss": 0.1368,
+      "step": 2050
+    },
+    {
+      "epoch": 1.3208147676639084,
+      "grad_norm": 1.1350477933883667,
+      "learning_rate": 0.00016508571428571427,
+      "loss": 0.1218,
+      "step": 2075
+    },
+    {
+      "epoch": 1.336728198599618,
+      "grad_norm": 1.4157230854034424,
+      "learning_rate": 0.00016294285714285714,
+      "loss": 0.1245,
+      "step": 2100
+    },
+    {
+      "epoch": 1.3526416295353278,
+      "grad_norm": 1.6313213109970093,
+      "learning_rate": 0.0001608,
+      "loss": 0.1249,
+      "step": 2125
+    },
+    {
+      "epoch": 1.3685550604710375,
+      "grad_norm": 1.1400282382965088,
+      "learning_rate": 0.00015865714285714282,
+      "loss": 0.1258,
+      "step": 2150
+    },
+    {
+      "epoch": 1.3844684914067473,
+      "grad_norm": 1.0388585329055786,
+      "learning_rate": 0.00015651428571428569,
+      "loss": 0.1278,
+      "step": 2175
+    },
+    {
+      "epoch": 1.400381922342457,
+      "grad_norm": 1.1972434520721436,
+      "learning_rate": 0.00015437142857142855,
+      "loss": 0.1294,
+      "step": 2200
+    },
+    {
+      "epoch": 1.4162953532781668,
+      "grad_norm": 1.5634195804595947,
+      "learning_rate": 0.00015222857142857142,
+      "loss": 0.122,
+      "step": 2225
+    },
+    {
+      "epoch": 1.4322087842138764,
+      "grad_norm": 1.0193285942077637,
+      "learning_rate": 0.00015008571428571429,
+      "loss": 0.116,
+      "step": 2250
+    },
+    {
+      "epoch": 1.4481222151495863,
+      "grad_norm": 1.2780238389968872,
+      "learning_rate": 0.00014794285714285713,
+      "loss": 0.124,
+      "step": 2275
+    },
+    {
+      "epoch": 1.464035646085296,
+      "grad_norm": 1.1720143556594849,
+      "learning_rate": 0.0001458,
+      "loss": 0.1267,
+      "step": 2300
+    },
+    {
+      "epoch": 1.4799490770210058,
+      "grad_norm": 1.1181762218475342,
+      "learning_rate": 0.00014365714285714286,
+      "loss": 0.1131,
+      "step": 2325
+    },
+    {
+      "epoch": 1.4958625079567156,
+      "grad_norm": 1.2259148359298706,
+      "learning_rate": 0.0001415142857142857,
+      "loss": 0.1121,
+      "step": 2350
+    },
+    {
+      "epoch": 1.5117759388924252,
+      "grad_norm": 1.2877577543258667,
+      "learning_rate": 0.00013937142857142857,
+      "loss": 0.1125,
+      "step": 2375
+    },
+    {
+      "epoch": 1.5276893698281349,
+      "grad_norm": 0.9741705656051636,
+      "learning_rate": 0.0001372285714285714,
+      "loss": 0.1122,
+      "step": 2400
+    },
+    {
+      "epoch": 1.5436028007638447,
+      "grad_norm": 1.1237064599990845,
+      "learning_rate": 0.00013508571428571427,
+      "loss": 0.1135,
+      "step": 2425
+    },
+    {
+      "epoch": 1.5595162316995546,
+      "grad_norm": 0.9983360767364502,
+      "learning_rate": 0.00013294285714285711,
+      "loss": 0.1053,
+      "step": 2450
+    },
+    {
+      "epoch": 1.5754296626352642,
+      "grad_norm": 0.941571831703186,
+      "learning_rate": 0.00013079999999999998,
+      "loss": 0.1104,
+      "step": 2475
+    },
+    {
+      "epoch": 1.5913430935709738,
+      "grad_norm": 0.8732393383979797,
+      "learning_rate": 0.00012865714285714285,
+      "loss": 0.1073,
+      "step": 2500
+    },
+    {
+      "epoch": 1.6072565245066837,
+      "grad_norm": 1.1299751996994019,
+      "learning_rate": 0.0001265142857142857,
+      "loss": 0.1121,
+      "step": 2525
+    },
+    {
+      "epoch": 1.6231699554423935,
+      "grad_norm": 0.8921777606010437,
+      "learning_rate": 0.00012437142857142855,
+      "loss": 0.1097,
+      "step": 2550
+    },
+    {
+      "epoch": 1.6390833863781031,
+      "grad_norm": 0.8513890504837036,
+      "learning_rate": 0.00012222857142857142,
+      "loss": 0.1082,
+      "step": 2575
+    },
+    {
+      "epoch": 1.6549968173138128,
+      "grad_norm": 1.0893051624298096,
+      "learning_rate": 0.00012008571428571428,
+      "loss": 0.1029,
+      "step": 2600
+    },
+    {
+      "epoch": 1.6709102482495226,
+      "grad_norm": 0.8744276165962219,
+      "learning_rate": 0.00011794285714285713,
+      "loss": 0.1086,
+      "step": 2625
+    },
+    {
+      "epoch": 1.6868236791852325,
+      "grad_norm": 1.1604117155075073,
+      "learning_rate": 0.0001158,
+      "loss": 0.1008,
+      "step": 2650
+    },
+    {
+      "epoch": 1.702737110120942,
+      "grad_norm": 0.9912447929382324,
+      "learning_rate": 0.00011365714285714284,
+      "loss": 0.1022,
+      "step": 2675
+    },
+    {
+      "epoch": 1.7186505410566517,
+      "grad_norm": 1.088443636894226,
+      "learning_rate": 0.0001115142857142857,
+      "loss": 0.1029,
+      "step": 2700
+    },
+    {
+      "epoch": 1.7345639719923616,
+      "grad_norm": 1.1775622367858887,
+      "learning_rate": 0.00010937142857142856,
+      "loss": 0.1041,
+      "step": 2725
+    },
+    {
+      "epoch": 1.7504774029280714,
+      "grad_norm": 1.1188244819641113,
+      "learning_rate": 0.00010722857142857142,
+      "loss": 0.1068,
+      "step": 2750
+    },
+    {
+      "epoch": 1.766390833863781,
+      "grad_norm": 0.9570010900497437,
+      "learning_rate": 0.00010508571428571429,
+      "loss": 0.098,
+      "step": 2775
+    },
+    {
+      "epoch": 1.7823042647994907,
+      "grad_norm": 1.4812625646591187,
+      "learning_rate": 0.00010294285714285713,
+      "loss": 0.0949,
+      "step": 2800
+    },
+    {
+      "epoch": 1.7982176957352005,
+      "grad_norm": 0.8738514184951782,
+      "learning_rate": 0.0001008,
+      "loss": 0.1003,
+      "step": 2825
+    },
+    {
+      "epoch": 1.8141311266709104,
+      "grad_norm": 0.9198557734489441,
+      "learning_rate": 9.865714285714285e-05,
+      "loss": 0.0913,
+      "step": 2850
+    },
+    {
+      "epoch": 1.83004455760662,
+      "grad_norm": 0.8232097625732422,
+      "learning_rate": 9.65142857142857e-05,
+      "loss": 0.1017,
+      "step": 2875
+    },
+    {
+      "epoch": 1.8459579885423296,
+      "grad_norm": 0.7927132844924927,
+      "learning_rate": 9.437142857142856e-05,
+      "loss": 0.1006,
+      "step": 2900
+    },
+    {
+      "epoch": 1.8618714194780395,
+      "grad_norm": 0.8228179812431335,
+      "learning_rate": 9.222857142857142e-05,
+      "loss": 0.0954,
+      "step": 2925
+    },
+    {
+      "epoch": 1.8777848504137493,
+      "grad_norm": 1.0207316875457764,
+      "learning_rate": 9.008571428571428e-05,
+      "loss": 0.1015,
+      "step": 2950
+    },
+    {
+      "epoch": 1.893698281349459,
+      "grad_norm": 1.3909655809402466,
+      "learning_rate": 8.794285714285713e-05,
+      "loss": 0.0966,
+      "step": 2975
+    },
+    {
+      "epoch": 1.9096117122851686,
+      "grad_norm": 0.8367329835891724,
+      "learning_rate": 8.579999999999998e-05,
+      "loss": 0.1022,
+      "step": 3000
+    },
+    {
+      "epoch": 1.9096117122851686,
+      "eval_loss": 0.11972030252218246,
+      "eval_runtime": 3680.6627,
+      "eval_samples_per_second": 2.269,
+      "eval_steps_per_second": 0.284,
+      "eval_wer": 39.05306890334718,
+      "step": 3000
+    },
+    {
+      "epoch": 1.9255251432208784,
+      "grad_norm": 0.8776394724845886,
+      "learning_rate": 8.365714285714285e-05,
+      "loss": 0.0951,
+      "step": 3025
+    },
+    {
+      "epoch": 1.9414385741565883,
+      "grad_norm": 0.7883875966072083,
+      "learning_rate": 8.151428571428572e-05,
+      "loss": 0.0887,
+      "step": 3050
+    },
+    {
+      "epoch": 1.957352005092298,
+      "grad_norm": 0.9402434229850769,
+      "learning_rate": 7.937142857142856e-05,
+      "loss": 0.0989,
+      "step": 3075
+    },
+    {
+      "epoch": 1.9732654360280075,
+      "grad_norm": 0.9308575391769409,
+      "learning_rate": 7.722857142857143e-05,
+      "loss": 0.0913,
+      "step": 3100
+    },
+    {
+      "epoch": 1.9891788669637174,
+      "grad_norm": 0.8554713129997253,
+      "learning_rate": 7.508571428571428e-05,
+      "loss": 0.0841,
+      "step": 3125
+    },
+    {
+      "epoch": 2.0050922978994272,
+      "grad_norm": 0.6500595211982727,
+      "learning_rate": 7.294285714285713e-05,
+      "loss": 0.0778,
+      "step": 3150
+    },
+    {
+      "epoch": 2.021005728835137,
+      "grad_norm": 0.8015878200531006,
+      "learning_rate": 7.079999999999999e-05,
+      "loss": 0.0485,
+      "step": 3175
+    },
+    {
+      "epoch": 2.0369191597708465,
+      "grad_norm": 0.9871166944503784,
+      "learning_rate": 6.865714285714285e-05,
+      "loss": 0.0538,
+      "step": 3200
+    },
+    {
+      "epoch": 2.0528325907065565,
+      "grad_norm": 0.7539300918579102,
+      "learning_rate": 6.65142857142857e-05,
+      "loss": 0.0567,
+      "step": 3225
+    },
+    {
+      "epoch": 2.068746021642266,
+      "grad_norm": 0.7191395163536072,
+      "learning_rate": 6.437142857142857e-05,
+      "loss": 0.0474,
+      "step": 3250
+    },
+    {
+      "epoch": 2.084659452577976,
+      "grad_norm": 0.8346642851829529,
+      "learning_rate": 6.222857142857143e-05,
+      "loss": 0.0565,
+      "step": 3275
+    },
+    {
+      "epoch": 2.1005728835136854,
+      "grad_norm": 0.890232264995575,
+      "learning_rate": 6.008571428571428e-05,
+      "loss": 0.0513,
+      "step": 3300
+    },
+    {
+      "epoch": 2.1164863144493955,
+      "grad_norm": 0.7644281983375549,
+      "learning_rate": 5.794285714285714e-05,
+      "loss": 0.0494,
+      "step": 3325
+    },
+    {
+      "epoch": 2.132399745385105,
+      "grad_norm": 1.0637160539627075,
+      "learning_rate": 5.5799999999999994e-05,
+      "loss": 0.0481,
+      "step": 3350
+    },
+    {
+      "epoch": 2.1483131763208148,
+      "grad_norm": 0.7097823619842529,
+      "learning_rate": 5.3657142857142855e-05,
+      "loss": 0.0466,
+      "step": 3375
+    },
+    {
+      "epoch": 2.1642266072565244,
+      "grad_norm": 1.0402812957763672,
+      "learning_rate": 5.151428571428571e-05,
+      "loss": 0.053,
+      "step": 3400
+    },
+    {
+      "epoch": 2.1801400381922345,
+      "grad_norm": 1.0634698867797852,
+      "learning_rate": 4.937142857142856e-05,
+      "loss": 0.0486,
+      "step": 3425
+    },
+    {
+      "epoch": 2.196053469127944,
+      "grad_norm": 0.5193982720375061,
+      "learning_rate": 4.722857142857142e-05,
+      "loss": 0.0468,
+      "step": 3450
+    },
+    {
+      "epoch": 2.2119669000636537,
+      "grad_norm": 0.6877008676528931,
+      "learning_rate": 4.5085714285714275e-05,
+      "loss": 0.0489,
+      "step": 3475
+    },
+    {
+      "epoch": 2.2278803309993633,
+      "grad_norm": 0.7828670740127563,
+      "learning_rate": 4.294285714285714e-05,
+      "loss": 0.0461,
+      "step": 3500
+    },
+    {
+      "epoch": 2.2437937619350734,
+      "grad_norm": 0.714133620262146,
+      "learning_rate": 4.08e-05,
+      "loss": 0.047,
+      "step": 3525
+    },
+    {
+      "epoch": 2.259707192870783,
+      "grad_norm": 0.6755720376968384,
+      "learning_rate": 3.8657142857142856e-05,
+      "loss": 0.0456,
+      "step": 3550
+    },
+    {
+      "epoch": 2.2756206238064927,
+      "grad_norm": 0.908721387386322,
+      "learning_rate": 3.651428571428571e-05,
+      "loss": 0.0427,
+      "step": 3575
+    },
+    {
+      "epoch": 2.2915340547422023,
+      "grad_norm": 0.703593373298645,
+      "learning_rate": 3.437142857142857e-05,
+      "loss": 0.049,
+      "step": 3600
+    },
+    {
+      "epoch": 2.307447485677912,
+      "grad_norm": 0.46566998958587646,
+      "learning_rate": 3.222857142857142e-05,
+      "loss": 0.0436,
+      "step": 3625
+    },
+    {
+      "epoch": 2.323360916613622,
+      "grad_norm": 0.7065137028694153,
+      "learning_rate": 3.0085714285714283e-05,
+      "loss": 0.0429,
+      "step": 3650
+    },
+    {
+      "epoch": 2.3392743475493316,
+      "grad_norm": 0.7245275974273682,
+      "learning_rate": 2.794285714285714e-05,
+      "loss": 0.0436,
+      "step": 3675
+    },
+    {
+      "epoch": 2.3551877784850412,
+      "grad_norm": 0.5561261177062988,
+      "learning_rate": 2.5799999999999997e-05,
+      "loss": 0.0466,
+      "step": 3700
+    },
+    {
+      "epoch": 2.3711012094207513,
+      "grad_norm": 0.6039161086082458,
+      "learning_rate": 2.3657142857142857e-05,
+      "loss": 0.0416,
+      "step": 3725
+    },
+    {
+      "epoch": 2.387014640356461,
+      "grad_norm": 0.5791841149330139,
+      "learning_rate": 2.1514285714285714e-05,
+      "loss": 0.042,
+      "step": 3750
+    },
+    {
+      "epoch": 2.4029280712921706,
+      "grad_norm": 0.5699833631515503,
+      "learning_rate": 1.937142857142857e-05,
+      "loss": 0.0433,
+      "step": 3775
+    },
+    {
+      "epoch": 2.41884150222788,
+      "grad_norm": 0.8795793056488037,
+      "learning_rate": 1.7228571428571428e-05,
+      "loss": 0.0428,
+      "step": 3800
+    },
+    {
+      "epoch": 2.4347549331635903,
+      "grad_norm": 0.7127372622489929,
+      "learning_rate": 1.5085714285714285e-05,
+      "loss": 0.0386,
+      "step": 3825
+    },
+    {
+      "epoch": 2.4506683640993,
+      "grad_norm": 0.7226797342300415,
+      "learning_rate": 1.2942857142857141e-05,
+      "loss": 0.0443,
+      "step": 3850
+    },
+    {
+      "epoch": 2.4665817950350095,
+      "grad_norm": 0.738706648349762,
+      "learning_rate": 1.0799999999999998e-05,
+      "loss": 0.0426,
+      "step": 3875
+    },
+    {
+      "epoch": 2.482495225970719,
+      "grad_norm": 0.9381580352783203,
+      "learning_rate": 8.657142857142855e-06,
+      "loss": 0.0436,
+      "step": 3900
+    },
+    {
+      "epoch": 2.498408656906429,
+      "grad_norm": 0.863370418548584,
+      "learning_rate": 6.514285714285714e-06,
+      "loss": 0.0436,
+      "step": 3925
+    },
+    {
+      "epoch": 2.514322087842139,
+      "grad_norm": 0.7624787092208862,
+      "learning_rate": 4.371428571428571e-06,
+      "loss": 0.0427,
+      "step": 3950
+    },
+    {
+      "epoch": 2.5302355187778485,
+      "grad_norm": 0.6790493726730347,
+      "learning_rate": 2.228571428571428e-06,
+      "loss": 0.0423,
+      "step": 3975
+    },
+    {
+      "epoch": 2.546148949713558,
+      "grad_norm": 0.7205957174301147,
+      "learning_rate": 8.571428571428572e-08,
+      "loss": 0.046,
+      "step": 4000
+    },
+    {
+      "epoch": 2.546148949713558,
+      "eval_loss": 0.10670246928930283,
+      "eval_runtime": 3687.1388,
+      "eval_samples_per_second": 2.265,
+      "eval_steps_per_second": 0.283,
+      "eval_wer": 34.549659010739205,
+      "step": 4000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 1000,
+  "total_flos": 4.15026143428608e+18,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf5b63d3f1c4d65ecbb09db96f5af94aedb9a779b82935c28e2defd51e78b2e4
-size 5240

 version https://git-lfs.github.com/spec/v1
+oid sha256:732ebdff1db4d788549bb01f769a1897c9bcdd43f11ac888330d69d60a4f1e52
+size 5112