DewiBrynJones commited on
Commit
d086623
1 Parent(s): 74b237f

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.5226
21
  - Wer: 0.4001
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DEWIBRYNJONES/BANC-TRAWSGRIFIADAU-BANGOR-CLEAN-WITH-CCV - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.5226
23
  - Wer: 0.4001
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 0.38476337052712584,
3
- "eval_loss": 0.5865370631217957,
4
- "eval_runtime": 179.3337,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 39.156,
7
- "eval_steps_per_second": 2.448,
8
- "eval_wer": 0.44523578575319894,
9
- "total_flos": 6.835515215409967e+18,
10
- "train_loss": 0.9274428431193034,
11
- "train_runtime": 4544.4955,
12
  "train_samples": 124748,
13
- "train_samples_per_second": 10.562,
14
- "train_steps_per_second": 1.32
15
  }
 
1
  {
2
+ "epoch": 0.6412722842118763,
3
+ "eval_loss": 0.5226185917854309,
4
+ "eval_runtime": 188.9199,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.169,
7
+ "eval_steps_per_second": 0.582,
8
+ "eval_wer": 0.40009832711037885,
9
+ "total_flos": 1.1393778193380235e+19,
10
+ "train_loss": 0.7283544036865235,
11
+ "train_runtime": 7737.7643,
12
  "train_samples": 124748,
13
+ "train_samples_per_second": 10.339,
14
+ "train_steps_per_second": 1.292
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.38476337052712584,
3
- "eval_loss": 0.5865370631217957,
4
- "eval_runtime": 179.3337,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 39.156,
7
- "eval_steps_per_second": 2.448,
8
- "eval_wer": 0.44523578575319894
9
  }
 
1
  {
2
+ "epoch": 0.6412722842118763,
3
+ "eval_loss": 0.5226185917854309,
4
+ "eval_runtime": 188.9199,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.169,
7
+ "eval_steps_per_second": 0.582,
8
+ "eval_wer": 0.40009832711037885
9
  }
runs/Aug30_21-53-10_4b35055fdbcb/events.out.tfevents.1725061483.4b35055fdbcb.318.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bce9a3f5d305870094b0b15d0e6f2c1a88995a9bae5068167ab796c85fe77df5
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.38476337052712584,
3
- "total_flos": 6.835515215409967e+18,
4
- "train_loss": 0.9274428431193034,
5
- "train_runtime": 4544.4955,
6
  "train_samples": 124748,
7
- "train_samples_per_second": 10.562,
8
- "train_steps_per_second": 1.32
9
  }
 
1
  {
2
+ "epoch": 0.6412722842118763,
3
+ "total_flos": 1.1393778193380235e+19,
4
+ "train_loss": 0.7283544036865235,
5
+ "train_runtime": 7737.7643,
6
  "train_samples": 124748,
7
+ "train_samples_per_second": 10.339,
8
+ "train_steps_per_second": 1.292
9
  }
trainer_state.json CHANGED
@@ -1,217 +1,345 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.38476337052712584,
5
  "eval_steps": 500,
6
- "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03206361421059382,
13
- "grad_norm": 10.316516876220703,
14
  "learning_rate": 0.0002465,
15
- "loss": 4.7051,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03206361421059382,
20
- "eval_loss": 1.6561740636825562,
21
- "eval_runtime": 191.0416,
22
- "eval_samples_per_second": 36.756,
23
- "eval_steps_per_second": 2.298,
24
- "eval_wer": 0.9354894431230816,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.06412722842118763,
29
- "grad_norm": 4.912766933441162,
30
- "learning_rate": 0.00027816666666666663,
31
- "loss": 1.0362,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 0.06412722842118763,
36
- "eval_loss": 1.1839842796325684,
37
- "eval_runtime": 175.5788,
38
- "eval_samples_per_second": 39.993,
39
- "eval_steps_per_second": 2.5,
40
- "eval_wer": 0.7977517639086356,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.09619084263178146,
45
- "grad_norm": 4.178534030914307,
46
- "learning_rate": 0.00025038888888888886,
47
- "loss": 0.811,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 0.09619084263178146,
52
- "eval_loss": 1.0081175565719604,
53
- "eval_runtime": 175.5534,
54
- "eval_samples_per_second": 39.999,
55
- "eval_steps_per_second": 2.501,
56
- "eval_wer": 0.7174690070290597,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.12825445684237527,
61
- "grad_norm": 7.957609176635742,
62
- "learning_rate": 0.0002226111111111111,
63
- "loss": 0.6903,
64
  "step": 2000
65
  },
66
  {
67
  "epoch": 0.12825445684237527,
68
- "eval_loss": 0.8935067057609558,
69
- "eval_runtime": 176.1028,
70
- "eval_samples_per_second": 39.874,
71
- "eval_steps_per_second": 2.493,
72
- "eval_wer": 0.6401094885661516,
73
  "step": 2000
74
  },
75
  {
76
  "epoch": 0.16031807105296908,
77
- "grad_norm": 6.541042327880859,
78
- "learning_rate": 0.00019483333333333332,
79
- "loss": 0.6238,
80
  "step": 2500
81
  },
82
  {
83
  "epoch": 0.16031807105296908,
84
- "eval_loss": 0.8059775233268738,
85
- "eval_runtime": 177.0587,
86
- "eval_samples_per_second": 39.659,
87
- "eval_steps_per_second": 2.479,
88
- "eval_wer": 0.5848868573858276,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 0.19238168526356292,
93
- "grad_norm": 5.036514759063721,
94
- "learning_rate": 0.00016705555555555554,
95
- "loss": 0.5649,
96
  "step": 3000
97
  },
98
  {
99
  "epoch": 0.19238168526356292,
100
- "eval_loss": 0.7770201563835144,
101
- "eval_runtime": 178.1454,
102
- "eval_samples_per_second": 39.417,
103
- "eval_steps_per_second": 2.464,
104
- "eval_wer": 0.5589231852668783,
105
  "step": 3000
106
  },
107
  {
108
  "epoch": 0.22444529947415673,
109
- "grad_norm": 4.140881538391113,
110
- "learning_rate": 0.00013927777777777777,
111
- "loss": 0.5309,
112
  "step": 3500
113
  },
114
  {
115
  "epoch": 0.22444529947415673,
116
- "eval_loss": 0.7264481782913208,
117
- "eval_runtime": 176.7301,
118
- "eval_samples_per_second": 39.733,
119
- "eval_steps_per_second": 2.484,
120
- "eval_wer": 0.5326539018589139,
121
  "step": 3500
122
  },
123
  {
124
  "epoch": 0.25650891368475054,
125
- "grad_norm": 6.205782413482666,
126
- "learning_rate": 0.00011155555555555555,
127
- "loss": 0.4892,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.25650891368475054,
132
- "eval_loss": 0.6865007877349854,
133
- "eval_runtime": 178.7785,
134
- "eval_samples_per_second": 39.278,
135
- "eval_steps_per_second": 2.456,
136
- "eval_wer": 0.5106366016024662,
137
  "step": 4000
138
  },
139
  {
140
  "epoch": 0.2885725278953444,
141
- "grad_norm": 9.618443489074707,
142
- "learning_rate": 8.388888888888888e-05,
143
- "loss": 0.4521,
144
  "step": 4500
145
  },
146
  {
147
  "epoch": 0.2885725278953444,
148
- "eval_loss": 0.6477864980697632,
149
- "eval_runtime": 177.7072,
150
- "eval_samples_per_second": 39.514,
151
- "eval_steps_per_second": 2.47,
152
- "eval_wer": 0.4861345486918508,
153
  "step": 4500
154
  },
155
  {
156
  "epoch": 0.32063614210593816,
157
- "grad_norm": 8.743291854858398,
158
- "learning_rate": 5.61111111111111e-05,
159
- "loss": 0.4309,
160
  "step": 5000
161
  },
162
  {
163
  "epoch": 0.32063614210593816,
164
- "eval_loss": 0.6221807599067688,
165
- "eval_runtime": 177.9115,
166
- "eval_samples_per_second": 39.469,
167
- "eval_steps_per_second": 2.468,
168
- "eval_wer": 0.4763416999960138,
169
  "step": 5000
170
  },
171
  {
172
  "epoch": 0.352699756316532,
173
- "grad_norm": 8.870036125183105,
174
- "learning_rate": 2.833333333333333e-05,
175
- "loss": 0.4055,
176
  "step": 5500
177
  },
178
  {
179
  "epoch": 0.352699756316532,
180
- "eval_loss": 0.5988152027130127,
181
- "eval_runtime": 178.6515,
182
- "eval_samples_per_second": 39.306,
183
- "eval_steps_per_second": 2.457,
184
- "eval_wer": 0.45257045668956536,
185
  "step": 5500
186
  },
187
  {
188
  "epoch": 0.38476337052712584,
189
- "grad_norm": 14.018965721130371,
190
- "learning_rate": 6.11111111111111e-07,
191
- "loss": 0.3896,
192
  "step": 6000
193
  },
194
  {
195
  "epoch": 0.38476337052712584,
196
- "eval_loss": 0.5865370631217957,
197
- "eval_runtime": 176.8507,
198
- "eval_samples_per_second": 39.706,
199
- "eval_steps_per_second": 2.482,
200
- "eval_wer": 0.44523578575319894,
201
  "step": 6000
202
  },
203
  {
204
- "epoch": 0.38476337052712584,
205
- "step": 6000,
206
- "total_flos": 6.835515215409967e+18,
207
- "train_loss": 0.9274428431193034,
208
- "train_runtime": 4544.4955,
209
- "train_samples_per_second": 10.562,
210
- "train_steps_per_second": 1.32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  }
212
  ],
213
  "logging_steps": 500,
214
- "max_steps": 6000,
215
  "num_input_tokens_seen": 0,
216
  "num_train_epochs": 1,
217
  "save_steps": 500,
@@ -227,7 +355,7 @@
227
  "attributes": {}
228
  }
229
  },
230
- "total_flos": 6.835515215409967e+18,
231
  "train_batch_size": 8,
232
  "trial_name": null,
233
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6412722842118763,
5
  "eval_steps": 500,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03206361421059382,
13
+ "grad_norm": 11.533838272094727,
14
  "learning_rate": 0.0002465,
15
+ "loss": 4.6618,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03206361421059382,
20
+ "eval_loss": 1.5996026992797852,
21
+ "eval_runtime": 185.1067,
22
+ "eval_samples_per_second": 37.935,
23
+ "eval_steps_per_second": 0.594,
24
+ "eval_wer": 0.9161163448889834,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.06412722842118763,
29
+ "grad_norm": 4.801280975341797,
30
+ "learning_rate": 0.0002874574468085106,
31
+ "loss": 1.0278,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 0.06412722842118763,
36
+ "eval_loss": 1.1463252305984497,
37
+ "eval_runtime": 184.8935,
38
+ "eval_samples_per_second": 37.979,
39
+ "eval_steps_per_second": 0.595,
40
+ "eval_wer": 0.7792157748574922,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.09619084263178146,
45
+ "grad_norm": 4.746730327606201,
46
+ "learning_rate": 0.0002715,
47
+ "loss": 0.8164,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 0.09619084263178146,
52
+ "eval_loss": 1.0590689182281494,
53
+ "eval_runtime": 185.3865,
54
+ "eval_samples_per_second": 37.878,
55
+ "eval_steps_per_second": 0.593,
56
+ "eval_wer": 0.7363238948165668,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.12825445684237527,
61
+ "grad_norm": 21.847057342529297,
62
+ "learning_rate": 0.00025554255319148935,
63
+ "loss": 0.7124,
64
  "step": 2000
65
  },
66
  {
67
  "epoch": 0.12825445684237527,
68
+ "eval_loss": 0.9373884797096252,
69
+ "eval_runtime": 186.5741,
70
+ "eval_samples_per_second": 37.637,
71
+ "eval_steps_per_second": 0.59,
72
+ "eval_wer": 0.6622596632960842,
73
  "step": 2000
74
  },
75
  {
76
  "epoch": 0.16031807105296908,
77
+ "grad_norm": 4.238761901855469,
78
+ "learning_rate": 0.0002395851063829787,
79
+ "loss": 0.6566,
80
  "step": 2500
81
  },
82
  {
83
  "epoch": 0.16031807105296908,
84
+ "eval_loss": 0.8721805810928345,
85
+ "eval_runtime": 187.0392,
86
+ "eval_samples_per_second": 37.543,
87
+ "eval_steps_per_second": 0.588,
88
+ "eval_wer": 0.615168949893036,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 0.19238168526356292,
93
+ "grad_norm": 5.65878963470459,
94
+ "learning_rate": 0.00022362765957446805,
95
+ "loss": 0.6101,
96
  "step": 3000
97
  },
98
  {
99
  "epoch": 0.19238168526356292,
100
+ "eval_loss": 0.8194323182106018,
101
+ "eval_runtime": 187.1888,
102
+ "eval_samples_per_second": 37.513,
103
+ "eval_steps_per_second": 0.588,
104
+ "eval_wer": 0.5927264513214366,
105
  "step": 3000
106
  },
107
  {
108
  "epoch": 0.22444529947415673,
109
+ "grad_norm": 6.294471740722656,
110
+ "learning_rate": 0.00020767021276595744,
111
+ "loss": 0.5777,
112
  "step": 3500
113
  },
114
  {
115
  "epoch": 0.22444529947415673,
116
+ "eval_loss": 0.7799355387687683,
117
+ "eval_runtime": 187.6349,
118
+ "eval_samples_per_second": 37.424,
119
+ "eval_steps_per_second": 0.586,
120
+ "eval_wer": 0.5707490134070343,
121
  "step": 3500
122
  },
123
  {
124
  "epoch": 0.25650891368475054,
125
+ "grad_norm": 3.848400115966797,
126
+ "learning_rate": 0.0001917446808510638,
127
+ "loss": 0.5431,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.25650891368475054,
132
+ "eval_loss": 0.7453157901763916,
133
+ "eval_runtime": 187.5991,
134
+ "eval_samples_per_second": 37.431,
135
+ "eval_steps_per_second": 0.586,
136
+ "eval_wer": 0.550233194700966,
137
  "step": 4000
138
  },
139
  {
140
  "epoch": 0.2885725278953444,
141
+ "grad_norm": 9.198270797729492,
142
+ "learning_rate": 0.00017585106382978722,
143
+ "loss": 0.512,
144
  "step": 4500
145
  },
146
  {
147
  "epoch": 0.2885725278953444,
148
+ "eval_loss": 0.7209311127662659,
149
+ "eval_runtime": 188.2502,
150
+ "eval_samples_per_second": 37.301,
151
+ "eval_steps_per_second": 0.584,
152
+ "eval_wer": 0.5346735938558843,
153
  "step": 4500
154
  },
155
  {
156
  "epoch": 0.32063614210593816,
157
+ "grad_norm": 7.854990482330322,
158
+ "learning_rate": 0.00015989361702127658,
159
+ "loss": 0.4953,
160
  "step": 5000
161
  },
162
  {
163
  "epoch": 0.32063614210593816,
164
+ "eval_loss": 0.6942155957221985,
165
+ "eval_runtime": 187.3133,
166
+ "eval_samples_per_second": 37.488,
167
+ "eval_steps_per_second": 0.587,
168
+ "eval_wer": 0.5225022920846676,
169
  "step": 5000
170
  },
171
  {
172
  "epoch": 0.352699756316532,
173
+ "grad_norm": 10.654298782348633,
174
+ "learning_rate": 0.00014393617021276595,
175
+ "loss": 0.4746,
176
  "step": 5500
177
  },
178
  {
179
  "epoch": 0.352699756316532,
180
+ "eval_loss": 0.6680377721786499,
181
+ "eval_runtime": 188.6546,
182
+ "eval_samples_per_second": 37.221,
183
+ "eval_steps_per_second": 0.583,
184
+ "eval_wer": 0.495714798230112,
185
  "step": 5500
186
  },
187
  {
188
  "epoch": 0.38476337052712584,
189
+ "grad_norm": 19.936861038208008,
190
+ "learning_rate": 0.00012801063829787234,
191
+ "loss": 0.4535,
192
  "step": 6000
193
  },
194
  {
195
  "epoch": 0.38476337052712584,
196
+ "eval_loss": 0.6434958577156067,
197
+ "eval_runtime": 188.0277,
198
+ "eval_samples_per_second": 37.346,
199
+ "eval_steps_per_second": 0.585,
200
+ "eval_wer": 0.47308627539563375,
201
  "step": 6000
202
  },
203
  {
204
+ "epoch": 0.4168269847377196,
205
+ "grad_norm": 6.950649261474609,
206
+ "learning_rate": 0.0001120531914893617,
207
+ "loss": 0.4249,
208
+ "step": 6500
209
+ },
210
+ {
211
+ "epoch": 0.4168269847377196,
212
+ "eval_loss": 0.6293027400970459,
213
+ "eval_runtime": 187.8772,
214
+ "eval_samples_per_second": 37.375,
215
+ "eval_steps_per_second": 0.585,
216
+ "eval_wer": 0.4816832538301067,
217
+ "step": 6500
218
+ },
219
+ {
220
+ "epoch": 0.44889059894831346,
221
+ "grad_norm": 5.574142932891846,
222
+ "learning_rate": 9.612765957446806e-05,
223
+ "loss": 0.4065,
224
+ "step": 7000
225
+ },
226
+ {
227
+ "epoch": 0.44889059894831346,
228
+ "eval_loss": 0.5997486710548401,
229
+ "eval_runtime": 188.7633,
230
+ "eval_samples_per_second": 37.2,
231
+ "eval_steps_per_second": 0.583,
232
+ "eval_wer": 0.44947448145736724,
233
+ "step": 7000
234
+ },
235
+ {
236
+ "epoch": 0.4809542131589073,
237
+ "grad_norm": 5.602737903594971,
238
+ "learning_rate": 8.017021276595744e-05,
239
+ "loss": 0.393,
240
+ "step": 7500
241
+ },
242
+ {
243
+ "epoch": 0.4809542131589073,
244
+ "eval_loss": 0.5802670121192932,
245
+ "eval_runtime": 189.0166,
246
+ "eval_samples_per_second": 37.15,
247
+ "eval_steps_per_second": 0.582,
248
+ "eval_wer": 0.44260487117819797,
249
+ "step": 7500
250
+ },
251
+ {
252
+ "epoch": 0.5130178273695011,
253
+ "grad_norm": 15.019088745117188,
254
+ "learning_rate": 6.424468085106383e-05,
255
+ "loss": 0.3808,
256
+ "step": 8000
257
+ },
258
+ {
259
+ "epoch": 0.5130178273695011,
260
+ "eval_loss": 0.5661880970001221,
261
+ "eval_runtime": 189.1322,
262
+ "eval_samples_per_second": 37.127,
263
+ "eval_steps_per_second": 0.582,
264
+ "eval_wer": 0.4299153589603901,
265
+ "step": 8000
266
+ },
267
+ {
268
+ "epoch": 0.5450814415800949,
269
+ "grad_norm": 3.79109525680542,
270
+ "learning_rate": 4.8287234042553194e-05,
271
+ "loss": 0.3722,
272
+ "step": 8500
273
+ },
274
+ {
275
+ "epoch": 0.5450814415800949,
276
+ "eval_loss": 0.553141176700592,
277
+ "eval_runtime": 188.8506,
278
+ "eval_samples_per_second": 37.183,
279
+ "eval_steps_per_second": 0.582,
280
+ "eval_wer": 0.4194182755550831,
281
+ "step": 8500
282
+ },
283
+ {
284
+ "epoch": 0.5771450557906888,
285
+ "grad_norm": 12.16059398651123,
286
+ "learning_rate": 3.232978723404255e-05,
287
+ "loss": 0.3622,
288
+ "step": 9000
289
+ },
290
+ {
291
+ "epoch": 0.5771450557906888,
292
+ "eval_loss": 0.5399113297462463,
293
+ "eval_runtime": 188.6804,
294
+ "eval_samples_per_second": 37.216,
295
+ "eval_steps_per_second": 0.583,
296
+ "eval_wer": 0.40727354867856336,
297
+ "step": 9000
298
+ },
299
+ {
300
+ "epoch": 0.6092086700012825,
301
+ "grad_norm": 7.863190650939941,
302
+ "learning_rate": 1.6372340425531912e-05,
303
+ "loss": 0.3526,
304
+ "step": 9500
305
+ },
306
+ {
307
+ "epoch": 0.6092086700012825,
308
+ "eval_loss": 0.5277913808822632,
309
+ "eval_runtime": 188.1946,
310
+ "eval_samples_per_second": 37.312,
311
+ "eval_steps_per_second": 0.585,
312
+ "eval_wer": 0.40279567892212226,
313
+ "step": 9500
314
+ },
315
+ {
316
+ "epoch": 0.6412722842118763,
317
+ "grad_norm": 5.124056339263916,
318
+ "learning_rate": 4.1489361702127654e-07,
319
+ "loss": 0.3337,
320
+ "step": 10000
321
+ },
322
+ {
323
+ "epoch": 0.6412722842118763,
324
+ "eval_loss": 0.5226185917854309,
325
+ "eval_runtime": 188.4518,
326
+ "eval_samples_per_second": 37.262,
327
+ "eval_steps_per_second": 0.584,
328
+ "eval_wer": 0.40009832711037885,
329
+ "step": 10000
330
+ },
331
+ {
332
+ "epoch": 0.6412722842118763,
333
+ "step": 10000,
334
+ "total_flos": 1.1393778193380235e+19,
335
+ "train_loss": 0.7283544036865235,
336
+ "train_runtime": 7737.7643,
337
+ "train_samples_per_second": 10.339,
338
+ "train_steps_per_second": 1.292
339
  }
340
  ],
341
  "logging_steps": 500,
342
+ "max_steps": 10000,
343
  "num_input_tokens_seen": 0,
344
  "num_train_epochs": 1,
345
  "save_steps": 500,
 
355
  "attributes": {}
356
  }
357
  },
358
+ "total_flos": 1.1393778193380235e+19,
359
  "train_batch_size": 8,
360
  "trial_name": null,
361
  "trial_params": null