DewiBrynJones committed on
Commit
ba4e5ca
1 Parent(s): d02b06f

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.7622
21
  - Wer: 0.5187
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv dataset (default configuration).
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.7622
23
  - Wer: 0.5187
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 1.0080645161290323,
3
- "eval_loss": 0.5417820811271667,
4
- "eval_runtime": 188.0972,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 37.332,
7
- "eval_steps_per_second": 2.334,
8
- "eval_wer": 0.4178370693206128,
9
- "total_flos": 6.709869296482936e+18,
10
- "train_loss": 1.3100875549316406,
11
- "train_runtime": 4950.9591,
12
  "train_samples": 47607,
13
- "train_samples_per_second": 9.695,
14
- "train_steps_per_second": 0.606
15
  }
 
1
  {
2
+ "epoch": 1.6803898504453034,
3
+ "eval_loss": 0.7622119784355164,
4
+ "eval_runtime": 181.0985,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 38.774,
7
+ "eval_steps_per_second": 2.424,
8
+ "eval_wer": 0.518741944485045,
9
+ "total_flos": 1.1109120866520904e+19,
10
+ "train_loss": 0.9622068939208984,
11
+ "train_runtime": 7532.6917,
12
  "train_samples": 47607,
13
+ "train_samples_per_second": 10.62,
14
+ "train_steps_per_second": 1.328
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0080645161290323,
3
- "eval_loss": 0.5417820811271667,
4
- "eval_runtime": 188.0972,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 37.332,
7
- "eval_steps_per_second": 2.334,
8
- "eval_wer": 0.4178370693206128
9
  }
 
1
  {
2
+ "epoch": 1.6803898504453034,
3
+ "eval_loss": 0.7622119784355164,
4
+ "eval_runtime": 181.0985,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 38.774,
7
+ "eval_steps_per_second": 2.424,
8
+ "eval_wer": 0.518741944485045
9
  }
runs/Aug29_22-44-30_6d77da852b30/events.out.tfevents.1724976520.6d77da852b30.1954.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19f8712d7f4a903ff1562719ce4868b4e22853555b545994fdd7895e59221dce
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0080645161290323,
3
- "total_flos": 6.709869296482936e+18,
4
- "train_loss": 1.3100875549316406,
5
- "train_runtime": 4950.9591,
6
  "train_samples": 47607,
7
- "train_samples_per_second": 9.695,
8
- "train_steps_per_second": 0.606
9
  }
 
1
  {
2
+ "epoch": 1.6803898504453034,
3
+ "total_flos": 1.1109120866520904e+19,
4
+ "train_loss": 0.9622068939208984,
5
+ "train_runtime": 7532.6917,
6
  "train_samples": 47607,
7
+ "train_samples_per_second": 10.62,
8
+ "train_steps_per_second": 1.328
9
  }
trainer_state.json CHANGED
@@ -1,202 +1,345 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0080645161290323,
5
- "eval_steps": 200,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.06720430107526881,
13
- "eval_loss": 3.1445324420928955,
14
- "eval_runtime": 187.2734,
15
- "eval_samples_per_second": 37.496,
16
- "eval_steps_per_second": 2.344,
17
- "eval_wer": 1.0,
18
- "step": 200
19
  },
20
  {
21
- "epoch": 0.13440860215053763,
22
- "eval_loss": 2.7407212257385254,
23
- "eval_runtime": 185.5858,
24
- "eval_samples_per_second": 37.837,
25
- "eval_steps_per_second": 2.365,
26
- "eval_wer": 0.9999867125526515,
27
- "step": 400
28
  },
29
  {
30
- "epoch": 0.16801075268817203,
31
- "grad_norm": 8.384466171264648,
32
- "learning_rate": 0.00027866666666666665,
33
- "loss": 4.0188,
34
- "step": 500
35
  },
36
  {
37
- "epoch": 0.20161290322580644,
38
- "eval_loss": 1.2700377702713013,
39
- "eval_runtime": 187.6926,
40
- "eval_samples_per_second": 37.412,
41
- "eval_steps_per_second": 2.339,
42
- "eval_wer": 0.8484433755431244,
43
- "step": 600
44
  },
45
  {
46
- "epoch": 0.26881720430107525,
47
- "eval_loss": 0.9953192472457886,
48
- "eval_runtime": 187.642,
49
- "eval_samples_per_second": 37.422,
50
- "eval_steps_per_second": 2.34,
51
- "eval_wer": 0.7435389787267968,
52
- "step": 800
53
  },
54
  {
55
- "epoch": 0.33602150537634407,
56
- "grad_norm": 5.077725410461426,
57
- "learning_rate": 0.0002232222222222222,
58
- "loss": 1.0707,
59
- "step": 1000
 
 
60
  },
61
  {
62
- "epoch": 0.33602150537634407,
63
- "eval_loss": 0.8646696209907532,
64
- "eval_runtime": 187.682,
65
- "eval_samples_per_second": 37.414,
66
- "eval_steps_per_second": 2.339,
67
- "eval_wer": 0.6541277455188084,
68
- "step": 1000
69
  },
70
  {
71
- "epoch": 0.4032258064516129,
72
- "eval_loss": 0.7888585925102234,
73
- "eval_runtime": 187.1558,
74
- "eval_samples_per_second": 37.52,
75
- "eval_steps_per_second": 2.346,
76
- "eval_wer": 0.5784025830797646,
77
- "step": 1200
78
  },
79
  {
80
- "epoch": 0.47043010752688175,
81
- "eval_loss": 0.7465152740478516,
82
- "eval_runtime": 185.9206,
83
- "eval_samples_per_second": 37.769,
84
- "eval_steps_per_second": 2.361,
85
- "eval_wer": 0.5440412442365697,
86
- "step": 1400
87
  },
88
  {
89
- "epoch": 0.5040322580645161,
90
- "grad_norm": 7.621553897857666,
91
- "learning_rate": 0.00016777777777777776,
92
- "loss": 0.8175,
93
- "step": 1500
 
 
 
 
 
 
 
 
 
94
  },
95
  {
96
- "epoch": 0.5376344086021505,
97
- "eval_loss": 0.68277907371521,
98
- "eval_runtime": 187.6331,
99
- "eval_samples_per_second": 37.424,
100
- "eval_steps_per_second": 2.34,
101
- "eval_wer": 0.5042453394278426,
102
- "step": 1600
103
  },
104
  {
105
- "epoch": 0.6048387096774194,
106
- "eval_loss": 0.6549283862113953,
107
- "eval_runtime": 186.6705,
108
- "eval_samples_per_second": 37.617,
109
- "eval_steps_per_second": 2.352,
110
- "eval_wer": 0.4952098752308694,
111
- "step": 1800
112
  },
113
  {
114
- "epoch": 0.6720430107526881,
115
- "grad_norm": 6.983826637268066,
116
- "learning_rate": 0.00011233333333333333,
117
- "loss": 0.7148,
118
- "step": 2000
 
 
119
  },
120
  {
121
- "epoch": 0.6720430107526881,
122
- "eval_loss": 0.6289859414100647,
123
- "eval_runtime": 188.3084,
124
- "eval_samples_per_second": 37.29,
125
- "eval_steps_per_second": 2.331,
126
- "eval_wer": 0.4905858435535949,
127
- "step": 2000
128
  },
129
  {
130
- "epoch": 0.739247311827957,
131
- "eval_loss": 0.6112708449363708,
132
- "eval_runtime": 188.0636,
133
- "eval_samples_per_second": 37.338,
134
- "eval_steps_per_second": 2.334,
135
- "eval_wer": 0.45763297412934,
136
- "step": 2200
137
  },
138
  {
139
- "epoch": 0.8064516129032258,
140
- "eval_loss": 0.5718730688095093,
141
- "eval_runtime": 187.8424,
142
- "eval_samples_per_second": 37.382,
143
- "eval_steps_per_second": 2.337,
144
- "eval_wer": 0.4404788796024396,
145
- "step": 2400
146
  },
147
  {
148
- "epoch": 0.8400537634408602,
149
- "grad_norm": 6.760587215423584,
150
- "learning_rate": 5.6999999999999996e-05,
151
- "loss": 0.6374,
152
- "step": 2500
 
 
153
  },
154
  {
155
- "epoch": 0.8736559139784946,
156
- "eval_loss": 0.5643858313560486,
157
- "eval_runtime": 188.0058,
158
- "eval_samples_per_second": 37.35,
159
- "eval_steps_per_second": 2.335,
160
- "eval_wer": 0.431376978168724,
161
- "step": 2600
162
  },
163
  {
164
- "epoch": 0.9408602150537635,
165
- "eval_loss": 0.5482733249664307,
166
- "eval_runtime": 188.1104,
167
- "eval_samples_per_second": 37.329,
168
- "eval_steps_per_second": 2.334,
169
- "eval_wer": 0.41896650234523447,
170
- "step": 2800
171
  },
172
  {
173
- "epoch": 1.0080645161290323,
174
- "grad_norm": 1.7233390808105469,
175
- "learning_rate": 1.4444444444444445e-06,
176
- "loss": 0.6013,
177
- "step": 3000
178
  },
179
  {
180
- "epoch": 1.0080645161290323,
181
- "eval_loss": 0.5417820811271667,
182
- "eval_runtime": 189.023,
183
- "eval_samples_per_second": 37.149,
184
- "eval_steps_per_second": 2.322,
185
- "eval_wer": 0.4178370693206128,
186
- "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  },
188
  {
189
- "epoch": 1.0080645161290323,
190
- "step": 3000,
191
- "total_flos": 6.709869296482936e+18,
192
- "train_loss": 1.3100875549316406,
193
- "train_runtime": 4950.9591,
194
- "train_samples_per_second": 9.695,
195
- "train_steps_per_second": 0.606
196
  }
197
  ],
198
  "logging_steps": 500,
199
- "max_steps": 3000,
200
  "num_input_tokens_seen": 0,
201
  "num_train_epochs": 2,
202
  "save_steps": 500,
@@ -212,8 +355,8 @@
212
  "attributes": {}
213
  }
214
  },
215
- "total_flos": 6.709869296482936e+18,
216
- "train_batch_size": 16,
217
  "trial_name": null,
218
  "trial_params": null
219
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.6803898504453034,
5
+ "eval_steps": 500,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.08401949252226516,
13
+ "grad_norm": 4.783174514770508,
14
+ "learning_rate": 0.00029519999999999997,
15
+ "loss": 4.5088,
16
+ "step": 500
 
 
17
  },
18
  {
19
+ "epoch": 0.08401949252226516,
20
+ "eval_loss": 2.0263099670410156,
21
+ "eval_runtime": 176.3381,
22
+ "eval_samples_per_second": 39.821,
23
+ "eval_steps_per_second": 2.49,
24
+ "eval_wer": 1.0069493349632601,
25
+ "step": 500
26
  },
27
  {
28
+ "epoch": 0.16803898504453033,
29
+ "grad_norm": 5.4134521484375,
30
+ "learning_rate": 0.0002844631578947368,
31
+ "loss": 1.2959,
32
+ "step": 1000
33
  },
34
  {
35
+ "epoch": 0.16803898504453033,
36
+ "eval_loss": 1.0947818756103516,
37
+ "eval_runtime": 175.5242,
38
+ "eval_samples_per_second": 40.006,
39
+ "eval_steps_per_second": 2.501,
40
+ "eval_wer": 0.7472196016423285,
41
+ "step": 1000
42
  },
43
  {
44
+ "epoch": 0.2520584775667955,
45
+ "grad_norm": 3.8002684116363525,
46
+ "learning_rate": 0.00026867368421052627,
47
+ "loss": 0.9954,
48
+ "step": 1500
 
 
49
  },
50
  {
51
+ "epoch": 0.2520584775667955,
52
+ "eval_loss": 0.9248552918434143,
53
+ "eval_runtime": 177.606,
54
+ "eval_samples_per_second": 39.537,
55
+ "eval_steps_per_second": 2.472,
56
+ "eval_wer": 0.6713615647297998,
57
+ "step": 1500
58
  },
59
  {
60
+ "epoch": 0.33607797008906065,
61
+ "grad_norm": 6.10047721862793,
62
+ "learning_rate": 0.0002529157894736842,
63
+ "loss": 0.8907,
64
+ "step": 2000
 
 
65
  },
66
  {
67
+ "epoch": 0.33607797008906065,
68
+ "eval_loss": 0.8197005987167358,
69
+ "eval_runtime": 176.9088,
70
+ "eval_samples_per_second": 39.693,
71
+ "eval_steps_per_second": 2.482,
72
+ "eval_wer": 0.6131093955540201,
73
+ "step": 2000
74
  },
75
  {
76
+ "epoch": 0.4200974626113258,
77
+ "grad_norm": 7.82942533493042,
78
+ "learning_rate": 0.00023715789473684206,
79
+ "loss": 0.8197,
80
+ "step": 2500
 
 
81
  },
82
  {
83
+ "epoch": 0.4200974626113258,
84
+ "eval_loss": 0.7610413432121277,
85
+ "eval_runtime": 176.4046,
86
+ "eval_samples_per_second": 39.806,
87
+ "eval_steps_per_second": 2.489,
88
+ "eval_wer": 0.550126895122178,
89
+ "step": 2500
90
+ },
91
+ {
92
+ "epoch": 0.504116955133591,
93
+ "grad_norm": 7.377211093902588,
94
+ "learning_rate": 0.00022136842105263156,
95
+ "loss": 0.7873,
96
+ "step": 3000
97
  },
98
  {
99
+ "epoch": 0.504116955133591,
100
+ "eval_loss": 0.7495535612106323,
101
+ "eval_runtime": 176.1708,
102
+ "eval_samples_per_second": 39.859,
103
+ "eval_steps_per_second": 2.492,
104
+ "eval_wer": 0.5436824831581605,
105
+ "step": 3000
106
  },
107
  {
108
+ "epoch": 0.5881364476558562,
109
+ "grad_norm": 7.327758312225342,
110
+ "learning_rate": 0.00020557894736842103,
111
+ "loss": 0.748,
112
+ "step": 3500
 
 
113
  },
114
  {
115
+ "epoch": 0.5881364476558562,
116
+ "eval_loss": 0.6833189725875854,
117
+ "eval_runtime": 175.5214,
118
+ "eval_samples_per_second": 40.007,
119
+ "eval_steps_per_second": 2.501,
120
+ "eval_wer": 0.52397719874035,
121
+ "step": 3500
122
  },
123
  {
124
+ "epoch": 0.6721559401781213,
125
+ "grad_norm": 6.962770938873291,
126
+ "learning_rate": 0.00018982105263157893,
127
+ "loss": 0.7133,
128
+ "step": 4000
 
 
129
  },
130
  {
131
+ "epoch": 0.6721559401781213,
132
+ "eval_loss": 0.6442425847053528,
133
+ "eval_runtime": 176.4204,
134
+ "eval_samples_per_second": 39.803,
135
+ "eval_steps_per_second": 2.488,
136
+ "eval_wer": 0.4789859020183633,
137
+ "step": 4000
138
  },
139
  {
140
+ "epoch": 0.7561754327003865,
141
+ "grad_norm": 27.43330955505371,
142
+ "learning_rate": 0.00017406315789473683,
143
+ "loss": 0.6809,
144
+ "step": 4500
 
 
145
  },
146
  {
147
+ "epoch": 0.7561754327003865,
148
+ "eval_loss": 0.6224470734596252,
149
+ "eval_runtime": 176.8219,
150
+ "eval_samples_per_second": 39.712,
151
+ "eval_steps_per_second": 2.483,
152
+ "eval_wer": 0.47034906124184483,
153
+ "step": 4500
154
  },
155
  {
156
+ "epoch": 0.8401949252226516,
157
+ "grad_norm": 6.05275297164917,
158
+ "learning_rate": 0.00015830526315789472,
159
+ "loss": 0.6517,
160
+ "step": 5000
 
 
161
  },
162
  {
163
+ "epoch": 0.8401949252226516,
164
+ "eval_loss": 0.599228024482727,
165
+ "eval_runtime": 177.562,
166
+ "eval_samples_per_second": 39.547,
167
+ "eval_steps_per_second": 2.472,
168
+ "eval_wer": 0.45710147623540043,
169
+ "step": 5000
170
  },
171
  {
172
+ "epoch": 0.9242144177449169,
173
+ "grad_norm": 6.2337541580200195,
174
+ "learning_rate": 0.0001425157894736842,
175
+ "loss": 0.6363,
176
+ "step": 5500
177
  },
178
  {
179
+ "epoch": 0.9242144177449169,
180
+ "eval_loss": 0.5727323293685913,
181
+ "eval_runtime": 176.4451,
182
+ "eval_samples_per_second": 39.797,
183
+ "eval_steps_per_second": 2.488,
184
+ "eval_wer": 0.4373297545808475,
185
+ "step": 5500
186
+ },
187
+ {
188
+ "epoch": 1.008233910267182,
189
+ "grad_norm": 9.131902694702148,
190
+ "learning_rate": 0.0001267263157894737,
191
+ "loss": 0.6111,
192
+ "step": 6000
193
+ },
194
+ {
195
+ "epoch": 1.008233910267182,
196
+ "eval_loss": 0.5573195219039917,
197
+ "eval_runtime": 177.5647,
198
+ "eval_samples_per_second": 39.546,
199
+ "eval_steps_per_second": 2.472,
200
+ "eval_wer": 0.42004278558046215,
201
+ "step": 6000
202
+ },
203
+ {
204
+ "epoch": 1.092253402789447,
205
+ "grad_norm": 7.859574794769287,
206
+ "learning_rate": 0.00011096842105263158,
207
+ "loss": 0.5352,
208
+ "step": 6500
209
+ },
210
+ {
211
+ "epoch": 1.092253402789447,
212
+ "eval_loss": 0.5566655993461609,
213
+ "eval_runtime": 176.9291,
214
+ "eval_samples_per_second": 39.688,
215
+ "eval_steps_per_second": 2.481,
216
+ "eval_wer": 0.4258892624137977,
217
+ "step": 6500
218
+ },
219
+ {
220
+ "epoch": 1.1762728953117123,
221
+ "grad_norm": 10.820630073547363,
222
+ "learning_rate": 9.521052631578946e-05,
223
+ "loss": 0.6231,
224
+ "step": 7000
225
+ },
226
+ {
227
+ "epoch": 1.1762728953117123,
228
+ "eval_loss": 0.825137734413147,
229
+ "eval_runtime": 177.7828,
230
+ "eval_samples_per_second": 39.498,
231
+ "eval_steps_per_second": 2.469,
232
+ "eval_wer": 0.6299844536866023,
233
+ "step": 7000
234
+ },
235
+ {
236
+ "epoch": 1.2602923878339776,
237
+ "grad_norm": 11.600907325744629,
238
+ "learning_rate": 7.942105263157894e-05,
239
+ "loss": 0.7995,
240
+ "step": 7500
241
+ },
242
+ {
243
+ "epoch": 1.2602923878339776,
244
+ "eval_loss": 0.660980761051178,
245
+ "eval_runtime": 178.3219,
246
+ "eval_samples_per_second": 39.378,
247
+ "eval_steps_per_second": 2.462,
248
+ "eval_wer": 0.4686482679812381,
249
+ "step": 7500
250
+ },
251
+ {
252
+ "epoch": 1.3443118803562426,
253
+ "grad_norm": 35.45184326171875,
254
+ "learning_rate": 6.363157894736841e-05,
255
+ "loss": 0.7696,
256
+ "step": 8000
257
+ },
258
+ {
259
+ "epoch": 1.3443118803562426,
260
+ "eval_loss": 0.733095109462738,
261
+ "eval_runtime": 178.388,
262
+ "eval_samples_per_second": 39.364,
263
+ "eval_steps_per_second": 2.461,
264
+ "eval_wer": 0.4870380951115481,
265
+ "step": 8000
266
+ },
267
+ {
268
+ "epoch": 1.4283313728785079,
269
+ "grad_norm": 3.933194875717163,
270
+ "learning_rate": 4.784210526315789e-05,
271
+ "loss": 0.7978,
272
+ "step": 8500
273
+ },
274
+ {
275
+ "epoch": 1.4283313728785079,
276
+ "eval_loss": 0.7790142297744751,
277
+ "eval_runtime": 178.093,
278
+ "eval_samples_per_second": 39.429,
279
+ "eval_steps_per_second": 2.465,
280
+ "eval_wer": 0.5432041350536149,
281
+ "step": 8500
282
+ },
283
+ {
284
+ "epoch": 1.512350865400773,
285
+ "grad_norm": 16.54388427734375,
286
+ "learning_rate": 3.208421052631579e-05,
287
+ "loss": 0.7844,
288
+ "step": 9000
289
+ },
290
+ {
291
+ "epoch": 1.512350865400773,
292
+ "eval_loss": 0.8260045647621155,
293
+ "eval_runtime": 177.7102,
294
+ "eval_samples_per_second": 39.514,
295
+ "eval_steps_per_second": 2.47,
296
+ "eval_wer": 0.5481603529146015,
297
+ "step": 9000
298
+ },
299
+ {
300
+ "epoch": 1.5963703579230382,
301
+ "grad_norm": 179.12344360351562,
302
+ "learning_rate": 1.632631578947368e-05,
303
+ "loss": 0.8211,
304
+ "step": 9500
305
+ },
306
+ {
307
+ "epoch": 1.5963703579230382,
308
+ "eval_loss": 0.8065445423126221,
309
+ "eval_runtime": 177.2427,
310
+ "eval_samples_per_second": 39.618,
311
+ "eval_steps_per_second": 2.477,
312
+ "eval_wer": 0.5491303365710414,
313
+ "step": 9500
314
+ },
315
+ {
316
+ "epoch": 1.6803898504453034,
317
+ "grad_norm": 22.516687393188477,
318
+ "learning_rate": 5.684210526315788e-07,
319
+ "loss": 0.7744,
320
+ "step": 10000
321
+ },
322
+ {
323
+ "epoch": 1.6803898504453034,
324
+ "eval_loss": 0.7622119784355164,
325
+ "eval_runtime": 177.7095,
326
+ "eval_samples_per_second": 39.514,
327
+ "eval_steps_per_second": 2.47,
328
+ "eval_wer": 0.518741944485045,
329
+ "step": 10000
330
  },
331
  {
332
+ "epoch": 1.6803898504453034,
333
+ "step": 10000,
334
+ "total_flos": 1.1109120866520904e+19,
335
+ "train_loss": 0.9622068939208984,
336
+ "train_runtime": 7532.6917,
337
+ "train_samples_per_second": 10.62,
338
+ "train_steps_per_second": 1.328
339
  }
340
  ],
341
  "logging_steps": 500,
342
+ "max_steps": 10000,
343
  "num_input_tokens_seen": 0,
344
  "num_train_epochs": 2,
345
  "save_steps": 500,
 
355
  "attributes": {}
356
  }
357
  },
358
+ "total_flos": 1.1109120866520904e+19,
359
+ "train_batch_size": 8,
360
  "trial_name": null,
361
  "trial_params": null
362
  }