mprzibilla committed on
Commit
68125e7
1 Parent(s): e1fe65f

Training in progress, epoch 1

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe496069d41533adab99f5654f48d2852900e3a265c6f9761f806e206bddb82f
3
- size 168299
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca2488c7c7e90101e4b14aa522a59ad2bf2118296861b9ea9158cca63add0769
3
+ size 721655813
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb4263fac02c7e3f7b0b8a14c2db49580404a71631cc58cd1845d1d7eb188f37
3
  size 377643361
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a87c8ec786e02155f6f7dec60ce648c9a2caaee133fb17f93a3d7e1be007a9
3
  size 377643361
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b93b6efe0b15dcc71515d500d0dd28ca0159793f22435ec50f82333b0c73b7f1
3
- size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea49d5f6f6fde3589f0e548b39f94eb336cef5282e7f7120921ec12941b1115e
3
+ size 14575
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e85dde748b24c92e67e7952517ef9975ea1302852fedfd5b4a4a96f5b401ed67
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d86624103fa47ceeb667c5882baed38a7d9803787a13ec04e49ed10f8b91695a
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05bcda048f77d9a7e1e5808d85cb3e44fb872033bb9c98a2805b9bf943cd48e5
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fac6afd13d5ae29d5fa67e15b29421215e62ba138c49b108fef3d1c1618449c6
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,271 +1,33 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
- "global_step": 9330,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 9.831904332129964e-05,
13
- "loss": 94.6389,
14
  "step": 622
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_cer": 0.9636363636363636,
19
- "eval_loss": 4.338134765625,
20
- "eval_new_wer": 0.9,
21
  "eval_old_wer": 1.0,
22
- "eval_runtime": 8.6594,
23
- "eval_samples_per_second": 24.251,
24
- "eval_steps_per_second": 3.118,
25
  "step": 622
26
- },
27
- {
28
- "epoch": 2.0,
29
- "learning_rate": 9.130189530685921e-05,
30
- "loss": 3.6309,
31
- "step": 1244
32
- },
33
- {
34
- "epoch": 2.0,
35
- "eval_cer": 0.9636363636363636,
36
- "eval_loss": 4.148990154266357,
37
- "eval_new_wer": 0.9,
38
- "eval_old_wer": 1.0,
39
- "eval_runtime": 8.5756,
40
- "eval_samples_per_second": 24.488,
41
- "eval_steps_per_second": 3.148,
42
- "step": 1244
43
- },
44
- {
45
- "epoch": 3.0,
46
- "learning_rate": 8.428474729241878e-05,
47
- "loss": 3.3586,
48
- "step": 1866
49
- },
50
- {
51
- "epoch": 3.0,
52
- "eval_cer": 0.9636363636363636,
53
- "eval_loss": 4.03645133972168,
54
- "eval_new_wer": 0.9,
55
- "eval_old_wer": 1.0,
56
- "eval_runtime": 8.62,
57
- "eval_samples_per_second": 24.362,
58
- "eval_steps_per_second": 3.132,
59
- "step": 1866
60
- },
61
- {
62
- "epoch": 4.0,
63
- "learning_rate": 7.726759927797835e-05,
64
- "loss": 3.319,
65
- "step": 2488
66
- },
67
- {
68
- "epoch": 4.0,
69
- "eval_cer": 0.9636363636363636,
70
- "eval_loss": 3.9594714641571045,
71
- "eval_new_wer": 0.9,
72
- "eval_old_wer": 1.0,
73
- "eval_runtime": 8.8026,
74
- "eval_samples_per_second": 23.857,
75
- "eval_steps_per_second": 3.067,
76
- "step": 2488
77
- },
78
- {
79
- "epoch": 5.0,
80
- "learning_rate": 7.02504512635379e-05,
81
- "loss": 3.2905,
82
- "step": 3110
83
- },
84
- {
85
- "epoch": 5.0,
86
- "eval_cer": 0.9636363636363636,
87
- "eval_loss": 3.8774044513702393,
88
- "eval_new_wer": 0.9,
89
- "eval_old_wer": 1.0,
90
- "eval_runtime": 8.6675,
91
- "eval_samples_per_second": 24.228,
92
- "eval_steps_per_second": 3.115,
93
- "step": 3110
94
- },
95
- {
96
- "epoch": 6.0,
97
- "learning_rate": 6.323330324909747e-05,
98
- "loss": 3.2693,
99
- "step": 3732
100
- },
101
- {
102
- "epoch": 6.0,
103
- "eval_cer": 0.9636363636363636,
104
- "eval_loss": 3.8836023807525635,
105
- "eval_new_wer": 0.9,
106
- "eval_old_wer": 1.0,
107
- "eval_runtime": 8.4359,
108
- "eval_samples_per_second": 24.894,
109
- "eval_steps_per_second": 3.201,
110
- "step": 3732
111
- },
112
- {
113
- "epoch": 7.0,
114
- "learning_rate": 5.621615523465704e-05,
115
- "loss": 3.2546,
116
- "step": 4354
117
- },
118
- {
119
- "epoch": 7.0,
120
- "eval_cer": 0.9636363636363636,
121
- "eval_loss": 3.8469648361206055,
122
- "eval_new_wer": 0.9,
123
- "eval_old_wer": 1.0,
124
- "eval_runtime": 8.5607,
125
- "eval_samples_per_second": 24.531,
126
- "eval_steps_per_second": 3.154,
127
- "step": 4354
128
- },
129
- {
130
- "epoch": 8.0,
131
- "learning_rate": 4.9199007220216606e-05,
132
- "loss": 3.2424,
133
- "step": 4976
134
- },
135
- {
136
- "epoch": 8.0,
137
- "eval_cer": 0.9636363636363636,
138
- "eval_loss": 3.798175811767578,
139
- "eval_new_wer": 0.9,
140
- "eval_old_wer": 1.0,
141
- "eval_runtime": 8.5308,
142
- "eval_samples_per_second": 24.617,
143
- "eval_steps_per_second": 3.165,
144
- "step": 4976
145
- },
146
- {
147
- "epoch": 9.0,
148
- "learning_rate": 4.2181859205776176e-05,
149
- "loss": 3.2306,
150
- "step": 5598
151
- },
152
- {
153
- "epoch": 9.0,
154
- "eval_cer": 0.9636363636363636,
155
- "eval_loss": 3.7588889598846436,
156
- "eval_new_wer": 0.9,
157
- "eval_old_wer": 1.0,
158
- "eval_runtime": 8.6596,
159
- "eval_samples_per_second": 24.25,
160
- "eval_steps_per_second": 3.118,
161
- "step": 5598
162
- },
163
- {
164
- "epoch": 10.0,
165
- "learning_rate": 3.516471119133574e-05,
166
- "loss": 3.2218,
167
- "step": 6220
168
- },
169
- {
170
- "epoch": 10.0,
171
- "eval_cer": 0.9636363636363636,
172
- "eval_loss": 3.738402843475342,
173
- "eval_new_wer": 0.9,
174
- "eval_old_wer": 1.0,
175
- "eval_runtime": 8.5202,
176
- "eval_samples_per_second": 24.647,
177
- "eval_steps_per_second": 3.169,
178
- "step": 6220
179
- },
180
- {
181
- "epoch": 11.0,
182
- "learning_rate": 2.814756317689531e-05,
183
- "loss": 3.2149,
184
- "step": 6842
185
- },
186
- {
187
- "epoch": 11.0,
188
- "eval_cer": 0.9636363636363636,
189
- "eval_loss": 3.7330896854400635,
190
- "eval_new_wer": 0.9,
191
- "eval_old_wer": 1.0,
192
- "eval_runtime": 8.6236,
193
- "eval_samples_per_second": 24.352,
194
- "eval_steps_per_second": 3.131,
195
- "step": 6842
196
- },
197
- {
198
- "epoch": 12.0,
199
- "learning_rate": 2.1130415162454876e-05,
200
- "loss": 3.2088,
201
- "step": 7464
202
- },
203
- {
204
- "epoch": 12.0,
205
- "eval_cer": 0.9636363636363636,
206
- "eval_loss": 3.721147298812866,
207
- "eval_new_wer": 0.9,
208
- "eval_old_wer": 1.0,
209
- "eval_runtime": 8.7097,
210
- "eval_samples_per_second": 24.111,
211
- "eval_steps_per_second": 3.1,
212
- "step": 7464
213
- },
214
- {
215
- "epoch": 13.0,
216
- "learning_rate": 1.4113267148014443e-05,
217
- "loss": 3.2056,
218
- "step": 8086
219
- },
220
- {
221
- "epoch": 13.0,
222
- "eval_cer": 0.9636363636363636,
223
- "eval_loss": 3.7029576301574707,
224
- "eval_new_wer": 0.9,
225
- "eval_old_wer": 1.0,
226
- "eval_runtime": 8.5372,
227
- "eval_samples_per_second": 24.598,
228
- "eval_steps_per_second": 3.163,
229
- "step": 8086
230
- },
231
- {
232
- "epoch": 14.0,
233
- "learning_rate": 7.0961191335740075e-06,
234
- "loss": 3.2031,
235
- "step": 8708
236
- },
237
- {
238
- "epoch": 14.0,
239
- "eval_cer": 0.9636363636363636,
240
- "eval_loss": 3.6995913982391357,
241
- "eval_new_wer": 0.9,
242
- "eval_old_wer": 1.0,
243
- "eval_runtime": 8.6411,
244
- "eval_samples_per_second": 24.302,
245
- "eval_steps_per_second": 3.125,
246
- "step": 8708
247
- },
248
- {
249
- "epoch": 15.0,
250
- "learning_rate": 7.8971119133574e-08,
251
- "loss": 3.1984,
252
- "step": 9330
253
- },
254
- {
255
- "epoch": 15.0,
256
- "eval_cer": 0.9636363636363636,
257
- "eval_loss": 3.7015275955200195,
258
- "eval_new_wer": 0.9,
259
- "eval_old_wer": 1.0,
260
- "eval_runtime": 8.7621,
261
- "eval_samples_per_second": 23.967,
262
- "eval_steps_per_second": 3.081,
263
- "step": 9330
264
  }
265
  ],
266
  "max_steps": 9330,
267
  "num_train_epochs": 15,
268
- "total_flos": 7.036379245117149e+18,
269
  "trial_name": null,
270
  "trial_params": null
271
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 622,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 9.83303249097473e-05,
13
+ "loss": 16.7556,
14
  "step": 622
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_cer": 0.9341991341991343,
19
+ "eval_loss": 3.3474249839782715,
20
+ "eval_new_wer": 0.9380952380952381,
21
  "eval_old_wer": 1.0,
22
+ "eval_runtime": 7.7874,
23
+ "eval_samples_per_second": 26.967,
24
+ "eval_steps_per_second": 3.467,
25
  "step": 622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "max_steps": 9330,
29
  "num_train_epochs": 15,
30
+ "total_flos": 4.58996805867552e+17,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8c3732970442c8712223c0ddb31aa73105a191b2acb15fdb899c989a36e1288
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0000b096c0b617d6be374ea1bea5add3f1148e5ae411824afcaf501618c33f42
3
  size 3387
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb4263fac02c7e3f7b0b8a14c2db49580404a71631cc58cd1845d1d7eb188f37
3
  size 377643361
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a87c8ec786e02155f6f7dec60ce648c9a2caaee133fb17f93a3d7e1be007a9
3
  size 377643361
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8c3732970442c8712223c0ddb31aa73105a191b2acb15fdb899c989a36e1288
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0000b096c0b617d6be374ea1bea5add3f1148e5ae411824afcaf501618c33f42
3
  size 3387