mprzibilla committed on
Commit
da6e01f
1 Parent(s): ceb438d

Training in progress, epoch 1

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:616a0a6be5a29eebf810851ed67dd6a3c318e21864fa31e058c64c7fad505b40
3
- size 180587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4949bb00d7ca15f057d50fae774c2c6f9a465230ae278e77c6cdd0934a3e9fa
3
+ size 721668101
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6ec674c1c6f40d13cb9f4f66472ae211329f893fb31cd7ff1f4fb6ad1cb74ed
3
  size 377649505
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b6585ff5ff973a18347ce73aae8cdfcc6753d94a283426d2b16d5f054cec33
3
  size 377649505
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c3d2e69352425000ce4e1001ef6ade8fbfa4e9c02b5a24d5a3554e05684c3e6
3
- size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f8be3d1844aecea40ef8c131bd79a4e8e08348c2548590172ae269a182679a2
3
+ size 14639
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2ac061210834cca32f8f940f47b10e93dc80b0dbc17fb92570b8d7f836f0dc8
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4584e9de52bfcb5211b8ab7e5657a1cdbdfabfbb603d0f603dec927f84c5177a
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7de22907ced1cb54d5cd795c776d98eec139cf091efb3e0653ea4f465608104
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ac3bb5feef285af2ff3f8db67718e42884e76005d20a7c599d3dd027e85afd1
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,271 +1,33 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
- "global_step": 20085,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 9.830197578743254e-05,
13
- "loss": 68.0265,
14
  "step": 1339
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_cer": 1.0,
19
- "eval_loss": 3.677399158477783,
20
- "eval_new_wer": 1.0,
21
- "eval_old_wer": 1.0,
22
- "eval_runtime": 8.4041,
23
- "eval_samples_per_second": 24.988,
24
- "eval_steps_per_second": 3.213,
25
  "step": 1339
26
- },
27
- {
28
- "epoch": 2.0,
29
- "learning_rate": 9.128452387191448e-05,
30
- "loss": 3.4235,
31
- "step": 2678
32
- },
33
- {
34
- "epoch": 2.0,
35
- "eval_cer": 1.0,
36
- "eval_loss": 3.696988821029663,
37
- "eval_new_wer": 1.0,
38
- "eval_old_wer": 1.0,
39
- "eval_runtime": 8.3938,
40
- "eval_samples_per_second": 25.018,
41
- "eval_steps_per_second": 3.217,
42
- "step": 2678
43
- },
44
- {
45
- "epoch": 3.0,
46
- "learning_rate": 8.426707195639642e-05,
47
- "loss": 3.3447,
48
- "step": 4017
49
- },
50
- {
51
- "epoch": 3.0,
52
- "eval_cer": 1.0,
53
- "eval_loss": 3.692437171936035,
54
- "eval_new_wer": 1.0,
55
- "eval_old_wer": 1.0,
56
- "eval_runtime": 8.4383,
57
- "eval_samples_per_second": 24.886,
58
- "eval_steps_per_second": 3.2,
59
- "step": 4017
60
- },
61
- {
62
- "epoch": 4.0,
63
- "learning_rate": 7.724962004087837e-05,
64
- "loss": 3.3042,
65
- "step": 5356
66
- },
67
- {
68
- "epoch": 4.0,
69
- "eval_cer": 1.0,
70
- "eval_loss": 3.6443004608154297,
71
- "eval_new_wer": 1.0,
72
- "eval_old_wer": 1.0,
73
- "eval_runtime": 8.3107,
74
- "eval_samples_per_second": 25.269,
75
- "eval_steps_per_second": 3.249,
76
- "step": 5356
77
- },
78
- {
79
- "epoch": 5.0,
80
- "learning_rate": 7.023216812536031e-05,
81
- "loss": 3.2724,
82
- "step": 6695
83
- },
84
- {
85
- "epoch": 5.0,
86
- "eval_cer": 1.0,
87
- "eval_loss": 3.557971954345703,
88
- "eval_new_wer": 1.0,
89
- "eval_old_wer": 1.0,
90
- "eval_runtime": 8.7787,
91
- "eval_samples_per_second": 23.922,
92
- "eval_steps_per_second": 3.076,
93
- "step": 6695
94
- },
95
- {
96
- "epoch": 6.0,
97
- "learning_rate": 6.321471620984226e-05,
98
- "loss": 3.2404,
99
- "step": 8034
100
- },
101
- {
102
- "epoch": 6.0,
103
- "eval_cer": 0.9965367965367965,
104
- "eval_loss": 3.4822113513946533,
105
- "eval_new_wer": 1.0,
106
- "eval_old_wer": 1.0,
107
- "eval_runtime": 8.2906,
108
- "eval_samples_per_second": 25.33,
109
- "eval_steps_per_second": 3.257,
110
- "step": 8034
111
- },
112
- {
113
- "epoch": 7.0,
114
- "learning_rate": 5.61972642943242e-05,
115
- "loss": 3.2063,
116
- "step": 9373
117
- },
118
- {
119
- "epoch": 7.0,
120
- "eval_cer": 0.9956709956709957,
121
- "eval_loss": 3.402449607849121,
122
- "eval_new_wer": 1.0,
123
- "eval_old_wer": 1.0,
124
- "eval_runtime": 8.3982,
125
- "eval_samples_per_second": 25.005,
126
- "eval_steps_per_second": 3.215,
127
- "step": 9373
128
- },
129
- {
130
- "epoch": 8.0,
131
- "learning_rate": 4.9179812378806144e-05,
132
- "loss": 3.162,
133
- "step": 10712
134
- },
135
- {
136
- "epoch": 8.0,
137
- "eval_cer": 0.9930735930735931,
138
- "eval_loss": 3.3249175548553467,
139
- "eval_new_wer": 1.0,
140
- "eval_old_wer": 1.0,
141
- "eval_runtime": 8.6025,
142
- "eval_samples_per_second": 24.411,
143
- "eval_steps_per_second": 3.139,
144
- "step": 10712
145
- },
146
- {
147
- "epoch": 9.0,
148
- "learning_rate": 4.216236046328809e-05,
149
- "loss": 3.106,
150
- "step": 12051
151
- },
152
- {
153
- "epoch": 9.0,
154
- "eval_cer": 0.9904761904761905,
155
- "eval_loss": 3.253854990005493,
156
- "eval_new_wer": 1.0,
157
- "eval_old_wer": 1.0,
158
- "eval_runtime": 7.9844,
159
- "eval_samples_per_second": 26.301,
160
- "eval_steps_per_second": 3.382,
161
- "step": 12051
162
- },
163
- {
164
- "epoch": 10.0,
165
- "learning_rate": 3.514490854777003e-05,
166
- "loss": 3.0482,
167
- "step": 13390
168
- },
169
- {
170
- "epoch": 10.0,
171
- "eval_cer": 0.9896103896103896,
172
- "eval_loss": 3.210585832595825,
173
- "eval_new_wer": 1.0,
174
- "eval_old_wer": 1.0,
175
- "eval_runtime": 7.9205,
176
- "eval_samples_per_second": 26.514,
177
- "eval_steps_per_second": 3.409,
178
- "step": 13390
179
- },
180
- {
181
- "epoch": 11.0,
182
- "learning_rate": 2.812745663225198e-05,
183
- "loss": 2.9975,
184
- "step": 14729
185
- },
186
- {
187
- "epoch": 11.0,
188
- "eval_cer": 0.9887445887445887,
189
- "eval_loss": 3.1696033477783203,
190
- "eval_new_wer": 1.0,
191
- "eval_old_wer": 1.0,
192
- "eval_runtime": 7.9544,
193
- "eval_samples_per_second": 26.4,
194
- "eval_steps_per_second": 3.394,
195
- "step": 14729
196
- },
197
- {
198
- "epoch": 12.0,
199
- "learning_rate": 2.1110004716733924e-05,
200
- "loss": 2.9573,
201
- "step": 16068
202
- },
203
- {
204
- "epoch": 12.0,
205
- "eval_cer": 0.9887445887445887,
206
- "eval_loss": 3.1518020629882812,
207
- "eval_new_wer": 1.0,
208
- "eval_old_wer": 1.0,
209
- "eval_runtime": 7.8673,
210
- "eval_samples_per_second": 26.693,
211
- "eval_steps_per_second": 3.432,
212
- "step": 16068
213
- },
214
- {
215
- "epoch": 13.0,
216
- "learning_rate": 1.409255280121587e-05,
217
- "loss": 2.9287,
218
- "step": 17407
219
- },
220
- {
221
- "epoch": 13.0,
222
- "eval_cer": 0.9904761904761905,
223
- "eval_loss": 3.1377389430999756,
224
- "eval_new_wer": 1.0,
225
- "eval_old_wer": 1.0,
226
- "eval_runtime": 7.8781,
227
- "eval_samples_per_second": 26.656,
228
- "eval_steps_per_second": 3.427,
229
- "step": 17407
230
- },
231
- {
232
- "epoch": 14.0,
233
- "learning_rate": 7.080341701168702e-06,
234
- "loss": 2.9083,
235
- "step": 18746
236
- },
237
- {
238
- "epoch": 14.0,
239
- "eval_cer": 0.9904761904761905,
240
- "eval_loss": 3.140259265899658,
241
- "eval_new_wer": 1.0,
242
- "eval_old_wer": 1.0,
243
- "eval_runtime": 7.9649,
244
- "eval_samples_per_second": 26.366,
245
- "eval_steps_per_second": 3.39,
246
- "step": 18746
247
- },
248
- {
249
- "epoch": 15.0,
250
- "learning_rate": 6.288978565064725e-08,
251
- "loss": 2.8993,
252
- "step": 20085
253
- },
254
- {
255
- "epoch": 15.0,
256
- "eval_cer": 0.9904761904761905,
257
- "eval_loss": 3.1403210163116455,
258
- "eval_new_wer": 1.0,
259
- "eval_old_wer": 1.0,
260
- "eval_runtime": 7.9302,
261
- "eval_samples_per_second": 26.481,
262
- "eval_steps_per_second": 3.405,
263
- "step": 20085
264
  }
265
  ],
266
  "max_steps": 20085,
267
  "num_train_epochs": 15,
268
- "total_flos": 1.5746037785270553e+19,
269
  "trial_name": null,
270
  "trial_params": null
271
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 1339,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 9.828625334101986e-05,
13
+ "loss": 11.016,
14
  "step": 1339
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_cer": 0.24761904761904763,
19
+ "eval_loss": 1.0964484214782715,
20
+ "eval_new_wer": 0.12380952380952381,
21
+ "eval_old_wer": 0.7619047619047619,
22
+ "eval_runtime": 7.9497,
23
+ "eval_samples_per_second": 26.416,
24
+ "eval_steps_per_second": 3.396,
25
  "step": 1339
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "max_steps": 20085,
29
  "num_train_epochs": 15,
30
+ "total_flos": 1.0183925695349146e+18,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b335db8e6f2872d356dfb28f9345f3b4eb5c195a983190a4dbe94d702a39dfe5
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e59e976e9cd6f81f13cc292aef25698fbfbf3bab290ae8030b707d8c8e21db2f
3
  size 3387
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6ec674c1c6f40d13cb9f4f66472ae211329f893fb31cd7ff1f4fb6ad1cb74ed
3
  size 377649505
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b6585ff5ff973a18347ce73aae8cdfcc6753d94a283426d2b16d5f054cec33
3
  size 377649505
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b335db8e6f2872d356dfb28f9345f3b4eb5c195a983190a4dbe94d702a39dfe5
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e59e976e9cd6f81f13cc292aef25698fbfbf3bab290ae8030b707d8c8e21db2f
3
  size 3387