mprzibilla committed on
Commit
3b2a89b
1 Parent(s): c1f461a

Training in progress, epoch 1

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:128a94b56ac83cb4272fe5debfffb148e1b4a4e73c41edd529851347b4952513
3
- size 174443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a365e4891d90ec23233f68651248b0bb5da2d876f36817a4be0ad6f4f0c322
3
+ size 721661957
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71a045dea2c5c6e15f1f42b075b58894e90de6d6a649eadded10af44c12c7bd0
3
  size 377646433
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea916919791655f74aec3b70a6233614608427e834da06bc5f606ea72149c620
3
  size 377646433
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9bdcf09921bc14cad1e6e13a86c29a6b2e2586fed5f02d8ae0b665e3f67ad32
3
- size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41734c590d4ca9111afda14d4d6e0db4819c1b65e8ffee27fa29d4e1cde666e4
3
+ size 14639
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d345ce6510ab8af247d205e03e585917625e9354fcd14c7564f682739dbc9cc
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd86c1f3d5f1b7e58ab1b190ff0893321dea6cfb611bb6b4388ebc93c6931aa8
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:748a98a49b1ce62329be45e166c4c382fbcf1153bd7d7c5e4d58f6bb85eae1b8
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a50eceed5d5a14aef24a7445d77f16047b39c151ba5bdc2775618ddbda6486ac
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,271 +1,33 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
- "global_step": 11085,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 9.833823948342988e-05,
13
- "loss": 106.1639,
14
  "step": 739
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_cer": 0.9636363636363636,
19
- "eval_loss": 3.7620224952697754,
20
- "eval_new_wer": 0.9,
21
  "eval_old_wer": 1.0,
22
- "eval_runtime": 7.2762,
23
- "eval_samples_per_second": 24.738,
24
- "eval_steps_per_second": 3.161,
25
  "step": 739
26
- },
27
- {
28
- "epoch": 2.0,
29
- "learning_rate": 9.132086221631374e-05,
30
- "loss": 3.7546,
31
- "step": 1478
32
- },
33
- {
34
- "epoch": 2.0,
35
- "eval_cer": 0.9636363636363636,
36
- "eval_loss": 3.780017852783203,
37
- "eval_new_wer": 0.9,
38
- "eval_old_wer": 1.0,
39
- "eval_runtime": 7.3406,
40
- "eval_samples_per_second": 24.521,
41
- "eval_steps_per_second": 3.133,
42
- "step": 1478
43
- },
44
- {
45
- "epoch": 3.0,
46
- "learning_rate": 8.43034849491976e-05,
47
- "loss": 3.4946,
48
- "step": 2217
49
- },
50
- {
51
- "epoch": 3.0,
52
- "eval_cer": 0.9636363636363636,
53
- "eval_loss": 3.61942720413208,
54
- "eval_new_wer": 0.9,
55
- "eval_old_wer": 1.0,
56
- "eval_runtime": 7.4322,
57
- "eval_samples_per_second": 24.219,
58
- "eval_steps_per_second": 3.095,
59
- "step": 2217
60
- },
61
- {
62
- "epoch": 4.0,
63
- "learning_rate": 7.728610768208148e-05,
64
- "loss": 3.4427,
65
- "step": 2956
66
- },
67
- {
68
- "epoch": 4.0,
69
- "eval_cer": 0.9636363636363636,
70
- "eval_loss": 3.581315040588379,
71
- "eval_new_wer": 0.9,
72
- "eval_old_wer": 1.0,
73
- "eval_runtime": 7.2928,
74
- "eval_samples_per_second": 24.682,
75
- "eval_steps_per_second": 3.154,
76
- "step": 2956
77
- },
78
- {
79
- "epoch": 5.0,
80
- "learning_rate": 7.026873041496534e-05,
81
- "loss": 3.4094,
82
- "step": 3695
83
- },
84
- {
85
- "epoch": 5.0,
86
- "eval_cer": 0.9636363636363636,
87
- "eval_loss": 3.5385422706604004,
88
- "eval_new_wer": 0.9,
89
- "eval_old_wer": 1.0,
90
- "eval_runtime": 7.2804,
91
- "eval_samples_per_second": 24.724,
92
- "eval_steps_per_second": 3.159,
93
- "step": 3695
94
- },
95
- {
96
- "epoch": 6.0,
97
- "learning_rate": 6.325135314784922e-05,
98
- "loss": 3.384,
99
- "step": 4434
100
- },
101
- {
102
- "epoch": 6.0,
103
- "eval_cer": 0.9636363636363636,
104
- "eval_loss": 3.447321891784668,
105
- "eval_new_wer": 0.9,
106
- "eval_old_wer": 1.0,
107
- "eval_runtime": 7.2913,
108
- "eval_samples_per_second": 24.687,
109
- "eval_steps_per_second": 3.154,
110
- "step": 4434
111
- },
112
- {
113
- "epoch": 7.0,
114
- "learning_rate": 5.6233975880733084e-05,
115
- "loss": 3.3675,
116
- "step": 5173
117
- },
118
- {
119
- "epoch": 7.0,
120
- "eval_cer": 0.9636363636363636,
121
- "eval_loss": 3.407554864883423,
122
- "eval_new_wer": 0.9,
123
- "eval_old_wer": 1.0,
124
- "eval_runtime": 7.8277,
125
- "eval_samples_per_second": 22.995,
126
- "eval_steps_per_second": 2.938,
127
- "step": 5173
128
- },
129
- {
130
- "epoch": 8.0,
131
- "learning_rate": 4.9216598613616946e-05,
132
- "loss": 3.3539,
133
- "step": 5912
134
- },
135
- {
136
- "epoch": 8.0,
137
- "eval_cer": 0.9636363636363636,
138
- "eval_loss": 3.375277042388916,
139
- "eval_new_wer": 0.9,
140
- "eval_old_wer": 1.0,
141
- "eval_runtime": 7.2374,
142
- "eval_samples_per_second": 24.871,
143
- "eval_steps_per_second": 3.178,
144
- "step": 5912
145
- },
146
- {
147
- "epoch": 9.0,
148
- "learning_rate": 4.2199221346500814e-05,
149
- "loss": 3.3382,
150
- "step": 6651
151
- },
152
- {
153
- "epoch": 9.0,
154
- "eval_cer": 0.9636363636363636,
155
- "eval_loss": 3.335272789001465,
156
- "eval_new_wer": 0.9,
157
- "eval_old_wer": 1.0,
158
- "eval_runtime": 7.2156,
159
- "eval_samples_per_second": 24.946,
160
- "eval_steps_per_second": 3.188,
161
- "step": 6651
162
- },
163
- {
164
- "epoch": 10.0,
165
- "learning_rate": 3.5181844079384676e-05,
166
- "loss": 3.3304,
167
- "step": 7390
168
- },
169
- {
170
- "epoch": 10.0,
171
- "eval_cer": 0.9636363636363636,
172
- "eval_loss": 3.328141212463379,
173
- "eval_new_wer": 0.9,
174
- "eval_old_wer": 1.0,
175
- "eval_runtime": 7.5963,
176
- "eval_samples_per_second": 23.696,
177
- "eval_steps_per_second": 3.028,
178
- "step": 7390
179
- },
180
- {
181
- "epoch": 11.0,
182
- "learning_rate": 2.816446681226854e-05,
183
- "loss": 3.3184,
184
- "step": 8129
185
- },
186
- {
187
- "epoch": 11.0,
188
- "eval_cer": 0.9636363636363636,
189
- "eval_loss": 3.3093180656433105,
190
- "eval_new_wer": 0.9,
191
- "eval_old_wer": 1.0,
192
- "eval_runtime": 7.2653,
193
- "eval_samples_per_second": 24.775,
194
- "eval_steps_per_second": 3.166,
195
- "step": 8129
196
- },
197
- {
198
- "epoch": 12.0,
199
- "learning_rate": 2.1147089545152407e-05,
200
- "loss": 3.3116,
201
- "step": 8868
202
- },
203
- {
204
- "epoch": 12.0,
205
- "eval_cer": 0.9636363636363636,
206
- "eval_loss": 3.3055644035339355,
207
- "eval_new_wer": 0.9,
208
- "eval_old_wer": 1.0,
209
- "eval_runtime": 7.2417,
210
- "eval_samples_per_second": 24.856,
211
- "eval_steps_per_second": 3.176,
212
- "step": 8868
213
- },
214
- {
215
- "epoch": 13.0,
216
- "learning_rate": 1.4129712278036275e-05,
217
- "loss": 3.3032,
218
- "step": 9607
219
- },
220
- {
221
- "epoch": 13.0,
222
- "eval_cer": 0.9636363636363636,
223
- "eval_loss": 3.3021399974823,
224
- "eval_new_wer": 0.9,
225
- "eval_old_wer": 1.0,
226
- "eval_runtime": 7.3911,
227
- "eval_samples_per_second": 24.354,
228
- "eval_steps_per_second": 3.112,
229
- "step": 9607
230
- },
231
- {
232
- "epoch": 14.0,
233
- "learning_rate": 7.112335010920141e-06,
234
- "loss": 3.3038,
235
- "step": 10346
236
- },
237
- {
238
- "epoch": 14.0,
239
- "eval_cer": 0.9636363636363636,
240
- "eval_loss": 3.3013970851898193,
241
- "eval_new_wer": 0.9,
242
- "eval_old_wer": 1.0,
243
- "eval_runtime": 7.3511,
244
- "eval_samples_per_second": 24.486,
245
- "eval_steps_per_second": 3.129,
246
- "step": 10346
247
- },
248
- {
249
- "epoch": 15.0,
250
- "learning_rate": 9.495774380400722e-08,
251
- "loss": 3.2971,
252
- "step": 11085
253
- },
254
- {
255
- "epoch": 15.0,
256
- "eval_cer": 0.9636363636363636,
257
- "eval_loss": 3.2989487648010254,
258
- "eval_new_wer": 0.9,
259
- "eval_old_wer": 1.0,
260
- "eval_runtime": 7.2244,
261
- "eval_samples_per_second": 24.915,
262
- "eval_steps_per_second": 3.184,
263
- "step": 11085
264
  }
265
  ],
266
  "max_steps": 11085,
267
  "num_train_epochs": 15,
268
- "total_flos": 9.523001095942625e+18,
269
  "trial_name": null,
270
  "trial_params": null
271
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 739,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 9.831924793466908e-05,
13
+ "loss": 18.2448,
14
  "step": 739
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_cer": 1.0,
19
+ "eval_loss": 3.3216891288757324,
20
+ "eval_new_wer": 1.0,
21
  "eval_old_wer": 1.0,
22
+ "eval_runtime": 7.2541,
23
+ "eval_samples_per_second": 24.814,
24
+ "eval_steps_per_second": 3.171,
25
  "step": 739
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "max_steps": 11085,
29
  "num_train_epochs": 15,
30
+ "total_flos": 5.7487243575776256e+17,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:115fc2c46d06a974e25903ba4a4848c2b0bd38c134a25db9593ebb2b4a25589e
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c03ddeae8e5b6c8b1c706e99c450b1a83dc9b242862b2db4c5fb889a74640900
3
  size 3387
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71a045dea2c5c6e15f1f42b075b58894e90de6d6a649eadded10af44c12c7bd0
3
  size 377646433
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea916919791655f74aec3b70a6233614608427e834da06bc5f606ea72149c620
3
  size 377646433
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:115fc2c46d06a974e25903ba4a4848c2b0bd38c134a25db9593ebb2b4a25589e
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c03ddeae8e5b6c8b1c706e99c450b1a83dc9b242862b2db4c5fb889a74640900
3
  size 3387