marinone94 committed on
Commit
fdf3f3d
•
1 Parent(s): ac1c759

Training in progress, step 700

Browse files
{checkpoint-400 → checkpoint-700}/config.json RENAMED
File without changes
{checkpoint-400 → checkpoint-700}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b573eee933c6c36bd4ddb6906221d3843d7e3e13dab0b96a39404d3a30f74898
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2734597abb10a2a590cf139e66decfdf1bc18e26d32aa5f4749a68f74109f763
3
  size 2490337809
{checkpoint-400 → checkpoint-700}/preprocessor_config.json RENAMED
File without changes
{checkpoint-400 → checkpoint-700}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28c7e3ec36d37a20298023cfa6ac730ae02e82d170ef8859ec5740936e2bc809
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:866e0246bd78706b1bfb6c07818d34662e459927e54718460f8a950f5fc33ae8
3
  size 1262063089
{checkpoint-400 → checkpoint-700}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:855318e4e46176f35f66cfa7dbcef4ac9632248b73c3328855184518a3b76d7f
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21798b4aee64ae056ff3e0e6a64832942beff047fe94bbbddb9d06dcf74d9125
3
+ size 14567
{checkpoint-400 → checkpoint-700}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:476e510c8ea7edbd2b51d1e76a4e037820a5639381c0d8b5d32dafa492795a1e
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb213daf5cce18a5f92167ca14da9df084d907f2b9796efc4666630f312b58c
3
  size 559
{checkpoint-400 → checkpoint-700}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d06447f3e192636cdb71ae042312281cde114831b8524799c0556275e17d331
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcdecc30328ffcc8e9b8e7d7b6cfb5d7c85a8e3d5e777d680eff74cf60f2dc24
3
  size 623
{checkpoint-400 → checkpoint-700}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.39344262295082,
5
- "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -162,11 +162,128 @@
162
  "eval_steps_per_second": 0.766,
163
  "eval_wer": 1.0,
164
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  }
166
  ],
167
  "max_steps": 4550,
168
  "num_train_epochs": 50,
169
- "total_flos": 6.222452805866187e+18,
170
  "trial_name": null,
171
  "trial_params": null
172
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.688524590163935,
5
+ "global_step": 700,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
162
  "eval_steps_per_second": 0.766,
163
  "eval_wer": 1.0,
164
  "step": 400
165
+ },
166
+ {
167
+ "epoch": 4.61,
168
+ "learning_rate": 5.25e-05,
169
+ "loss": 3.0309,
170
+ "step": 420
171
+ },
172
+ {
173
+ "epoch": 4.83,
174
+ "learning_rate": 5.5e-05,
175
+ "loss": 3.0259,
176
+ "step": 440
177
+ },
178
+ {
179
+ "epoch": 5.05,
180
+ "learning_rate": 5.75e-05,
181
+ "loss": 3.0998,
182
+ "step": 460
183
+ },
184
+ {
185
+ "epoch": 5.27,
186
+ "learning_rate": 6e-05,
187
+ "loss": 3.0152,
188
+ "step": 480
189
+ },
190
+ {
191
+ "epoch": 5.49,
192
+ "learning_rate": 6.25e-05,
193
+ "loss": 3.0129,
194
+ "step": 500
195
+ },
196
+ {
197
+ "epoch": 5.49,
198
+ "eval_loss": 3.039973497390747,
199
+ "eval_runtime": 190.8567,
200
+ "eval_samples_per_second": 25.375,
201
+ "eval_steps_per_second": 0.796,
202
+ "eval_wer": 1.0,
203
+ "step": 500
204
+ },
205
+ {
206
+ "epoch": 5.71,
207
+ "learning_rate": 6.500000000000001e-05,
208
+ "loss": 3.0088,
209
+ "step": 520
210
+ },
211
+ {
212
+ "epoch": 5.93,
213
+ "learning_rate": 6.75e-05,
214
+ "loss": 3.0051,
215
+ "step": 540
216
+ },
217
+ {
218
+ "epoch": 6.15,
219
+ "learning_rate": 7.000000000000001e-05,
220
+ "loss": 3.073,
221
+ "step": 560
222
+ },
223
+ {
224
+ "epoch": 6.37,
225
+ "learning_rate": 7.25e-05,
226
+ "loss": 3.0031,
227
+ "step": 580
228
+ },
229
+ {
230
+ "epoch": 6.59,
231
+ "learning_rate": 7.5e-05,
232
+ "loss": 2.9964,
233
+ "step": 600
234
+ },
235
+ {
236
+ "epoch": 6.59,
237
+ "eval_loss": 2.998962640762329,
238
+ "eval_runtime": 193.5213,
239
+ "eval_samples_per_second": 25.026,
240
+ "eval_steps_per_second": 0.785,
241
+ "eval_wer": 1.0,
242
+ "step": 600
243
+ },
244
+ {
245
+ "epoch": 6.81,
246
+ "learning_rate": 7.75e-05,
247
+ "loss": 2.9921,
248
+ "step": 620
249
+ },
250
+ {
251
+ "epoch": 7.03,
252
+ "learning_rate": 8e-05,
253
+ "loss": 3.0665,
254
+ "step": 640
255
+ },
256
+ {
257
+ "epoch": 7.25,
258
+ "learning_rate": 8.25e-05,
259
+ "loss": 2.9826,
260
+ "step": 660
261
+ },
262
+ {
263
+ "epoch": 7.47,
264
+ "learning_rate": 8.5e-05,
265
+ "loss": 2.9689,
266
+ "step": 680
267
+ },
268
+ {
269
+ "epoch": 7.69,
270
+ "learning_rate": 8.75e-05,
271
+ "loss": 2.9602,
272
+ "step": 700
273
+ },
274
+ {
275
+ "epoch": 7.69,
276
+ "eval_loss": 2.9620397090911865,
277
+ "eval_runtime": 193.5851,
278
+ "eval_samples_per_second": 25.017,
279
+ "eval_steps_per_second": 0.785,
280
+ "eval_wer": 1.0,
281
+ "step": 700
282
  }
283
  ],
284
  "max_steps": 4550,
285
  "num_train_epochs": 50,
286
+ "total_flos": 1.0835835827730653e+19,
287
  "trial_name": null,
288
  "trial_params": null
289
  }
{checkpoint-400 → checkpoint-700}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3baffe7a116d58666fc0726fb4fa6188834614eac15e96843270fc53529b943e
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:866e0246bd78706b1bfb6c07818d34662e459927e54718460f8a950f5fc33ae8
3
  size 1262063089