mamlong34 committed on
Commit
3f09e17
1 Parent(s): fe7e6c9

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 2.0,
3
- "eval_accuracy": 0.7986,
4
- "eval_loss": 0.33286044001579285,
5
- "eval_runtime": 151.7682,
6
  "eval_samples": 4887,
7
- "eval_samples_per_second": 32.2,
8
- "eval_steps_per_second": 2.016,
9
- "train_loss": 0.0,
10
- "train_runtime": 0.057,
11
  "train_samples": 87866,
12
- "train_samples_per_second": 1541330.369,
13
- "train_steps_per_second": 192661.911
14
  }
1
  {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.8023,
4
+ "eval_loss": 0.438219279050827,
5
+ "eval_runtime": 146.5968,
6
  "eval_samples": 4887,
7
+ "eval_samples_per_second": 33.336,
8
+ "eval_steps_per_second": 2.087,
9
+ "train_loss": 0.0359802827722491,
10
+ "train_runtime": 7164.0026,
11
  "train_samples": 87866,
12
+ "train_samples_per_second": 36.795,
13
+ "train_steps_per_second": 4.599
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 2.0,
3
- "eval_accuracy": 0.7986,
4
- "eval_loss": 0.33286044001579285,
5
- "eval_runtime": 151.7682,
6
  "eval_samples": 4887,
7
- "eval_samples_per_second": 32.2,
8
- "eval_steps_per_second": 2.016
9
  }
1
  {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.8023,
4
+ "eval_loss": 0.438219279050827,
5
+ "eval_runtime": 146.5968,
6
  "eval_samples": 4887,
7
+ "eval_samples_per_second": 33.336,
8
+ "eval_steps_per_second": 2.087
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c129de941d14268599c41de3b73b6af275457445a117b4fb84eae464de58f04
3
  size 2950790023
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35721e0aa79f878718ecc57475a889391a1d6b6404c863fc4fc0d17a8d9ca7c3
3
  size 2950790023
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.0,
3
- "train_loss": 0.0,
4
- "train_runtime": 0.057,
5
  "train_samples": 87866,
6
- "train_samples_per_second": 1541330.369,
7
- "train_steps_per_second": 192661.911
8
  }
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.0359802827722491,
4
+ "train_runtime": 7164.0026,
5
  "train_samples": 87866,
6
+ "train_samples_per_second": 36.795,
7
+ "train_steps_per_second": 4.599
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9999544771703008,
5
- "global_step": 21966,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -285,17 +285,158 @@
285
  },
286
  {
287
  "epoch": 2.0,
288
- "step": 21966,
289
- "total_flos": 3.80468091420672e+17,
290
- "train_loss": 0.0,
291
- "train_runtime": 0.057,
292
- "train_samples_per_second": 1541330.369,
293
- "train_steps_per_second": 192661.911
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  }
295
  ],
296
- "max_steps": 10983,
297
- "num_train_epochs": 1,
298
- "total_flos": 3.80468091420672e+17,
299
  "trial_name": null,
300
  "trial_params": null
301
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.999954477170301,
5
+ "global_step": 32949,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
285
  },
286
  {
287
  "epoch": 2.0,
288
+ "learning_rate": 3.3331303844865904e-05,
289
+ "loss": 0.138,
290
+ "step": 22000
291
+ },
292
+ {
293
+ "epoch": 2.05,
294
+ "learning_rate": 3.1809187494292065e-05,
295
+ "loss": 0.104,
296
+ "step": 22500
297
+ },
298
+ {
299
+ "epoch": 2.09,
300
+ "learning_rate": 3.0287071143718226e-05,
301
+ "loss": 0.112,
302
+ "step": 23000
303
+ },
304
+ {
305
+ "epoch": 2.14,
306
+ "learning_rate": 2.876495479314439e-05,
307
+ "loss": 0.1109,
308
+ "step": 23500
309
+ },
310
+ {
311
+ "epoch": 2.19,
312
+ "learning_rate": 2.724283844257055e-05,
313
+ "loss": 0.1059,
314
+ "step": 24000
315
+ },
316
+ {
317
+ "epoch": 2.23,
318
+ "learning_rate": 2.5720722091996712e-05,
319
+ "loss": 0.1048,
320
+ "step": 24500
321
+ },
322
+ {
323
+ "epoch": 2.28,
324
+ "learning_rate": 2.4198605741422876e-05,
325
+ "loss": 0.1067,
326
+ "step": 25000
327
+ },
328
+ {
329
+ "epoch": 2.32,
330
+ "learning_rate": 2.2676489390849037e-05,
331
+ "loss": 0.1168,
332
+ "step": 25500
333
+ },
334
+ {
335
+ "epoch": 2.37,
336
+ "learning_rate": 2.11543730402752e-05,
337
+ "loss": 0.1084,
338
+ "step": 26000
339
+ },
340
+ {
341
+ "epoch": 2.41,
342
+ "learning_rate": 1.9632256689701363e-05,
343
+ "loss": 0.1146,
344
+ "step": 26500
345
+ },
346
+ {
347
+ "epoch": 2.46,
348
+ "learning_rate": 1.8110140339127524e-05,
349
+ "loss": 0.107,
350
+ "step": 27000
351
+ },
352
+ {
353
+ "epoch": 2.5,
354
+ "learning_rate": 1.6588023988553685e-05,
355
+ "loss": 0.1107,
356
+ "step": 27500
357
+ },
358
+ {
359
+ "epoch": 2.55,
360
+ "learning_rate": 1.5065907637979847e-05,
361
+ "loss": 0.1087,
362
+ "step": 28000
363
+ },
364
+ {
365
+ "epoch": 2.59,
366
+ "learning_rate": 1.354379128740601e-05,
367
+ "loss": 0.1109,
368
+ "step": 28500
369
+ },
370
+ {
371
+ "epoch": 2.64,
372
+ "learning_rate": 1.2021674936832173e-05,
373
+ "loss": 0.101,
374
+ "step": 29000
375
+ },
376
+ {
377
+ "epoch": 2.69,
378
+ "learning_rate": 1.0499558586258334e-05,
379
+ "loss": 0.1066,
380
+ "step": 29500
381
+ },
382
+ {
383
+ "epoch": 2.73,
384
+ "learning_rate": 8.977442235684496e-06,
385
+ "loss": 0.1092,
386
+ "step": 30000
387
+ },
388
+ {
389
+ "epoch": 2.78,
390
+ "learning_rate": 7.455325885110659e-06,
391
+ "loss": 0.1006,
392
+ "step": 30500
393
+ },
394
+ {
395
+ "epoch": 2.82,
396
+ "learning_rate": 5.93320953453682e-06,
397
+ "loss": 0.1155,
398
+ "step": 31000
399
+ },
400
+ {
401
+ "epoch": 2.87,
402
+ "learning_rate": 4.411093183962983e-06,
403
+ "loss": 0.1204,
404
+ "step": 31500
405
+ },
406
+ {
407
+ "epoch": 2.91,
408
+ "learning_rate": 2.8889768333891442e-06,
409
+ "loss": 0.0972,
410
+ "step": 32000
411
+ },
412
+ {
413
+ "epoch": 2.96,
414
+ "learning_rate": 1.3668604828153065e-06,
415
+ "loss": 0.0929,
416
+ "step": 32500
417
+ },
418
+ {
419
+ "epoch": 3.0,
420
+ "eval_accuracy": 0.8023,
421
+ "eval_loss": 0.438219279050827,
422
+ "eval_runtime": 146.5435,
423
+ "eval_samples_per_second": 33.348,
424
+ "eval_steps_per_second": 2.088,
425
+ "step": 32949
426
+ },
427
+ {
428
+ "epoch": 3.0,
429
+ "step": 32949,
430
+ "total_flos": 5.70697807036416e+17,
431
+ "train_loss": 0.0359802827722491,
432
+ "train_runtime": 7164.0026,
433
+ "train_samples_per_second": 36.795,
434
+ "train_steps_per_second": 4.599
435
  }
436
  ],
437
+ "max_steps": 32949,
438
+ "num_train_epochs": 3,
439
+ "total_flos": 5.70697807036416e+17,
440
  "trial_name": null,
441
  "trial_params": null
442
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a2565c39287ef9a560b024aded0fa87f7dd9a636e2ef6ff73357484ea0a21c7
3
  size 2927
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d00b7d9305a03ec44724d87e1c60fe5cfecdc9a609742300dd766c2e7395c56
3
  size 2927