Wikidepia committed on
Commit
3b9b5e8
1 Parent(s): a3e2d65

Update model

Browse files
README.md CHANGED
@@ -40,7 +40,7 @@ The following hyperparameters were used during training:
40
  - total_eval_batch_size: 16
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
- - num_epochs: 2.0
44
  - mixed_precision_training: Native AMP
45
 
46
  ### Training results
40
  - total_eval_batch_size: 16
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
+ - num_epochs: 1.0
44
  - mixed_precision_training: Native AMP
45
 
46
  ### Training results
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.0,
3
- "train_loss": 0.15709904239873834,
4
- "train_runtime": 1021.7988,
5
- "train_samples": 394099,
6
- "train_samples_per_second": 771.383,
7
- "train_steps_per_second": 12.053
8
  }
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.1791531401204783,
4
+ "train_runtime": 9599.0055,
5
+ "train_samples": 3130671,
6
+ "train_samples_per_second": 326.145,
7
+ "train_steps_per_second": 5.096
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc375a9ad426fa46d5d19ae02c80cfe321c94a602c32d66159b8b71af368c54
3
  size 44430423
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c502c71f1fde2c8f9b826c9049114f5448804fc0ac4b7e158801288d0fc2f47a
3
  size 44430423
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.0,
3
- "train_loss": 0.15709904239873834,
4
- "train_runtime": 1021.7988,
5
- "train_samples": 394099,
6
- "train_samples_per_second": 771.383,
7
- "train_steps_per_second": 12.053
8
  }
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.1791531401204783,
4
+ "train_runtime": 9599.0055,
5
+ "train_samples": 3130671,
6
+ "train_samples_per_second": 326.145,
7
+ "train_steps_per_second": 5.096
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8585972156918863,
5
- "global_step": 42000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -510,11 +510,98 @@
510
  "learning_rate": 7.087515587628023e-06,
511
  "loss": 0.1726,
512
  "step": 42000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
  }
514
  ],
515
  "max_steps": 48917,
516
  "num_train_epochs": 1,
517
- "total_flos": 1.4653685132951552e+16,
518
  "trial_name": null,
519
  "trial_params": null
520
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 48917,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
510
  "learning_rate": 7.087515587628023e-06,
511
  "loss": 0.1726,
512
  "step": 42000
513
+ },
514
+ {
515
+ "epoch": 0.87,
516
+ "learning_rate": 6.576445816382853e-06,
517
+ "loss": 0.1733,
518
+ "step": 42500
519
+ },
520
+ {
521
+ "epoch": 0.88,
522
+ "learning_rate": 6.0653760451376825e-06,
523
+ "loss": 0.1729,
524
+ "step": 43000
525
+ },
526
+ {
527
+ "epoch": 0.89,
528
+ "learning_rate": 5.555328413435002e-06,
529
+ "loss": 0.1719,
530
+ "step": 43500
531
+ },
532
+ {
533
+ "epoch": 0.9,
534
+ "learning_rate": 5.044258642189832e-06,
535
+ "loss": 0.1737,
536
+ "step": 44000
537
+ },
538
+ {
539
+ "epoch": 0.91,
540
+ "learning_rate": 4.533188870944662e-06,
541
+ "loss": 0.1731,
542
+ "step": 44500
543
+ },
544
+ {
545
+ "epoch": 0.92,
546
+ "learning_rate": 4.022119099699491e-06,
547
+ "loss": 0.1718,
548
+ "step": 45000
549
+ },
550
+ {
551
+ "epoch": 0.93,
552
+ "learning_rate": 3.511049328454321e-06,
553
+ "loss": 0.1714,
554
+ "step": 45500
555
+ },
556
+ {
557
+ "epoch": 0.94,
558
+ "learning_rate": 2.99997955720915e-06,
559
+ "loss": 0.173,
560
+ "step": 46000
561
+ },
562
+ {
563
+ "epoch": 0.95,
564
+ "learning_rate": 2.48890978596398e-06,
565
+ "loss": 0.1721,
566
+ "step": 46500
567
+ },
568
+ {
569
+ "epoch": 0.96,
570
+ "learning_rate": 1.9788621542613e-06,
571
+ "loss": 0.1713,
572
+ "step": 47000
573
+ },
574
+ {
575
+ "epoch": 0.97,
576
+ "learning_rate": 1.4677923830161294e-06,
577
+ "loss": 0.1738,
578
+ "step": 47500
579
+ },
580
+ {
581
+ "epoch": 0.98,
582
+ "learning_rate": 9.56722611770959e-07,
583
+ "loss": 0.1709,
584
+ "step": 48000
585
+ },
586
+ {
587
+ "epoch": 0.99,
588
+ "learning_rate": 4.4667498006827893e-07,
589
+ "loss": 0.1719,
590
+ "step": 48500
591
+ },
592
+ {
593
+ "epoch": 1.0,
594
+ "step": 48917,
595
+ "total_flos": 1.7063207956905984e+16,
596
+ "train_loss": 0.1791531401204783,
597
+ "train_runtime": 9599.0055,
598
+ "train_samples_per_second": 326.145,
599
+ "train_steps_per_second": 5.096
600
  }
601
  ],
602
  "max_steps": 48917,
603
  "num_train_epochs": 1,
604
+ "total_flos": 1.7063207956905984e+16,
605
  "trial_name": null,
606
  "trial_params": null
607
  }