Update model

Browse files

Files changed (5) hide show

README.md +1 -1
all_results.json +6 -6
pytorch_model.bin +1 -1
train_results.json +6 -6
trainer_state.json +90 -3

README.md CHANGED Viewed

@@ -40,7 +40,7 @@ The following hyperparameters were used during training:
 - total_eval_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 2.0
 - mixed_precision_training: Native AMP
 ### Training results

 - total_eval_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 1.0
 - mixed_precision_training: Native AMP
 ### Training results

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 2.0,
-    "train_loss": 0.15709904239873834,
-    "train_runtime": 1021.7988,
-    "train_samples": 394099,
-    "train_samples_per_second": 771.383,
-    "train_steps_per_second": 12.053
 }

 {
+    "epoch": 1.0,
+    "train_loss": 0.1791531401204783,
+    "train_runtime": 9599.0055,
+    "train_samples": 3130671,
+    "train_samples_per_second": 326.145,
+    "train_steps_per_second": 5.096
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8cc375a9ad426fa46d5d19ae02c80cfe321c94a602c32d66159b8b71af368c54
 size 44430423

 version https://git-lfs.github.com/spec/v1
+oid sha256:c502c71f1fde2c8f9b826c9049114f5448804fc0ac4b7e158801288d0fc2f47a
 size 44430423

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 2.0,
-    "train_loss": 0.15709904239873834,
-    "train_runtime": 1021.7988,
-    "train_samples": 394099,
-    "train_samples_per_second": 771.383,
-    "train_steps_per_second": 12.053
 }

 {
+    "epoch": 1.0,
+    "train_loss": 0.1791531401204783,
+    "train_runtime": 9599.0055,
+    "train_samples": 3130671,
+    "train_samples_per_second": 326.145,
+    "train_steps_per_second": 5.096
 }

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8585972156918863,
-  "global_step": 42000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -510,11 +510,98 @@
       "learning_rate": 7.087515587628023e-06,
       "loss": 0.1726,
       "step": 42000
     }
   ],
   "max_steps": 48917,
   "num_train_epochs": 1,
-  "total_flos": 1.4653685132951552e+16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "global_step": 48917,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 7.087515587628023e-06,
       "loss": 0.1726,
       "step": 42000
+    },
+    {
+      "epoch": 0.87,
+      "learning_rate": 6.576445816382853e-06,
+      "loss": 0.1733,
+      "step": 42500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 6.0653760451376825e-06,
+      "loss": 0.1729,
+      "step": 43000
+    },
+    {
+      "epoch": 0.89,
+      "learning_rate": 5.555328413435002e-06,
+      "loss": 0.1719,
+      "step": 43500
+    },
+    {
+      "epoch": 0.9,
+      "learning_rate": 5.044258642189832e-06,
+      "loss": 0.1737,
+      "step": 44000
+    },
+    {
+      "epoch": 0.91,
+      "learning_rate": 4.533188870944662e-06,
+      "loss": 0.1731,
+      "step": 44500
+    },
+    {
+      "epoch": 0.92,
+      "learning_rate": 4.022119099699491e-06,
+      "loss": 0.1718,
+      "step": 45000
+    },
+    {
+      "epoch": 0.93,
+      "learning_rate": 3.511049328454321e-06,
+      "loss": 0.1714,
+      "step": 45500
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 2.99997955720915e-06,
+      "loss": 0.173,
+      "step": 46000
+    },
+    {
+      "epoch": 0.95,
+      "learning_rate": 2.48890978596398e-06,
+      "loss": 0.1721,
+      "step": 46500
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 1.9788621542613e-06,
+      "loss": 0.1713,
+      "step": 47000
+    },
+    {
+      "epoch": 0.97,
+      "learning_rate": 1.4677923830161294e-06,
+      "loss": 0.1738,
+      "step": 47500
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 9.56722611770959e-07,
+      "loss": 0.1709,
+      "step": 48000
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 4.4667498006827893e-07,
+      "loss": 0.1719,
+      "step": 48500
+    },
+    {
+      "epoch": 1.0,
+      "step": 48917,
+      "total_flos": 1.7063207956905984e+16,
+      "train_loss": 0.1791531401204783,
+      "train_runtime": 9599.0055,
+      "train_samples_per_second": 326.145,
+      "train_steps_per_second": 5.096
     }
   ],
   "max_steps": 48917,
   "num_train_epochs": 1,
+  "total_flos": 1.7063207956905984e+16,
   "trial_name": null,
   "trial_params": null
 }