End of training

Files changed (5) hide show

README.md CHANGED Viewed

@@ -37,6 +37,9 @@ The following hyperparameters were used during training:
 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.1

 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
+- num_devices: 2
+- total_train_batch_size: 16
+- total_eval_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.1

all_results.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "epoch": 0.0,
     "train_loss": 0.6931471824645996,
-    "train_runtime": 5.5894,
-    "train_samples_per_second": 1.431,
-    "train_steps_per_second": 0.179
 }

 {
     "epoch": 0.0,
     "train_loss": 0.6931471824645996,
+    "train_runtime": 10.6682,
+    "train_samples_per_second": 1.5,
+    "train_steps_per_second": 0.094
 }

train_results.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "epoch": 0.0,
     "train_loss": 0.6931471824645996,
-    "train_runtime": 5.5894,
-    "train_samples_per_second": 1.431,
-    "train_steps_per_second": 0.179
 }

 {
     "epoch": 0.0,
     "train_loss": 0.6931471824645996,
+    "train_runtime": 10.6682,
+    "train_samples_per_second": 1.5,
+    "train_steps_per_second": 0.094
 }

trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.00016066838046272492,
   "eval_steps": 500,
   "global_step": 1,
   "is_hyper_param_search": false,
@@ -11,8 +11,8 @@
     {
       "epoch": 0.0,
       "learning_rate": 0.0,
-      "logits/generated": -1.85471510887146,
-      "logits/real": -1.7835122346878052,
       "logps/generated": -667.4280395507812,
       "logps/real": -384.09954833984375,
       "loss": 0.6931,
@@ -27,9 +27,9 @@
       "step": 1,
       "total_flos": 0.0,
       "train_loss": 0.6931471824645996,
-      "train_runtime": 5.5894,
-      "train_samples_per_second": 1.431,
-      "train_steps_per_second": 0.179
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.00032133676092544985,
   "eval_steps": 500,
   "global_step": 1,
   "is_hyper_param_search": false,
     {
       "epoch": 0.0,
       "learning_rate": 0.0,
+      "logits/generated": -1.854715347290039,
+      "logits/real": -1.7835123538970947,
       "logps/generated": -667.4280395507812,
       "logps/real": -384.09954833984375,
       "loss": 0.6931,
       "step": 1,
       "total_flos": 0.0,
       "train_loss": 0.6931471824645996,
+      "train_runtime": 10.6682,
+      "train_samples_per_second": 1.5,
+      "train_steps_per_second": 0.094
     }
   ],
   "logging_steps": 10,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6d3e2546740d7f6f9c46fb71cf117eeec16eba45237766eb24cb6dd4b178259
 size 5880

 version https://git-lfs.github.com/spec/v1
+oid sha256:27400530099be3b59bee84a18af706fa5330678ad8aa576bb769c5f20bf70284
 size 5880