Training in progress, step 500

Files changed (14) hide show

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb01bd303b793ee9b03ad47871bea5cce22f3ecc606ecad28e5a2b49ef71cbf2
 size 1340618660

 version https://git-lfs.github.com/spec/v1
+oid sha256:b158fd653098d86c46de46c10ed9f58274466bc11b7d08ba4d3c4a6211456951
 size 1340618660

run-5/checkpoint-500/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:357d3682c1e4ede5f1a0b9d5f129558df5d851a090b2545fa90c4ef5be5903b6
 size 1340618660

 version https://git-lfs.github.com/spec/v1
+oid sha256:b158fd653098d86c46de46c10ed9f58274466bc11b7d08ba4d3c4a6211456951
 size 1340618660

run-5/checkpoint-500/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:000a1fc805d7ce1a0e08309195925a0fab19c31edeb84f41ccc0cd99be5c0541
 size 2681472237

 version https://git-lfs.github.com/spec/v1
+oid sha256:86ee2d9cd62ff27d32b77acdaf8604b32521014c74ec763601e2091b9b8bf1af
 size 2681472237

run-5/checkpoint-500/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c6a9c9fd1b99e6be430d5cdc6bb9521df012d61ecf8b704f1aaaffac011cf90
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b39308b250b2dde21cca6217a709d5456bcdcab3c796c6926f25c06b9c730de1
 size 14244

run-5/checkpoint-500/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6938609d41be10720f3285909fb74b78e456c7e81bdfbd969fdd1f67f9b31b60
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8dadc7a997de39d2d647fc47b4ac134868907ebf849745e928942d6ab4f4bf3
 size 1064

run-5/checkpoint-500/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.6737967914438503,
   "eval_steps": 500,
   "global_step": 500,
   "is_hyper_param_search": true,
@@ -9,41 +9,23 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 1.0,
-      "eval_accuracy": 0.6878980994224548,
-      "eval_loss": 0.5814566016197205,
-      "eval_runtime": 8.9565,
-      "eval_samples_per_second": 333.054,
-      "eval_steps_per_second": 20.879,
-      "step": 187
-    },
-    {
-      "epoch": 2.0,
-      "eval_accuracy": 0.7194100022315979,
-      "eval_loss": 0.5218587517738342,
-      "eval_runtime": 9.0661,
-      "eval_samples_per_second": 329.027,
-      "eval_steps_per_second": 20.626,
-      "step": 374
-    },
-    {
-      "epoch": 2.6737967914438503,
-      "grad_norm": 7.46340274810791,
-      "learning_rate": 1.5373335026225161e-06,
-      "loss": 0.5847,
       "step": 500
     }
   ],
   "logging_steps": 500,
-  "max_steps": 1870,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 10,
   "save_steps": 500,
-  "total_flos": 5397207649270092.0,
-  "train_batch_size": 64,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 2.098404124017595e-06,
-    "per_device_train_batch_size": 64
   }
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.6702412868632708,
   "eval_steps": 500,
   "global_step": 500,
   "is_hyper_param_search": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.6702412868632708,
+      "grad_norm": 5.984279632568359,
+      "learning_rate": 2.0252471293334746e-06,
+      "loss": 0.6163,
       "step": 500
     }
   ],
   "logging_steps": 500,
+  "max_steps": 7460,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 10,
   "save_steps": 500,
+  "total_flos": 1185529179906432.0,
+  "train_batch_size": 16,
   "trial_name": null,
   "trial_params": {
+    "learning_rate": 2.1707390208085805e-06,
+    "per_device_train_batch_size": 16
   }
 }

run-5/checkpoint-500/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a843713c73e12f359184fae200bd4db35c7e342723c6e05d613c5eed17ea8f97
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:da05404e8bdf5efeabed68f0c953ae9b437da82b1da4f9d80687c63c7ac975c6
 size 5048

run-6/checkpoint-500/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9692c44b359610754bd139a5992df3f05f8f8a1a390f1c352565a316919c6e35
 size 1340618660

 version https://git-lfs.github.com/spec/v1
+oid sha256:67447887bb219e19b0253c4ce363ae31d0f44f9960e50a3c1b151d0d4dde93cf
 size 1340618660

run-6/checkpoint-500/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58f7725749ae06d74bc1f273091e9ffd897756c4d63a540e3eebb99b520d11e4
 size 2681472237

 version https://git-lfs.github.com/spec/v1
+oid sha256:441fdd4c76699d691b8af016501a2492caeb57d6bcf0666d27cab94d2c2f81f8
 size 2681472237

run-6/checkpoint-500/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a810bb0465d3808013c33fffe8149fd14f93aa80861273d550d960733afaa60a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b39308b250b2dde21cca6217a709d5456bcdcab3c796c6926f25c06b9c730de1
 size 14244

run-6/checkpoint-500/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de4cf75190dc8cde5ab50fc4ce09574e9d14e4af413d82535995087b4c13b9e0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2044358e999d974fad94fce0b3277465b00cc97cdb9bfa74b9f1b4e710be7a6
 size 1064

run-6/checkpoint-500/trainer_state.json CHANGED Viewed

@@ -10,9 +10,9 @@
   "log_history": [
     {
       "epoch": 0.6702412868632708,
-      "grad_norm": 14.874946594238281,
-      "learning_rate": 1.2753159063918482e-06,
-      "loss": 0.6462,
       "step": 500
     }
   ],
@@ -21,11 +21,11 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 10,
   "save_steps": 500,
-  "total_flos": 1770780098630016.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 1.3669334284027569e-06,
     "per_device_train_batch_size": 16
   }
 }

   "log_history": [
     {
       "epoch": 0.6702412868632708,
+      "grad_norm": 1.9533641338348389,
+      "learning_rate": 5.731758049539664e-05,
+      "loss": 0.7033,
       "step": 500
     }
   ],
   "num_input_tokens_seen": 0,
   "num_train_epochs": 10,
   "save_steps": 500,
+  "total_flos": 1770153959360880.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": {
+    "learning_rate": 6.143522277236479e-05,
     "per_device_train_batch_size": 16
   }
 }

run-6/checkpoint-500/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:447fb76bf1cd954dcdb9d5f301776e075e204e563173456d66a06fb83f06c732
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:671cb65399c61f1ebf916b877d3ad0d22485f099eae19a204bad67a9e7b48322
 size 5048

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c539e782ca0b96266cde875b3e061950504d8d6e280a5e81a555601aa0081af
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:da05404e8bdf5efeabed68f0c953ae9b437da82b1da4f9d80687c63c7ac975c6
 size 5048