Training in progress, step 90, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df0c4dd1a2df7a622f7460ba10c96b316ca2e5755f8dff63c88e8647b9c85909
 size 966995080

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a770f3185b4bc959d9093b622efcb1c6a0649f574dd4cb2f803a7a0b0f5847e
 size 966995080

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60b759fe6baa60f3f159d4ecc2a95ca6aaa39ba91aded1ce9c8c352852a22ab1
 size 1925064044

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9404b3b8073985a0ad59571a09b36826009505c1e25c30b4a3bcaea415bb5aa
 size 1925064044

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41c17a6b8cedc846257b89a1ab8ff684919717e9d5cf8771c309b21cc6315f5a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5dfc9586ba42710aa4ce0391828f29de924e90b3b8e4b33ef6fb16e4e88e645f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:052f014cd69d6e12b5a27bac6856cb07fa8f819e18d6b5a58297317aa0344599
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:df8acfe66f0561ea918b9d7da9a7d6ce9312bdfd8b3c3cd4af6fca3356e7db5c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.02753303964757709,
   "eval_steps": 10,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -19,6 +19,12 @@
       "learning_rate": 9.200000000000001e-07,
       "loss": 2.3577,
       "step": 50
     }
   ],
   "logging_steps": 25,
@@ -26,7 +32,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 10,
-  "total_flos": 2.30868320256e+17,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.04955947136563876,
   "eval_steps": 10,
+  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.200000000000001e-07,
       "loss": 2.3577,
       "step": 50
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 1.42e-06,
+      "loss": 1.7546,
+      "step": 75
     }
   ],
   "logging_steps": 25,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 10,
+  "total_flos": 4.155629764608e+17,
   "trial_name": null,
   "trial_params": null
 }