Training in progress, step 4000

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +141 -3
pytorch_model.bin +1 -1
runs/Apr27_18-49-25_ed0e8029d983/events.out.tfevents.1651085391.ed0e8029d983.38.0 +2 -2

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98deb32c79b42cbb7d29bd1d342401d4526eaad7b0335b7895f05b0f8569c981
 size 2217170313

 version https://git-lfs.github.com/spec/v1
+oid sha256:00dbab2c669d1749c1221f16d0db9d773e53d869a6fbdb8e8de4cffede927844
 size 2217170313

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6ab2b57357837dcb7a2ac0e9c7f4b5663e60d42f3062fdaec7b31dacc509bbd
 size 1110979563

 version https://git-lfs.github.com/spec/v1
+oid sha256:6126262be252d9a97ffe5ce05868864986d359a880a56b15b73335b718325b32
 size 1110979563

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad3fe493c6b75cf37fbf1683049b17cb20d40fca4ab7678c59ce13018603fd83
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c154dc7011c0d78a47e9f03952eaa952ff95903d7e1bbd999c35b6a645c480d
 size 14503

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c8ebafee11ba1a9f8b3828ac7ef3cc74823857c9864133f6b5adf8aed082b63
 size 559

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e4d531696be1263ac11d0d37e17da075ea84a1f159c73b303b19cb70a3843d8
 size 559

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f56d95f063d6bd3e6989f74085b8339015df8cf63e497ec203733677b486978e
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9ef11ca321da1d407dac8fbef3cc8f319dca5c16f01276dcd7d3b8d5447a0bc
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.3640661938534278,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -144,11 +144,149 @@
       "eval_samples_per_second": 21.084,
       "eval_steps_per_second": 1.32,
       "step": 2000
     }
   ],
   "max_steps": 4230,
   "num_train_epochs": 5,
-  "total_flos": 5.790864009434628e+18,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.7281323877068555,
+  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.084,
       "eval_steps_per_second": 1.32,
       "step": 2000
+    },
+    {
+      "epoch": 2.48,
+      "learning_rate": 2.5283687943262413e-05,
+      "loss": 1.1804,
+      "step": 2100
+    },
+    {
+      "epoch": 2.6,
+      "learning_rate": 2.4101654846335698e-05,
+      "loss": 1.0433,
+      "step": 2200
+    },
+    {
+      "epoch": 2.72,
+      "learning_rate": 2.2919621749408986e-05,
+      "loss": 1.0309,
+      "step": 2300
+    },
+    {
+      "epoch": 2.84,
+      "learning_rate": 2.173758865248227e-05,
+      "loss": 0.917,
+      "step": 2400
+    },
+    {
+      "epoch": 2.96,
+      "learning_rate": 2.0555555555555555e-05,
+      "loss": 0.8589,
+      "step": 2500
+    },
+    {
+      "epoch": 3.07,
+      "learning_rate": 1.9373522458628842e-05,
+      "loss": 0.7167,
+      "step": 2600
+    },
+    {
+      "epoch": 3.19,
+      "learning_rate": 1.819148936170213e-05,
+      "loss": 0.6517,
+      "step": 2700
+    },
+    {
+      "epoch": 3.31,
+      "learning_rate": 1.7009456264775415e-05,
+      "loss": 0.6548,
+      "step": 2800
+    },
+    {
+      "epoch": 3.43,
+      "learning_rate": 1.5827423167848703e-05,
+      "loss": 0.5966,
+      "step": 2900
+    },
+    {
+      "epoch": 3.55,
+      "learning_rate": 1.4645390070921985e-05,
+      "loss": 0.5682,
+      "step": 3000
+    },
+    {
+      "epoch": 3.55,
+      "eval_cer": 0.21450393954011898,
+      "eval_loss": 1.9335544109344482,
+      "eval_runtime": 33.7287,
+      "eval_samples_per_second": 22.266,
+      "eval_steps_per_second": 1.393,
+      "step": 3000
+    },
+    {
+      "epoch": 3.66,
+      "learning_rate": 1.3463356973995273e-05,
+      "loss": 0.5376,
+      "step": 3100
+    },
+    {
+      "epoch": 3.78,
+      "learning_rate": 1.2281323877068558e-05,
+      "loss": 0.4875,
+      "step": 3200
+    },
+    {
+      "epoch": 3.9,
+      "learning_rate": 1.1099290780141844e-05,
+      "loss": 0.4462,
+      "step": 3300
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 9.91725768321513e-06,
+      "loss": 0.4393,
+      "step": 3400
+    },
+    {
+      "epoch": 4.14,
+      "learning_rate": 8.735224586288416e-06,
+      "loss": 0.365,
+      "step": 3500
+    },
+    {
+      "epoch": 4.26,
+      "learning_rate": 7.5531914893617024e-06,
+      "loss": 0.3727,
+      "step": 3600
+    },
+    {
+      "epoch": 4.37,
+      "learning_rate": 6.371158392434988e-06,
+      "loss": 0.3343,
+      "step": 3700
+    },
+    {
+      "epoch": 4.49,
+      "learning_rate": 5.189125295508274e-06,
+      "loss": 0.3357,
+      "step": 3800
+    },
+    {
+      "epoch": 4.61,
+      "learning_rate": 4.007092198581561e-06,
+      "loss": 0.3244,
+      "step": 3900
+    },
+    {
+      "epoch": 4.73,
+      "learning_rate": 2.8250591016548463e-06,
+      "loss": 0.3038,
+      "step": 4000
+    },
+    {
+      "epoch": 4.73,
+      "eval_cer": 0.12493970091654606,
+      "eval_loss": 1.5810511112213135,
+      "eval_runtime": 33.6265,
+      "eval_samples_per_second": 22.334,
+      "eval_steps_per_second": 1.398,
+      "step": 4000
     }
   ],
   "max_steps": 4230,
   "num_train_epochs": 5,
+  "total_flos": 1.1581728018869256e+19,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6ab2b57357837dcb7a2ac0e9c7f4b5663e60d42f3062fdaec7b31dacc509bbd
 size 1110979563

 version https://git-lfs.github.com/spec/v1
+oid sha256:6126262be252d9a97ffe5ce05868864986d359a880a56b15b73335b718325b32
 size 1110979563

runs/Apr27_18-49-25_ed0e8029d983/events.out.tfevents.1651085391.ed0e8029d983.38.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d68230e4e12363a24d1d81ab97cc918416ca363ebdfd77cfd8cc0f21d0d1d24
-size 84406

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1df8acbb06ed443d1ac5af36c40e9566fba066f3958eedac067c8bea2422da8
+size 88182