Training in progress, step 3300

Browse files

Files changed (18) hide show

{checkpoint-2900 → checkpoint-3200}/config.json +0 -0
{checkpoint-2900 → checkpoint-3200}/optimizer.pt +1 -1
{checkpoint-2900 → checkpoint-3200}/preprocessor_config.json +0 -0
{checkpoint-3000 → checkpoint-3200}/pytorch_model.bin +1 -1
{checkpoint-3000 → checkpoint-3200}/rng_state.pth +2 -2
{checkpoint-3000 → checkpoint-3200}/scaler.pt +1 -1
{checkpoint-2900 → checkpoint-3200}/scheduler.pt +1 -1
{checkpoint-3000 → checkpoint-3200}/trainer_state.json +81 -3
{checkpoint-2900 → checkpoint-3200}/training_args.bin +0 -0
{checkpoint-3000 → checkpoint-3300}/config.json +0 -0
{checkpoint-3000 → checkpoint-3300}/optimizer.pt +1 -1
{checkpoint-3000 → checkpoint-3300}/preprocessor_config.json +0 -0
{checkpoint-2900 → checkpoint-3300}/pytorch_model.bin +1 -1
{checkpoint-2900 → checkpoint-3300}/rng_state.pth +2 -2
{checkpoint-2900 → checkpoint-3300}/scaler.pt +1 -1
{checkpoint-3000 → checkpoint-3300}/scheduler.pt +1 -1
{checkpoint-2900 → checkpoint-3300}/trainer_state.json +159 -3
{checkpoint-3000 → checkpoint-3300}/training_args.bin +0 -0

{checkpoint-2900 → checkpoint-3200}/config.json RENAMED Viewed

File without changes

{checkpoint-2900 → checkpoint-3200}/optimizer.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:459fd9cc8b47164c5e619165015eaa256572f1c2c4cf53b6619361a7675d46a1
 size 2490337809

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d0ef9d1762f7ffe1cc3a0e4748e6ebb583dea4f49da96382216e78f5647702c
 size 2490337809

{checkpoint-2900 → checkpoint-3200}/preprocessor_config.json RENAMED Viewed

File without changes

{checkpoint-3000 → checkpoint-3200}/pytorch_model.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e2564558a2c8e9677d5526af137102a80a73cbd4be834bcb3fa95a0e5c8ecef
 size 1262063089

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ae84798d472963cd276f2962517c928f65dbe900552c2adf1ac952b2b09cb73
 size 1262063089

{checkpoint-3000 → checkpoint-3200}/rng_state.pth RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:813a474ad7f864479df922c7e373032d552ed88f79371c0f5114bee90fb0f0c6
-size 14567

 version https://git-lfs.github.com/spec/v1
+oid sha256:2cdfe7c6a1cd08c22115fc108dd9766a68df8ce8189a197857ef6e07d3e9f573
+size 14503

{checkpoint-3000 → checkpoint-3200}/scaler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba461c86794433c4a650b89cfa19a2a74f927a6e243d52bc4fa86b75cc0b4312
 size 559

 version https://git-lfs.github.com/spec/v1
+oid sha256:adc06e1a20206447d0abcf52e13d1dede8b4a0f2043d99c40e3b996eae19e163
 size 559

{checkpoint-2900 → checkpoint-3200}/scheduler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da5b41dc4267a4eb8832f6b51e0de784a14e4fa5985ff5c1c6135876b04560d7
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d6d7554ac67cd9c5c482a68804a21e6ce04d359c4a6dab9309204dc50f02e7d
 size 623

{checkpoint-3000 → checkpoint-3200}/trainer_state.json RENAMED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 32.96174863387978,
-  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1176,11 +1176,89 @@
       "eval_steps_per_second": 0.794,
       "eval_wer": 0.1401362475163213,
       "step": 3000
     }
   ],
   "max_steps": 4550,
   "num_train_epochs": 50,
-  "total_flos": 4.636386357187625e+19,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 35.16393442622951,
+  "global_step": 3200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_steps_per_second": 0.794,
       "eval_wer": 0.1401362475163213,
       "step": 3000
+    },
+    {
+      "epoch": 33.19,
+      "learning_rate": 0.00015019607843137257,
+      "loss": 0.8263,
+      "step": 3020
+    },
+    {
+      "epoch": 33.4,
+      "learning_rate": 0.00014823529411764705,
+      "loss": 0.8203,
+      "step": 3040
+    },
+    {
+      "epoch": 33.62,
+      "learning_rate": 0.00014627450980392157,
+      "loss": 0.8277,
+      "step": 3060
+    },
+    {
+      "epoch": 33.84,
+      "learning_rate": 0.00014431372549019607,
+      "loss": 0.8183,
+      "step": 3080
+    },
+    {
+      "epoch": 34.07,
+      "learning_rate": 0.0001423529411764706,
+      "loss": 0.8395,
+      "step": 3100
+    },
+    {
+      "epoch": 34.07,
+      "eval_loss": 0.1606692373752594,
+      "eval_runtime": 190.8222,
+      "eval_samples_per_second": 25.38,
+      "eval_steps_per_second": 0.797,
+      "eval_wer": 0.13755322168606302,
+      "step": 3100
+    },
+    {
+      "epoch": 34.28,
+      "learning_rate": 0.0001403921568627451,
+      "loss": 0.8207,
+      "step": 3120
+    },
+    {
+      "epoch": 34.5,
+      "learning_rate": 0.0001384313725490196,
+      "loss": 0.8106,
+      "step": 3140
+    },
+    {
+      "epoch": 34.72,
+      "learning_rate": 0.00013647058823529413,
+      "loss": 0.8329,
+      "step": 3160
+    },
+    {
+      "epoch": 34.94,
+      "learning_rate": 0.00013450980392156863,
+      "loss": 0.8048,
+      "step": 3180
+    },
+    {
+      "epoch": 35.16,
+      "learning_rate": 0.00013254901960784313,
+      "loss": 0.83,
+      "step": 3200
+    },
+    {
+      "epoch": 35.16,
+      "eval_loss": 0.15384173393249512,
+      "eval_runtime": 191.1912,
+      "eval_samples_per_second": 25.331,
+      "eval_steps_per_second": 0.795,
+      "eval_wer": 0.1379222253760999,
+      "step": 3200
     }
   ],
   "max_steps": 4550,
   "num_train_epochs": 50,
+  "total_flos": 4.949777145440599e+19,
   "trial_name": null,
   "trial_params": null
 }

{checkpoint-2900 → checkpoint-3200}/training_args.bin RENAMED Viewed

File without changes

{checkpoint-3000 → checkpoint-3300}/config.json RENAMED Viewed

File without changes

{checkpoint-3000 → checkpoint-3300}/optimizer.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:023713187972aa2b714a036a65a7542d7e006ad45109ce7ddc7eafb956260fe3
 size 2490337809

 version https://git-lfs.github.com/spec/v1
+oid sha256:806c8e4e7fd28f0571714cc9f9a0055970d60ee26b8981016c52d353bb4430e3
 size 2490337809

{checkpoint-3000 → checkpoint-3300}/preprocessor_config.json RENAMED Viewed

File without changes

{checkpoint-2900 → checkpoint-3300}/pytorch_model.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d39abe947b6927f79b7ffad55a6a3ba07cdd2ab995efc1159bb1ca188d440e7f
 size 1262063089

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0e5ce31e623a5f978a7686e5b6636e0db65bea23a7d3b791715106209b73f8c
 size 1262063089

{checkpoint-2900 → checkpoint-3300}/rng_state.pth RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d40993892ed72701230daaea4a79ca45b84c615b387b7bc61e6e9982f8358e4
-size 14567

 version https://git-lfs.github.com/spec/v1
+oid sha256:a716b25bdaec66a312a035315a78027e767ae161c16b37a11eeba450f275b66e
+size 14503

{checkpoint-2900 → checkpoint-3300}/scaler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:daedbf6df7d4c1927bb965444b3e2d13f98980e3a59e64cf90683cbd1e80bdf5
 size 559

 version https://git-lfs.github.com/spec/v1
+oid sha256:011849dafd5feecbd9c7cd405b92e51d3198c6a38da3d9f70b7ac2eb65d83b8f
 size 559

{checkpoint-3000 → checkpoint-3300}/scheduler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fff88e552e894e867e2247a229e76319848f876eaeb56445f8251bd260defd4d
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa9db20ef4c55522e29abf089521aed25de7e4d0ceb54efd10455bf35f8ac946
 size 623

{checkpoint-2900 → checkpoint-3300}/trainer_state.json RENAMED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 31.863387978142075,
-  "global_step": 2900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1137,11 +1137,167 @@
       "eval_steps_per_second": 0.799,
       "eval_wer": 0.1397104740278172,
       "step": 2900
     }
   ],
   "max_steps": 4550,
   "num_train_epochs": 50,
-  "total_flos": 4.48632216197919e+19,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 36.26229508196721,
+  "global_step": 3300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_steps_per_second": 0.799,
       "eval_wer": 0.1397104740278172,
       "step": 2900
+    },
+    {
+      "epoch": 32.09,
+      "learning_rate": 0.00016,
+      "loss": 0.8634,
+      "step": 2920
+    },
+    {
+      "epoch": 32.31,
+      "learning_rate": 0.00015803921568627451,
+      "loss": 0.8351,
+      "step": 2940
+    },
+    {
+      "epoch": 32.52,
+      "learning_rate": 0.00015607843137254901,
+      "loss": 0.8427,
+      "step": 2960
+    },
+    {
+      "epoch": 32.74,
+      "learning_rate": 0.00015411764705882352,
+      "loss": 0.8324,
+      "step": 2980
+    },
+    {
+      "epoch": 32.96,
+      "learning_rate": 0.00015215686274509804,
+      "loss": 0.8491,
+      "step": 3000
+    },
+    {
+      "epoch": 32.96,
+      "eval_loss": 0.1594749242067337,
+      "eval_runtime": 191.4631,
+      "eval_samples_per_second": 25.295,
+      "eval_steps_per_second": 0.794,
+      "eval_wer": 0.1401362475163213,
+      "step": 3000
+    },
+    {
+      "epoch": 33.19,
+      "learning_rate": 0.00015019607843137257,
+      "loss": 0.8263,
+      "step": 3020
+    },
+    {
+      "epoch": 33.4,
+      "learning_rate": 0.00014823529411764705,
+      "loss": 0.8203,
+      "step": 3040
+    },
+    {
+      "epoch": 33.62,
+      "learning_rate": 0.00014627450980392157,
+      "loss": 0.8277,
+      "step": 3060
+    },
+    {
+      "epoch": 33.84,
+      "learning_rate": 0.00014431372549019607,
+      "loss": 0.8183,
+      "step": 3080
+    },
+    {
+      "epoch": 34.07,
+      "learning_rate": 0.0001423529411764706,
+      "loss": 0.8395,
+      "step": 3100
+    },
+    {
+      "epoch": 34.07,
+      "eval_loss": 0.1606692373752594,
+      "eval_runtime": 190.8222,
+      "eval_samples_per_second": 25.38,
+      "eval_steps_per_second": 0.797,
+      "eval_wer": 0.13755322168606302,
+      "step": 3100
+    },
+    {
+      "epoch": 34.28,
+      "learning_rate": 0.0001403921568627451,
+      "loss": 0.8207,
+      "step": 3120
+    },
+    {
+      "epoch": 34.5,
+      "learning_rate": 0.0001384313725490196,
+      "loss": 0.8106,
+      "step": 3140
+    },
+    {
+      "epoch": 34.72,
+      "learning_rate": 0.00013647058823529413,
+      "loss": 0.8329,
+      "step": 3160
+    },
+    {
+      "epoch": 34.94,
+      "learning_rate": 0.00013450980392156863,
+      "loss": 0.8048,
+      "step": 3180
+    },
+    {
+      "epoch": 35.16,
+      "learning_rate": 0.00013254901960784313,
+      "loss": 0.83,
+      "step": 3200
+    },
+    {
+      "epoch": 35.16,
+      "eval_loss": 0.15384173393249512,
+      "eval_runtime": 191.1912,
+      "eval_samples_per_second": 25.331,
+      "eval_steps_per_second": 0.795,
+      "eval_wer": 0.1379222253760999,
+      "step": 3200
+    },
+    {
+      "epoch": 35.38,
+      "learning_rate": 0.00013058823529411764,
+      "loss": 0.7963,
+      "step": 3220
+    },
+    {
+      "epoch": 35.6,
+      "learning_rate": 0.00012862745098039216,
+      "loss": 0.7895,
+      "step": 3240
+    },
+    {
+      "epoch": 35.82,
+      "learning_rate": 0.0001266666666666667,
+      "loss": 0.7964,
+      "step": 3260
+    },
+    {
+      "epoch": 36.04,
+      "learning_rate": 0.0001247058823529412,
+      "loss": 0.7931,
+      "step": 3280
+    },
+    {
+      "epoch": 36.26,
+      "learning_rate": 0.0001227450980392157,
+      "loss": 0.7835,
+      "step": 3300
+    },
+    {
+      "epoch": 36.26,
+      "eval_loss": 0.16020993888378143,
+      "eval_runtime": 192.1405,
+      "eval_samples_per_second": 25.206,
+      "eval_steps_per_second": 0.791,
+      "eval_wer": 0.1408458699971615,
+      "step": 3300
     }
   ],
   "max_steps": 4550,
   "num_train_epochs": 50,
+  "total_flos": 5.103177199568347e+19,
   "trial_name": null,
   "trial_params": null
 }

{checkpoint-3000 → checkpoint-3300}/training_args.bin RENAMED Viewed

File without changes