Model save

Files changed (9) hide show

README.md CHANGED Viewed

@@ -15,8 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
 This model was trained from scratch on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6016
-- Wer: 0.3413
 ## Model description
@@ -43,7 +43,7 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 3.0
 - mixed_precision_training: Native AMP
 ### Training results
@@ -52,6 +52,9 @@ The following hyperparameters were used during training:
 |:-------------:|:-----:|:----:|:---------------:|:------:|
 | 0.6667        | 1.14  | 150  | 0.6710          | 0.3654 |
 | 0.5023        | 2.28  | 300  | 0.6016          | 0.3413 |
 ### Framework versions

 This model was trained from scratch on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5649
+- Wer: 0.3172
 ## Model description
 - total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 6.0
 - mixed_precision_training: Native AMP
 ### Training results
 |:-------------:|:-----:|:----:|:---------------:|:------:|
 | 0.6667        | 1.14  | 150  | 0.6710          | 0.3654 |
 | 0.5023        | 2.28  | 300  | 0.6016          | 0.3413 |
+| 0.4384        | 3.43  | 450  | 0.5907          | 0.3325 |
+| 0.3536        | 4.57  | 600  | 0.5693          | 0.3221 |
+| 0.3158        | 5.71  | 750  | 0.5649          | 0.3172 |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
-    "epoch": 2.99,
-    "eval_loss": 0.5827092528343201,
-    "eval_runtime": 68.6039,
     "eval_samples": 1042,
-    "eval_samples_per_second": 15.189,
-    "eval_steps_per_second": 1.91,
-    "eval_wer": 0.3319146805527739,
-    "train_loss": 0.6268785054447087,
-    "train_runtime": 1592.3308,
     "train_samples": 4193,
-    "train_samples_per_second": 7.9,
-    "train_steps_per_second": 0.247
 }

 {
+    "epoch": 3.0,
+    "eval_loss": 0.5835905075073242,
+    "eval_runtime": 69.9705,
     "eval_samples": 1042,
+    "eval_samples_per_second": 14.892,
+    "eval_steps_per_second": 1.872,
+    "eval_wer": 0.3325155217304226,
+    "train_loss": 0.10885493445942421,
+    "train_runtime": 326.0744,
     "train_samples": 4193,
+    "train_samples_per_second": 38.577,
+    "train_steps_per_second": 1.205
 }

eval_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-    "epoch": 2.99,
-    "eval_loss": 0.5827092528343201,
-    "eval_runtime": 68.6039,
     "eval_samples": 1042,
-    "eval_samples_per_second": 15.189,
-    "eval_steps_per_second": 1.91,
-    "eval_wer": 0.3319146805527739
 }

 {
+    "epoch": 3.0,
+    "eval_loss": 0.5835905075073242,
+    "eval_runtime": 69.9705,
     "eval_samples": 1042,
+    "eval_samples_per_second": 14.892,
+    "eval_steps_per_second": 1.872,
+    "eval_wer": 0.3325155217304226
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80c3c975ba443a304094adc8ab387fa7c8c9091ba01bd4d87d6b793d57f6ec45
 size 1262012432

 version https://git-lfs.github.com/spec/v1
+oid sha256:fac84934db9c8554aab3ce8cee85785fd318dc840cc531e5fc0096f87cd29cf7
 size 1262012432

runs/Apr17_14-41-03_tranceformer/events.out.tfevents.1713365442.tranceformer.212254.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:978e0c96ef9de222bf00db0ac475a3630f887e3cbd568a320402afbb50ad5415
+size 406

runs/Apr17_14-56-37_tranceformer/events.out.tfevents.1713365920.tranceformer.213799.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:62c75486a7832f19fdb07472811621991094221e4892a336ff91f2430f734e1f
+size 9959

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 2.99,
-    "train_loss": 0.6268785054447087,
-    "train_runtime": 1592.3308,
     "train_samples": 4193,
-    "train_samples_per_second": 7.9,
-    "train_steps_per_second": 0.247
 }

 {
+    "epoch": 3.0,
+    "train_loss": 0.10885493445942421,
+    "train_runtime": 326.0744,
     "train_samples": 4193,
+    "train_samples_per_second": 38.577,
+    "train_steps_per_second": 1.205
 }

trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.988593155893536,
   "eval_steps": 150,
   "global_step": 393,
   "is_hyper_param_search": false,
@@ -69,20 +69,20 @@
       "step": 300
     },
     {
-      "epoch": 2.66,
-      "grad_norm": 0.6847591996192932,
       "learning_rate": 3.282442748091603e-05,
-      "loss": 0.4834,
       "step": 350
     },
     {
-      "epoch": 2.99,
       "step": 393,
-      "total_flos": 6.536711598368473e+18,
-      "train_loss": 0.6268785054447087,
-      "train_runtime": 1592.3308,
-      "train_samples_per_second": 7.9,
-      "train_steps_per_second": 0.247
     }
   ],
   "logging_steps": 50,
@@ -90,7 +90,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 150,
-  "total_flos": 6.536711598368473e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.9961977186311786,
   "eval_steps": 150,
   "global_step": 393,
   "is_hyper_param_search": false,
       "step": 300
     },
     {
+      "epoch": 2.67,
+      "grad_norm": 0.5447213053703308,
       "learning_rate": 3.282442748091603e-05,
+      "loss": 0.4813,
       "step": 350
     },
     {
+      "epoch": 3.0,
       "step": 393,
+      "total_flos": 6.537821345695587e+18,
+      "train_loss": 0.10885493445942421,
+      "train_runtime": 326.0744,
+      "train_samples_per_second": 38.577,
+      "train_steps_per_second": 1.205
     }
   ],
   "logging_steps": 50,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 150,
+  "total_flos": 6.537821345695587e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ab7c93030ee0675f7cb62ef84fae937633275f393809179d1c97eb8efc9f8a9
 size 4603

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b7c6b1294f1c7a5b41661b7b81e7a20f1435fd30a5935be0006c11d392ed527
 size 4603