mattlc committed on
Commit
947bd63
1 Parent(s): 9700680

Model save

Browse files
README.md CHANGED
@@ -15,8 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model was trained from scratch on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.6016
19
- - Wer: 0.3413
20
 
21
  ## Model description
22
 
@@ -43,7 +43,7 @@ The following hyperparameters were used during training:
43
  - total_train_batch_size: 32
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
- - num_epochs: 3.0
47
  - mixed_precision_training: Native AMP
48
 
49
  ### Training results
@@ -52,6 +52,9 @@ The following hyperparameters were used during training:
52
  |:-------------:|:-----:|:----:|:---------------:|:------:|
53
  | 0.6667 | 1.14 | 150 | 0.6710 | 0.3654 |
54
  | 0.5023 | 2.28 | 300 | 0.6016 | 0.3413 |
 
 
 
55
 
56
 
57
  ### Framework versions
 
15
 
16
  This model was trained from scratch on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.5649
19
+ - Wer: 0.3172
20
 
21
  ## Model description
22
 
 
43
  - total_train_batch_size: 32
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
+ - num_epochs: 6.0
47
  - mixed_precision_training: Native AMP
48
 
49
  ### Training results
 
52
  |:-------------:|:-----:|:----:|:---------------:|:------:|
53
  | 0.6667 | 1.14 | 150 | 0.6710 | 0.3654 |
54
  | 0.5023 | 2.28 | 300 | 0.6016 | 0.3413 |
55
+ | 0.4384 | 3.43 | 450 | 0.5907 | 0.3325 |
56
+ | 0.3536 | 4.57 | 600 | 0.5693 | 0.3221 |
57
+ | 0.3158 | 5.71 | 750 | 0.5649 | 0.3172 |
58
 
59
 
60
  ### Framework versions
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 2.99,
3
- "eval_loss": 0.5827092528343201,
4
- "eval_runtime": 68.6039,
5
  "eval_samples": 1042,
6
- "eval_samples_per_second": 15.189,
7
- "eval_steps_per_second": 1.91,
8
- "eval_wer": 0.3319146805527739,
9
- "train_loss": 0.6268785054447087,
10
- "train_runtime": 1592.3308,
11
  "train_samples": 4193,
12
- "train_samples_per_second": 7.9,
13
- "train_steps_per_second": 0.247
14
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.5835905075073242,
4
+ "eval_runtime": 69.9705,
5
  "eval_samples": 1042,
6
+ "eval_samples_per_second": 14.892,
7
+ "eval_steps_per_second": 1.872,
8
+ "eval_wer": 0.3325155217304226,
9
+ "train_loss": 0.10885493445942421,
10
+ "train_runtime": 326.0744,
11
  "train_samples": 4193,
12
+ "train_samples_per_second": 38.577,
13
+ "train_steps_per_second": 1.205
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 2.99,
3
- "eval_loss": 0.5827092528343201,
4
- "eval_runtime": 68.6039,
5
  "eval_samples": 1042,
6
- "eval_samples_per_second": 15.189,
7
- "eval_steps_per_second": 1.91,
8
- "eval_wer": 0.3319146805527739
9
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.5835905075073242,
4
+ "eval_runtime": 69.9705,
5
  "eval_samples": 1042,
6
+ "eval_samples_per_second": 14.892,
7
+ "eval_steps_per_second": 1.872,
8
+ "eval_wer": 0.3325155217304226
9
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80c3c975ba443a304094adc8ab387fa7c8c9091ba01bd4d87d6b793d57f6ec45
3
  size 1262012432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fac84934db9c8554aab3ce8cee85785fd318dc840cc531e5fc0096f87cd29cf7
3
  size 1262012432
runs/Apr17_14-41-03_tranceformer/events.out.tfevents.1713365442.tranceformer.212254.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:978e0c96ef9de222bf00db0ac475a3630f887e3cbd568a320402afbb50ad5415
3
+ size 406
runs/Apr17_14-56-37_tranceformer/events.out.tfevents.1713365920.tranceformer.213799.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c75486a7832f19fdb07472811621991094221e4892a336ff91f2430f734e1f
3
+ size 9959
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.99,
3
- "train_loss": 0.6268785054447087,
4
- "train_runtime": 1592.3308,
5
  "train_samples": 4193,
6
- "train_samples_per_second": 7.9,
7
- "train_steps_per_second": 0.247
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.10885493445942421,
4
+ "train_runtime": 326.0744,
5
  "train_samples": 4193,
6
+ "train_samples_per_second": 38.577,
7
+ "train_steps_per_second": 1.205
8
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.988593155893536,
5
  "eval_steps": 150,
6
  "global_step": 393,
7
  "is_hyper_param_search": false,
@@ -69,20 +69,20 @@
69
  "step": 300
70
  },
71
  {
72
- "epoch": 2.66,
73
- "grad_norm": 0.6847591996192932,
74
  "learning_rate": 3.282442748091603e-05,
75
- "loss": 0.4834,
76
  "step": 350
77
  },
78
  {
79
- "epoch": 2.99,
80
  "step": 393,
81
- "total_flos": 6.536711598368473e+18,
82
- "train_loss": 0.6268785054447087,
83
- "train_runtime": 1592.3308,
84
- "train_samples_per_second": 7.9,
85
- "train_steps_per_second": 0.247
86
  }
87
  ],
88
  "logging_steps": 50,
@@ -90,7 +90,7 @@
90
  "num_input_tokens_seen": 0,
91
  "num_train_epochs": 3,
92
  "save_steps": 150,
93
- "total_flos": 6.536711598368473e+18,
94
  "train_batch_size": 16,
95
  "trial_name": null,
96
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.9961977186311786,
5
  "eval_steps": 150,
6
  "global_step": 393,
7
  "is_hyper_param_search": false,
 
69
  "step": 300
70
  },
71
  {
72
+ "epoch": 2.67,
73
+ "grad_norm": 0.5447213053703308,
74
  "learning_rate": 3.282442748091603e-05,
75
+ "loss": 0.4813,
76
  "step": 350
77
  },
78
  {
79
+ "epoch": 3.0,
80
  "step": 393,
81
+ "total_flos": 6.537821345695587e+18,
82
+ "train_loss": 0.10885493445942421,
83
+ "train_runtime": 326.0744,
84
+ "train_samples_per_second": 38.577,
85
+ "train_steps_per_second": 1.205
86
  }
87
  ],
88
  "logging_steps": 50,
 
90
  "num_input_tokens_seen": 0,
91
  "num_train_epochs": 3,
92
  "save_steps": 150,
93
+ "total_flos": 6.537821345695587e+18,
94
  "train_batch_size": 16,
95
  "trial_name": null,
96
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ab7c93030ee0675f7cb62ef84fae937633275f393809179d1c97eb8efc9f8a9
3
  size 4603
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b7c6b1294f1c7a5b41661b7b81e7a20f1435fd30a5935be0006c11d392ed527
3
  size 4603