arslanarjumand/wav2vec-read-aloud

Browse files

Files changed (4) hide show

README.md +15 -15
config.json +4 -4
model.safetensors +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -15,11 +15,11 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 973.4864
-- Pcc Accuracy: 0.7547
-- Pcc Fluency: 0.7664
-- Pcc Total Score: 0.8143
-- Pcc Content: nan
 ## Model description
@@ -38,7 +38,7 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 5.5e-05
 - train_batch_size: 2
 - eval_batch_size: 6
 - seed: 42
@@ -46,20 +46,20 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 24
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
-- lr_scheduler_warmup_ratio: 0.4
-- num_epochs: 15
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Pcc Accuracy | Pcc Fluency | Pcc Total Score | Pcc Content |
 |:-------------:|:-----:|:----:|:---------------:|:------------:|:-----------:|:---------------:|:-----------:|
-| 2390.3109     | 1.95  | 500  | 2342.6951       | nan          | 0.4815      | nan             | nan         |
-| 2164.6891     | 3.9   | 1000 | 2318.7217       | nan          | 0.6461      | nan             | nan         |
-| 1078.8019     | 5.85  | 1500 | 1029.2085       | 0.6188       | 0.7014      | 0.6845          | nan         |
-| 974.6556      | 7.8   | 2000 | 985.5543        | 0.7117       | 0.7355      | 0.7743          | nan         |
-| 1002.623      | 9.75  | 2500 | 989.1628        | 0.7401       | 0.7533      | 0.7995          | nan         |
-| 947.5643      | 11.7  | 3000 | 972.3806        | 0.7507       | 0.7628      | 0.8103          | nan         |
-| 995.6286      | 13.65 | 3500 | 973.4864        | 0.7547       | 0.7664      | 0.8143          | nan         |
 ### Framework versions

 This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1115
+- Pcc Accuracy: 0.7918
+- Pcc Fluency: 0.7940
+- Pcc Total Score: 0.8472
+- Pcc Content: 0.8160
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.00055
 - train_batch_size: 2
 - eval_batch_size: 6
 - seed: 42
 - total_train_batch_size: 24
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.25
+- num_epochs: 14
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Pcc Accuracy | Pcc Fluency | Pcc Total Score | Pcc Content |
 |:-------------:|:-----:|:----:|:---------------:|:------------:|:-----------:|:---------------:|:-----------:|
+| 0.1483        | 1.94  | 500  | 0.1659          | 0.7256       | 0.6982      | 0.7616          | 0.7480      |
+| 0.1338        | 3.89  | 1000 | 0.1369          | 0.7706       | 0.7680      | 0.8154          | 0.7835      |
+| 0.124         | 5.83  | 1500 | 0.1754          | 0.6686       | 0.6459      | 0.7110          | 0.6823      |
+| 0.1147        | 7.77  | 2000 | 0.1149          | 0.7838       | 0.7848      | 0.8368          | 0.8048      |
+| 0.1024        | 9.72  | 2500 | 0.1135          | 0.7802       | 0.7819      | 0.8340          | 0.8048      |
+| 0.0945        | 11.66 | 3000 | 0.1168          | 0.7891       | 0.7876      | 0.8418          | 0.8095      |
+| 0.0945        | 13.61 | 3500 | 0.1115          | 0.7918       | 0.7940      | 0.8472          | 0.8160      |
 ### Framework versions

config.json CHANGED Viewed

@@ -11,7 +11,7 @@
   ],
   "attention_dropout": 0.0094,
   "bos_token_id": 1,
-  "classifier_proj_size": 128,
   "codevector_dim": 768,
   "conformer_conv_dropout": 0.1,
   "contrastive_logits_temperature": 0.1,
@@ -46,17 +46,17 @@
   "left_max_position_embeddings": 64,
   "mask_feature_length": 5,
   "mask_feature_min_masks": 5,
-  "mask_feature_prob": 0.0575,
   "mask_time_length": 5,
   "mask_time_min_masks": 5,
-  "mask_time_prob": 0.0585,
   "max_source_positions": 5000,
   "model_type": "wav2vec2-bert",
   "num_adapter_layers": 1,
   "num_attention_heads": 16,
   "num_codevector_groups": 2,
   "num_codevectors_per_group": 320,
-  "num_hidden_layers": 12,
   "num_negatives": 100,
   "output_hidden_size": 1024,
   "pad_token_id": 0,

   ],
   "attention_dropout": 0.0094,
   "bos_token_id": 1,
+  "classifier_proj_size": 64,
   "codevector_dim": 768,
   "conformer_conv_dropout": 0.1,
   "contrastive_logits_temperature": 0.1,
   "left_max_position_embeddings": 64,
   "mask_feature_length": 5,
   "mask_feature_min_masks": 5,
+  "mask_feature_prob": 0.00575,
   "mask_time_length": 5,
   "mask_time_min_masks": 5,
+  "mask_time_prob": 0.00585,
   "max_source_positions": 5000,
   "model_type": "wav2vec2-bert",
   "num_adapter_layers": 1,
   "num_attention_heads": 16,
   "num_codevector_groups": 2,
   "num_codevectors_per_group": 320,
+  "num_hidden_layers": 10,
   "num_negatives": 100,
   "output_hidden_size": 1024,
   "pad_token_id": 0,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4660acd2a4de557833aa1ce4fbd7ab93333e7c01cf645ab396a4e3a220db11a
-size 1206994592

 version https://git-lfs.github.com/spec/v1
+oid sha256:51cea6e9fe9f1cd5e680f7f8cb4412cd742b203bb3c39686b62ab7ab1d3af4ca
+size 1000576520

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:596191023888232fe0a588808a22c46817c7b68e607063d9fa88732a803e892a
 size 4728

 version https://git-lfs.github.com/spec/v1
+oid sha256:602f8283ebf51735a04449e81f60a8b79bbd12594166992e39d583a5f9b0d583
 size 4728