SpeechResearch/wtimit-base-normal-all-nofreeze

Browse files

Files changed (5) hide show

README.md +199 -0
config.json +118 -0
model.safetensors +3 -0
preprocessor_config.json +9 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,199 @@

+---
+license: apache-2.0
+base_model: facebook/wav2vec2-base
+tags:
+- generated_from_trainer
+datasets:
+- wtimit_asr
+metrics:
+- wer
+model-index:
+- name: wtimit-base-normal-all-nofreeze
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: wtimit_asr
+      type: wtimit_asr
+      config: clean
+      split: None
+      args: clean
+    metrics:
+    - name: Wer
+      type: wer
+      value: 0.09987953700309014
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# wtimit-base-normal-all-nofreeze
+This model is a fine-tuned version of [facebook/wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on the wtimit_asr dataset.
+It achieves the following results on the evaluation set at the final evaluation step (step 123000, the last row of the training results table):
+- Loss: 0.3190
+- Wer: 0.0999
+## Model description
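+A `Wav2Vec2ForCTC` model initialized from `facebook/wav2vec2-base` (12 transformer layers, hidden size 768, 12 attention heads) with a CTC output vocabulary of 32 tokens. The bundled feature extractor is configured for a 16 kHz sampling rate. More information needed.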
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 1000
+- num_epochs: 50
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch | Step   | Validation Loss | Wer    |
+|:-------------:|:-----:|:------:|:---------------:|:------:|
+| 1.5076        | 0.4   | 1000   | 1.1220          | 0.6793 |
+| 0.4102        | 0.81  | 2000   | 0.7851          | 0.4338 |
+| 0.2278        | 1.21  | 3000   | 0.6897          | 0.3203 |
+| 0.1723        | 1.61  | 4000   | 0.5668          | 0.2890 |
+| 0.1407        | 2.02  | 5000   | 0.4399          | 0.2362 |
+| 0.117         | 2.42  | 6000   | 0.4853          | 0.2508 |
+| 0.098         | 2.83  | 7000   | 0.6732          | 0.2871 |
+| 0.0862        | 3.23  | 8000   | 0.5802          | 0.2680 |
+| 0.0806        | 3.63  | 9000   | 0.4730          | 0.2488 |
+| 0.0706        | 4.04  | 10000  | 0.4001          | 0.1953 |
+| 0.061         | 4.44  | 11000  | 0.4108          | 0.1971 |
+| 0.063         | 4.84  | 12000  | 0.4544          | 0.2056 |
+| 0.0527        | 5.25  | 13000  | 0.4235          | 0.1938 |
+| 0.049         | 5.65  | 14000  | 0.4375          | 0.2054 |
+| 0.0489        | 6.06  | 15000  | 0.5451          | 0.2522 |
+| 0.0473        | 6.46  | 16000  | 0.3939          | 0.1868 |
+| 0.0442        | 6.86  | 17000  | 0.5662          | 0.2548 |
+| 0.0428        | 7.27  | 18000  | 0.6695          | 0.2755 |
+| 0.0379        | 7.67  | 19000  | 0.3929          | 0.1947 |
+| 0.0398        | 8.07  | 20000  | 0.4446          | 0.2066 |
+| 0.0336        | 8.48  | 21000  | 0.5409          | 0.2260 |
+| 0.0316        | 8.88  | 22000  | 0.3819          | 0.1715 |
+| 0.0322        | 9.29  | 23000  | 0.3861          | 0.1711 |
+| 0.0352        | 9.69  | 24000  | 0.4063          | 0.1728 |
+| 0.0315        | 10.09 | 25000  | 0.4992          | 0.2146 |
+| 0.0254        | 10.5  | 26000  | 0.5838          | 0.2158 |
+| 0.0243        | 10.9  | 27000  | 0.3458          | 0.1523 |
+| 0.0245        | 11.3  | 28000  | 0.5121          | 0.1953 |
+| 0.0231        | 11.71 | 29000  | 0.3773          | 0.1616 |
+| 0.0202        | 12.11 | 30000  | 0.4110          | 0.1715 |
+| 0.0261        | 12.52 | 31000  | 0.5376          | 0.2116 |
+| 0.0243        | 12.92 | 32000  | 0.4066          | 0.1569 |
+| 0.0201        | 13.32 | 33000  | 0.5944          | 0.2276 |
+| 0.0211        | 13.73 | 34000  | 0.4670          | 0.1997 |
+| 0.0249        | 14.13 | 35000  | 0.5521          | 0.2254 |
+| 0.021         | 14.53 | 36000  | 0.4602          | 0.2061 |
+| 0.0169        | 14.94 | 37000  | 0.4870          | 0.1690 |
+| 0.0184        | 15.34 | 38000  | 0.6038          | 0.2208 |
+| 0.0207        | 15.74 | 39000  | 0.5266          | 0.2068 |
+| 0.0209        | 16.15 | 40000  | 0.5197          | 0.2083 |
+| 0.0175        | 16.55 | 41000  | 0.5074          | 0.1927 |
+| 0.0164        | 16.96 | 42000  | 0.4594          | 0.1615 |
+| 0.0164        | 17.36 | 43000  | 0.2956          | 0.1151 |
+| 0.0142        | 17.76 | 44000  | 0.3834          | 0.1580 |
+| 0.0139        | 18.17 | 45000  | 0.5316          | 0.2175 |
+| 0.0181        | 18.57 | 46000  | 0.5226          | 0.1890 |
+| 0.0159        | 18.97 | 47000  | 0.4914          | 0.1689 |
+| 0.0127        | 19.38 | 48000  | 0.5454          | 0.1957 |
+| 0.0136        | 19.78 | 49000  | 0.5530          | 0.2172 |
+| 0.0129        | 20.19 | 50000  | 0.6980          | 0.2636 |
+| 0.0131        | 20.59 | 51000  | 0.3984          | 0.1379 |
+| 0.0123        | 20.99 | 52000  | 0.4925          | 0.1843 |
+| 0.0095        | 21.4  | 53000  | 0.5367          | 0.1931 |
+| 0.0124        | 21.8  | 54000  | 0.4299          | 0.1763 |
+| 0.0115        | 22.2  | 55000  | 0.4797          | 0.1803 |
+| 0.0136        | 22.61 | 56000  | 0.6638          | 0.2300 |
+| 0.0121        | 23.01 | 57000  | 0.4292          | 0.1530 |
+| 0.0097        | 23.42 | 58000  | 0.4064          | 0.1520 |
+| 0.0143        | 23.82 | 59000  | 0.4691          | 0.1771 |
+| 0.0092        | 24.22 | 60000  | 0.5134          | 0.2009 |
+| 0.0097        | 24.63 | 61000  | 0.6165          | 0.2281 |
+| 0.0078        | 25.03 | 62000  | 0.4828          | 0.1863 |
+| 0.0114        | 25.43 | 63000  | 0.4817          | 0.1868 |
+| 0.0089        | 25.84 | 64000  | 0.5137          | 0.2003 |
+| 0.0083        | 26.24 | 65000  | 0.4194          | 0.1524 |
+| 0.01          | 26.65 | 66000  | 0.3416          | 0.1332 |
+| 0.0102        | 27.05 | 67000  | 0.3834          | 0.1475 |
+| 0.0076        | 27.45 | 68000  | 0.3390          | 0.1277 |
+| 0.0085        | 27.86 | 69000  | 0.4708          | 0.1843 |
+| 0.0074        | 28.26 | 70000  | 0.4434          | 0.1530 |
+| 0.0078        | 28.66 | 71000  | 0.2942          | 0.1104 |
+| 0.0075        | 29.07 | 72000  | 0.3623          | 0.1442 |
+| 0.0066        | 29.47 | 73000  | 0.4709          | 0.1547 |
+| 0.0073        | 29.87 | 74000  | 0.5198          | 0.1750 |
+| 0.0056        | 30.28 | 75000  | 0.3083          | 0.1211 |
+| 0.0066        | 30.68 | 76000  | 0.3204          | 0.1243 |
+| 0.0048        | 31.09 | 77000  | 0.3713          | 0.1326 |
+| 0.0047        | 31.49 | 78000  | 0.3121          | 0.1018 |
+| 0.0066        | 31.89 | 79000  | 0.4510          | 0.1473 |
+| 0.0053        | 32.3  | 80000  | 0.3599          | 0.1130 |
+| 0.0058        | 32.7  | 81000  | 0.4256          | 0.1463 |
+| 0.0056        | 33.1  | 82000  | 0.4393          | 0.1605 |
+| 0.0046        | 33.51 | 83000  | 0.6327          | 0.2056 |
+| 0.0049        | 33.91 | 84000  | 0.4069          | 0.1360 |
+| 0.0031        | 34.32 | 85000  | 0.4359          | 0.1458 |
+| 0.0052        | 34.72 | 86000  | 0.2825          | 0.1032 |
+| 0.0039        | 35.12 | 87000  | 0.3545          | 0.1256 |
+| 0.003         | 35.53 | 88000  | 0.3674          | 0.1252 |
+| 0.004         | 35.93 | 89000  | 0.3849          | 0.1288 |
+| 0.0029        | 36.33 | 90000  | 0.3465          | 0.1130 |
+| 0.003         | 36.74 | 91000  | 0.4034          | 0.1294 |
+| 0.0036        | 37.14 | 92000  | 0.3456          | 0.1209 |
+| 0.0033        | 37.55 | 93000  | 0.3882          | 0.1407 |
+| 0.0037        | 37.95 | 94000  | 0.3372          | 0.1094 |
+| 0.0025        | 38.35 | 95000  | 0.3601          | 0.1137 |
+| 0.0037        | 38.76 | 96000  | 0.2804          | 0.1027 |
+| 0.0022        | 39.16 | 97000  | 0.4160          | 0.1354 |
+| 0.0027        | 39.56 | 98000  | 0.3379          | 0.1202 |
+| 0.002         | 39.97 | 99000  | 0.3462          | 0.1171 |
+| 0.0021        | 40.37 | 100000 | 0.3694          | 0.1272 |
+| 0.0014        | 40.78 | 101000 | 0.3315          | 0.1048 |
+| 0.0025        | 41.18 | 102000 | 0.3316          | 0.1088 |
+| 0.002         | 41.58 | 103000 | 0.3776          | 0.1319 |
+| 0.0028        | 41.99 | 104000 | 0.3024          | 0.1028 |
+| 0.0015        | 42.39 | 105000 | 0.3087          | 0.1102 |
+| 0.0018        | 42.79 | 106000 | 0.3254          | 0.1067 |
+| 0.0028        | 43.2  | 107000 | 0.3305          | 0.1081 |
+| 0.002         | 43.6  | 108000 | 0.3445          | 0.1120 |
+| 0.0019        | 44.0  | 109000 | 0.3264          | 0.1082 |
+| 0.0019        | 44.41 | 110000 | 0.3650          | 0.1202 |
+| 0.001         | 44.81 | 111000 | 0.3415          | 0.1133 |
+| 0.0015        | 45.22 | 112000 | 0.3194          | 0.1044 |
+| 0.0011        | 45.62 | 113000 | 0.3302          | 0.1085 |
+| 0.0013        | 46.02 | 114000 | 0.3083          | 0.1053 |
+| 0.0008        | 46.43 | 115000 | 0.2976          | 0.0982 |
+| 0.0019        | 46.83 | 116000 | 0.3212          | 0.1057 |
+| 0.0006        | 47.23 | 117000 | 0.3415          | 0.1089 |
+| 0.0025        | 47.64 | 118000 | 0.3188          | 0.1043 |
+| 0.0009        | 48.04 | 119000 | 0.3136          | 0.1025 |
+| 0.0015        | 48.45 | 120000 | 0.3180          | 0.1050 |
+| 0.0013        | 48.85 | 121000 | 0.3439          | 0.1110 |
+| 0.0007        | 49.25 | 122000 | 0.3286          | 0.1048 |
+| 0.0014        | 49.66 | 123000 | 0.3190          | 0.0999 |
+### Framework versions
+- Transformers 4.39.3
+- Pytorch 2.0.1+cu117
+- Datasets 2.18.0
+- Tokenizers 0.15.2

config.json ADDED Viewed

	@@ -0,0 +1,118 @@

+{
+  "_name_or_path": "facebook/wav2vec2-base",
+  "activation_dropout": 0.0,
+  "adapter_attn_dim": null,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 256,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": false,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.1,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "freeze_feat_extract_train": true,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_channel_length": 10,
+  "mask_channel_min_space": 1,
+  "mask_channel_other": 0.0,
+  "mask_channel_prob": 0.0,
+  "mask_channel_selection": "static",
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_min_space": 1,
+  "mask_time_other": 0.0,
+  "mask_time_prob": 0.05,
+  "mask_time_selection": "static",
+  "model_type": "wav2vec2",
+  "no_mask_channel_overlap": false,
+  "no_mask_time_overlap": false,
+  "num_adapter_layers": 3,
+  "num_attention_heads": 12,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 12,
+  "num_negatives": 100,
+  "output_hidden_size": 768,
+  "pad_token_id": 29,
+  "proj_codevector_dim": 256,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.3",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 32,
+  "xvector_output_dim": 512
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f575b545fffd1953aa07fe018005f19e497e8a4fcb5d96b8e42bb7c6a94e4b33
+size 377611072

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:045d0b96326a5a4cff916fed2db4d066d80cccd2322ab47447c9947983f250eb
+size 4475