First model version

Browse files

Files changed (5) hide show

README.md +157 -0
config.json +109 -0
preprocessor_config.json +9 -0
pytorch_model.bin +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,157 @@

+---
+license: apache-2.0
+base_model: facebook/wav2vec2-xls-r-300m
+tags:
+- generated_from_trainer
+datasets:
+- audiofolder
+metrics:
+- wer
+model-index:
+- name: wav2vec2-xls-r-300m-ja-cv-14_4
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: audiofolder
+      type: audiofolder
+      config: default
+      split: train[:20%]
+      args: default
+    metrics:
+    - name: Wer
+      type: wer
+      value: 0.05644302449414271
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# wav2vec2-xls-r-300m-ja-cv-14_4
+This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on a dataset loaded with the generic `audiofolder` loader (the underlying dataset was not named by the Trainer).
+It achieves the following results on the evaluation set (recorded in the model-index metadata as split `train[:20%]`):
+- Loss: 0.2072
+- Wer: 0.0564
+## Model description
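+A `Wav2Vec2ForCTC` speech-recognition model (24 hidden layers, hidden size 1024, CTC vocabulary of 2604 tokens) initialised from `facebook/wav2vec2-xls-r-300m`. More information needed.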
+## Intended uses & limitations
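+The feature extractor expects audio sampled at 16 kHz. The `ja` in the model name suggests Japanese speech, but this has not been confirmed by the author. More information needed.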
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 2
+- eval_batch_size: 1
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 8
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 500
+- num_epochs: 30
+### Training results
+| Training Loss | Epoch | Step   | Validation Loss | Wer    |
+|:-------------:|:-----:|:------:|:---------------:|:------:|
+| 8.3105        | 0.37  | 2000   | 1.9407          | 0.5234 |
+| 1.6271        | 0.75  | 4000   | 1.1355          | 0.3500 |
+| 1.1075        | 1.12  | 6000   | 0.8428          | 0.2740 |
+| 0.8659        | 1.49  | 8000   | 0.7523          | 0.2491 |
+| 0.7568        | 1.87  | 10000  | 0.6115          | 0.2097 |
+| 0.6269        | 2.24  | 12000  | 0.5771          | 0.1973 |
+| 0.5631        | 2.61  | 14000  | 0.5204          | 0.1713 |
+| 0.5197        | 2.99  | 16000  | 0.4896          | 0.1635 |
+| 0.4218        | 3.36  | 18000  | 0.4506          | 0.1554 |
+| 0.4069        | 3.73  | 20000  | 0.4684          | 0.1579 |
+| 0.3629        | 4.1   | 22000  | 0.4190          | 0.1408 |
+| 0.324         | 4.48  | 24000  | 0.3981          | 0.1337 |
+| 0.3168        | 4.85  | 26000  | 0.3947          | 0.1310 |
+| 0.2845        | 5.22  | 28000  | 0.3974          | 0.1266 |
+| 0.2714        | 5.6   | 30000  | 0.3703          | 0.1166 |
+| 0.2626        | 5.97  | 32000  | 0.3711          | 0.1173 |
+| 0.2274        | 6.34  | 34000  | 0.3991          | 0.1254 |
+| 0.2251        | 6.72  | 36000  | 0.3844          | 0.1159 |
+| 0.2132        | 7.09  | 38000  | 0.3480          | 0.1088 |
+| 0.1867        | 7.46  | 40000  | 0.3330          | 0.1045 |
+| 0.1929        | 7.84  | 42000  | 0.3320          | 0.1054 |
+| 0.1663        | 8.21  | 44000  | 0.3492          | 0.1116 |
+| 0.1668        | 8.58  | 46000  | 0.3580          | 0.1045 |
+| 0.1641        | 8.96  | 48000  | 0.2957          | 0.0920 |
+| 0.1494        | 9.33  | 50000  | 0.3281          | 0.0990 |
+| 0.1495        | 9.7   | 52000  | 0.3155          | 0.0948 |
+| 0.1454        | 10.07 | 54000  | 0.3297          | 0.0994 |
+| 0.1323        | 10.45 | 56000  | 0.3151          | 0.0946 |
+| 0.1321        | 10.82 | 58000  | 0.3073          | 0.0901 |
+| 0.1263        | 11.19 | 60000  | 0.2979          | 0.0887 |
+| 0.1165        | 11.57 | 62000  | 0.3122          | 0.0968 |
+| 0.1179        | 11.94 | 64000  | 0.2941          | 0.0892 |
+| 0.107         | 12.31 | 66000  | 0.2907          | 0.0847 |
+| 0.1037        | 12.69 | 68000  | 0.2964          | 0.0851 |
+| 0.105         | 13.06 | 70000  | 0.2777          | 0.0820 |
+| 0.0942        | 13.43 | 72000  | 0.2758          | 0.0783 |
+| 0.0977        | 13.81 | 74000  | 0.2706          | 0.0768 |
+| 0.0931        | 14.18 | 76000  | 0.2638          | 0.0755 |
+| 0.0881        | 14.55 | 78000  | 0.2835          | 0.0771 |
+| 0.0861        | 14.93 | 80000  | 0.2704          | 0.0776 |
+| 0.0834        | 15.3  | 82000  | 0.2619          | 0.0765 |
+| 0.079         | 15.67 | 84000  | 0.2583          | 0.0759 |
+| 0.0783        | 16.04 | 86000  | 0.2459          | 0.0724 |
+| 0.0753        | 16.42 | 88000  | 0.2647          | 0.0793 |
+| 0.0739        | 16.79 | 90000  | 0.2375          | 0.0696 |
+| 0.0721        | 17.16 | 92000  | 0.2432          | 0.0702 |
+| 0.0687        | 17.54 | 94000  | 0.2269          | 0.0675 |
+| 0.0685        | 17.91 | 96000  | 0.2516          | 0.0724 |
+| 0.066         | 18.28 | 98000  | 0.2372          | 0.0679 |
+| 0.0631        | 18.66 | 100000 | 0.2417          | 0.0670 |
+| 0.0626        | 19.03 | 102000 | 0.2416          | 0.0676 |
+| 0.0583        | 19.4  | 104000 | 0.2491          | 0.0696 |
+| 0.0575        | 19.78 | 106000 | 0.2445          | 0.0675 |
+| 0.0545        | 20.15 | 108000 | 0.2320          | 0.0635 |
+| 0.0517        | 20.52 | 110000 | 0.2312          | 0.0647 |
+| 0.0514        | 20.9  | 112000 | 0.2511          | 0.0676 |
+| 0.0499        | 21.27 | 114000 | 0.2299          | 0.0663 |
+| 0.0486        | 21.64 | 116000 | 0.2400          | 0.0635 |
+| 0.0467        | 22.01 | 118000 | 0.2318          | 0.0624 |
+| 0.0441        | 22.39 | 120000 | 0.2221          | 0.0599 |
+| 0.0441        | 22.76 | 122000 | 0.2359          | 0.0630 |
+| 0.0427        | 23.13 | 124000 | 0.2220          | 0.0603 |
+| 0.0412        | 23.51 | 126000 | 0.2345          | 0.0608 |
+| 0.041         | 23.88 | 128000 | 0.2292          | 0.0598 |
+| 0.0386        | 24.25 | 130000 | 0.2342          | 0.0615 |
+| 0.0376        | 24.63 | 132000 | 0.2291          | 0.0612 |
+| 0.0385        | 25.0  | 134000 | 0.2231          | 0.0631 |
+| 0.0347        | 25.37 | 136000 | 0.2196          | 0.0616 |
+| 0.035         | 25.75 | 138000 | 0.2147          | 0.0608 |
+| 0.0328        | 26.12 | 140000 | 0.2216          | 0.0616 |
+| 0.0318        | 26.49 | 142000 | 0.2195          | 0.0587 |
+| 0.0315        | 26.87 | 144000 | 0.2216          | 0.0594 |
+| 0.0303        | 27.24 | 146000 | 0.2126          | 0.0591 |
+| 0.0292        | 27.61 | 148000 | 0.2126          | 0.0563 |
+| 0.0291        | 27.99 | 150000 | 0.2134          | 0.0574 |
+| 0.0275        | 28.36 | 152000 | 0.2187          | 0.0583 |
+| 0.0281        | 28.73 | 154000 | 0.2098          | 0.0571 |
+| 0.0257        | 29.1  | 156000 | 0.2086          | 0.0564 |
+| 0.0261        | 29.48 | 158000 | 0.2071          | 0.0568 |
+| 0.0247        | 29.85 | 160000 | 0.2072          | 0.0564 |
+### Framework versions
+- Transformers 4.31.0
+- Pytorch 2.0.1+cu117
+- Datasets 2.14.3
+- Tokenizers 0.13.3

config.json ADDED Viewed

	@@ -0,0 +1,109 @@

+{
+  "_name_or_path": "facebook/wav2vec2-xls-r-300m",
+  "activation_dropout": 0.0,
+  "adapter_attn_dim": null,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 768,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.1,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 0,
+  "proj_codevector_dim": 768,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.31.0",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 2604,
+  "xvector_output_dim": 512
+}

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9628c65c15828e3704e011ec5f3844704b9b57b08e67177ce384fa434b9672b
+size 1272578221

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a8a09cd3b6183bd5499170b587274b4b73fae1882f01edcd1b26fcd7409f1c9
+size 4027