End of training

Browse files

Files changed (5) hide show

README.md +108 -0
config.json +109 -0
preprocessor_config.json +9 -0
pytorch_model.bin +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,108 @@

+---
+license: apache-2.0
+base_model: facebook/wav2vec2-xls-r-300m
+tags:
+- generated_from_trainer
+datasets:
+- common_voice_13_0
+metrics:
+- wer
+model-index:
+- name: output
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: common_voice_13_0
+      type: common_voice_13_0
+      config: hi
+      split: test
+      args: hi
+    metrics:
+    - name: Wer
+      type: wer
+      value: 1.019918009027289
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# output
+This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the Hindi (`hi`) configuration of the common_voice_13_0 dataset.
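+It achieves the following results on the evaluation set (these match the final logged evaluation at step 12400 in the training results table below). Note that, assuming Wer is reported as a fraction rather than a percentage, a value above 1.0 means the predictions contain more word-level errors than there are reference words, so the transcriptions and the text normalization used for scoring should be checked before relying on this model: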
+- Loss: 0.7883
+- Wer: 1.0199
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
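+The model metadata lists the `hi` (Hindi) configuration of common_voice_13_0, with the reported metrics computed on its `test` split. More information is needed on the training split(s) and on the text/audio preprocessing that was applied.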
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0003
+- train_batch_size: 2
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 500
+- num_epochs: 30
+### Training results
+| Training Loss | Epoch | Step  | Validation Loss | Wer    |
+|:-------------:|:-----:|:-----:|:---------------:|:------:|
+| 5.92          | 0.95  | 400   | 2.9522          | 1.0026 |
+| 1.0435        | 1.89  | 800   | 0.8608          | 1.0552 |
+| 0.5354        | 2.84  | 1200  | 0.7762          | 1.0169 |
+| 0.404         | 3.79  | 1600  | 0.6984          | 1.0293 |
+| 0.3301        | 4.73  | 2000  | 0.6811          | 1.0217 |
+| 0.2745        | 5.68  | 2400  | 0.7027          | 1.0308 |
+| 0.2346        | 6.63  | 2800  | 0.7296          | 1.0185 |
+| 0.2096        | 7.57  | 3200  | 0.7148          | 1.0294 |
+| 0.1912        | 8.52  | 3600  | 0.7109          | 1.0335 |
+| 0.172         | 9.47  | 4000  | 0.7894          | 1.0252 |
+| 0.1567        | 10.41 | 4400  | 0.7592          | 1.0219 |
+| 0.1457        | 11.36 | 4800  | 0.8030          | 1.0141 |
+| 0.1337        | 12.31 | 5200  | 0.7811          | 1.0237 |
+| 0.1288        | 13.25 | 5600  | 0.7703          | 1.0188 |
+| 0.1165        | 14.2  | 6000  | 0.7728          | 1.0199 |
+| 0.105         | 15.15 | 6400  | 0.7934          | 1.0206 |
+| 0.1028        | 16.09 | 6800  | 0.7978          | 1.0185 |
+| 0.092         | 17.04 | 7200  | 0.8276          | 1.0289 |
+| 0.0901        | 17.99 | 7600  | 0.7881          | 1.0202 |
+| 0.0818        | 18.93 | 8000  | 0.7847          | 1.0162 |
+| 0.0801        | 19.88 | 8400  | 0.8142          | 1.0230 |
+| 0.0768        | 20.83 | 8800  | 0.7735          | 1.0215 |
+| 0.0721        | 21.78 | 9200  | 0.7941          | 1.0227 |
+| 0.0658        | 22.72 | 9600  | 0.8100          | 1.0219 |
+| 0.0627        | 23.67 | 10000 | 0.7592          | 1.0196 |
+| 0.0591        | 24.62 | 10400 | 0.8028          | 1.0210 |
+| 0.0537        | 25.56 | 10800 | 0.8019          | 1.0253 |
+| 0.0507        | 26.51 | 11200 | 0.7951          | 1.0212 |
+| 0.0495        | 27.46 | 11600 | 0.7893          | 1.0207 |
+| 0.0466        | 28.4  | 12000 | 0.7854          | 1.0188 |
+| 0.0431        | 29.35 | 12400 | 0.7883          | 1.0199 |
+### Framework versions
+- Transformers 4.32.1
+- Pytorch 2.2.0+cu121
+- Datasets 2.12.0
+- Tokenizers 0.13.2

config.json ADDED Viewed

	@@ -0,0 +1,109 @@

+{
+  "_name_or_path": "facebook/wav2vec2-xls-r-300m",
+  "activation_dropout": 0.0,
+  "adapter_attn_dim": null,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 768,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 100,
+  "proj_codevector_dim": 768,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.32.1",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 102,
+  "xvector_output_dim": 512
+}

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24c2429c7148cb6f3d7db960bd0db87b4a02593fd8efa7465446c88dd917eaf6
+size 1262320554

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:151ceed9b3e926e02a9ab2312f33ff4816de2fa220db7dc87f19b2f110c7c22e
+size 4408