Training in progress, step 200
- .ipynb_checkpoints/eval-checkpoint.py +13 -3
- .ipynb_checkpoints/mozilla-foundation_common_voice_8_0_uz_test_eval_results-checkpoint.txt +2 -0
- .ipynb_checkpoints/run-checkpoint.sh +6 -6
- .ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py +1 -1
- config.json +5 -6
- log_mozilla-foundation_common_voice_8_0_uz_test_predictions.txt +0 -0
- log_mozilla-foundation_common_voice_8_0_uz_test_targets.txt +0 -0
- pytorch_model.bin +1 -1
- run.sh +6 -6
- run_speech_recognition_ctc.py +1 -1
- runs/Feb06_16-03-20_job-0a778896-a7e2-46e9-bcf5-016f91f242cf/1644163569.9492478/events.out.tfevents.1644163569.job-0a778896-a7e2-46e9-bcf5-016f91f242cf.775427.1 +3 -0
- runs/Feb06_16-03-20_job-0a778896-a7e2-46e9-bcf5-016f91f242cf/events.out.tfevents.1644163569.job-0a778896-a7e2-46e9-bcf5-016f91f242cf.775427.0 +3 -0
- special_tokens_map.json +1 -1
- training_args.bin +1 -1
.ipynb_checkpoints/eval-checkpoint.py CHANGED
@@ -2,6 +2,8 @@
 import argparse
 import functools
 import re
+import string
+import unidecode
 from typing import Dict
 
 from datasets import Audio, Dataset, DatasetDict, load_dataset, load_metric
@@ -50,9 +52,17 @@ def log_results(result: Dataset, args: Dict[str, str]):
 def normalize_text(text: str) -> str:
     """DO ADAPT FOR YOUR USE CASE. this function normalizes the target text."""
 
-    chars_to_ignore_regex = '[
+    chars_to_ignore_regex = f'[{re.escape(string.punctuation)}]'  # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
 
-    text = re.sub(
+    text = re.sub(
+        chars_to_ignore_regex,
+        "",
+        re.sub("['`´]", "’",  # elsewhere probably meant as glottal stop
+            re.sub("([og])['`´]", "\g<1>‘",  # after o/g indicate modified char
+                unidecode.unidecode(text).lower()
+            )
+        )
+    ) + " "
 
     # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # note that order is important here!
@@ -107,7 +117,7 @@ def main(args):
     dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)
 
     # for testing: only process the first two examples as a test
-    dataset = dataset.select(range(10))
+    # dataset = dataset.select(range(10))
 
     # load processor
     feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
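For readability, the same normalization logic as a standalone function, written linearly instead of nested. This is a sketch that assumes the `unidecode` package is installed; the example string is hypothetical, not taken from the test set.

import re
import string

import unidecode

def normalize_text(text: str) -> str:
    # Flatten to lowercase ASCII first, so Uzbek o‘/g‘ and ’ all become '.
    text = unidecode.unidecode(text).lower()
    # An apostrophe right after o/g marks the modified letters o‘/g‘ (U+2018).
    text = re.sub(r"([og])['`´]", r"\g<1>‘", text)
    # Any remaining apostrophe is most likely a glottal stop, written ’ (U+2019).
    text = re.sub(r"['`´]", "’", text)
    # Strip the punctuation that was ignored during training, then pad.
    return re.sub(f"[{re.escape(string.punctuation)}]", "", text) + " "

print(normalize_text("G'ayrat va mas'uliyat!"))  # "g‘ayrat va mas’uliyat "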
.ipynb_checkpoints/mozilla-foundation_common_voice_8_0_uz_test_eval_results-checkpoint.txt ADDED
@@ -0,0 +1,2 @@
+WER: 0.6097560975609756
+CER: 0.12727272727272726
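These numbers (WER 0.61 / CER 0.13 at step 200) come from eval.py, which uses `datasets.load_metric` as imported above. A minimal sketch of that computation; the toy strings are hypothetical, not from the Common Voice test set.

from datasets import load_metric

wer_metric = load_metric("wer")
cer_metric = load_metric("cer")  # requires the jiwer package

predictions = ["salom dunyo", "yaxshi kun"]
references = ["salom dunya", "yaxshi kun"]

# WER counts word-level edits; CER counts character-level edits.
print("WER:", wer_metric.compute(predictions=predictions, references=references))
print("CER:", cer_metric.compute(predictions=predictions, references=references))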
.ipynb_checkpoints/run-checkpoint.sh CHANGED
@@ -6,8 +6,8 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
     --train_split_name="train[:30%]" \
     --eval_split_name="validation[50%:]" \
     --overwrite_output_dir \
-    --num_train_epochs="
-    --per_device_train_batch_size="
+    --num_train_epochs="50" \
+    --per_device_train_batch_size="64" \
     --per_device_eval_batch_size="8" \
     --gradient_accumulation_steps="4" \
     --learning_rate="3e-5" \
@@ -15,19 +15,19 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
     --length_column_name="input_length" \
     --evaluation_strategy="steps" \
     --text_column_name="sentence" \
-    --eval_metrics
+    --eval_metrics wer cer \
     --save_steps="200" \
     --eval_steps="200" \
     --logging_steps="100" \
     --min_duration_in_seconds="0.2" \
-    --layerdrop="0.
+    --layerdrop="0.05" \
     --activation_dropout="0.1" \
     --save_total_limit="3" \
     --freeze_feature_encoder \
-    --feat_proj_dropout="0.
+    --feat_proj_dropout="0.05" \
     --mask_time_prob="0.50" \
     --mask_time_length="10" \
-    --mask_feature_prob="0.
+    --mask_feature_prob="0.15" \
     --mask_feature_length="64" \
     --gradient_checkpointing \
     --use_auth_token \
.ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py CHANGED
@@ -557,7 +557,7 @@ def main():
         "gradient_checkpointing": training_args.gradient_checkpointing,
         "layerdrop": model_args.layerdrop,
         "ctc_loss_reduction": model_args.ctc_loss_reduction,
-        "
+        "ctc_zero_infinity": True,
         "pad_token_id": tokenizer.pad_token_id,
         "vocab_size": len(tokenizer),
         "activation_dropout": model_args.activation_dropout,
config.json CHANGED
@@ -42,14 +42,14 @@
     2
   ],
   "ctc_loss_reduction": "mean",
-  "ctc_zero_infinity":
+  "ctc_zero_infinity": true,
   "diversity_loss_weight": 0.1,
   "do_stable_layer_norm": true,
   "eos_token_id": 2,
   "feat_extract_activation": "gelu",
   "feat_extract_dropout": 0.0,
   "feat_extract_norm": "layer",
-  "feat_proj_dropout": 0.
+  "feat_proj_dropout": 0.05,
   "feat_quantizer_dropout": 0.0,
   "final_dropout": 0.0,
   "hidden_act": "gelu",
@@ -58,10 +58,10 @@
   "initializer_range": 0.02,
   "intermediate_size": 4096,
   "layer_norm_eps": 1e-05,
-  "layerdrop": 0.
+  "layerdrop": 0.05,
   "mask_feature_length": 64,
   "mask_feature_min_masks": 0,
-  "mask_feature_prob": 0.
+  "mask_feature_prob": 0.15,
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
   "mask_time_prob": 0.5,
@@ -103,6 +103,5 @@
   "transformers_version": "4.16.2",
   "use_weighted_layer_sum": false,
   "vocab_size": 33,
-  "xvector_output_dim": 512,
-  "zero_infinity": true
+  "xvector_output_dim": 512
 }
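A quick way to sanity-check the new values, assuming this commit is checked out locally so config.json sits in the working directory (a sketch, not part of the repo):

from transformers import Wav2Vec2Config

# Read the freshly committed config and confirm the updated fields.
config = Wav2Vec2Config.from_json_file("config.json")
print(config.ctc_zero_infinity)   # True
print(config.layerdrop)           # 0.05
print(config.mask_feature_prob)   # 0.15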
log_mozilla-foundation_common_voice_8_0_uz_test_predictions.txt ADDED
(diff too large to render; see raw file)

log_mozilla-foundation_common_voice_8_0_uz_test_targets.txt ADDED
(diff too large to render; see raw file)
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7d6216acb4d702e0e8583a087346b1ef1816c37949dcb8bdc13eeafb9338efa2
 size 1262058993
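Only the Git LFS pointer changed here: the sha256 oid now identifies the step-200 weights, while the byte size is unchanged. A sketch of verifying a downloaded pytorch_model.bin against the pointer's oid; it assumes the real weights file, not the pointer, is on disk.

import hashlib

h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    # Hash in 1 MiB chunks so the 1.26 GB checkpoint never sits in memory at once.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

print(h.hexdigest() == "7d6216acb4d702e0e8583a087346b1ef1816c37949dcb8bdc13eeafb9338efa2")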
run.sh CHANGED
@@ -6,8 +6,8 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
     --train_split_name="train[:30%]" \
     --eval_split_name="validation[50%:]" \
     --overwrite_output_dir \
-    --num_train_epochs="
-    --per_device_train_batch_size="
+    --num_train_epochs="50" \
+    --per_device_train_batch_size="64" \
     --per_device_eval_batch_size="8" \
     --gradient_accumulation_steps="4" \
     --learning_rate="3e-5" \
@@ -15,19 +15,19 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
     --length_column_name="input_length" \
     --evaluation_strategy="steps" \
     --text_column_name="sentence" \
-    --eval_metrics
+    --eval_metrics wer cer \
     --save_steps="200" \
     --eval_steps="200" \
     --logging_steps="100" \
     --min_duration_in_seconds="0.2" \
-    --layerdrop="0.
+    --layerdrop="0.05" \
     --activation_dropout="0.1" \
     --save_total_limit="3" \
     --freeze_feature_encoder \
-    --feat_proj_dropout="0.
+    --feat_proj_dropout="0.05" \
     --mask_time_prob="0.50" \
     --mask_time_length="10" \
-    --mask_feature_prob="0.
+    --mask_feature_prob="0.15" \
     --mask_feature_length="64" \
     --gradient_checkpointing \
     --use_auth_token \
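The newly filled-in values interact: with gradient accumulation, the optimizer sees a larger effective batch than any single forward pass. A back-of-the-envelope sketch; the single-device assumption is mine, since the commit does not record the GPU count.

per_device_train_batch_size = 64
gradient_accumulation_steps = 4
num_devices = 1  # assumption: not recorded in this commit

# Samples contributing to each optimizer update.
effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps * num_devices
print(effective_batch_size)  # 256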
run_speech_recognition_ctc.py CHANGED
@@ -557,7 +557,7 @@ def main():
         "gradient_checkpointing": training_args.gradient_checkpointing,
         "layerdrop": model_args.layerdrop,
         "ctc_loss_reduction": model_args.ctc_loss_reduction,
-        "
+        "ctc_zero_infinity": True,
         "pad_token_id": tokenizer.pad_token_id,
         "vocab_size": len(tokenizer),
         "activation_dropout": model_args.activation_dropout,
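This one-line change passes `ctc_zero_infinity=True` into the model config. What it guards against: CTC loss is infinite whenever an utterance's input sequence is too short to align with its target transcript, and zero_infinity replaces those losses with 0 so one bad sample cannot poison the gradients. A minimal sketch using torch.nn.CTCLoss directly (not code from the repo):

import torch
import torch.nn as nn

# 5 time steps, batch of 1, 33 classes -- too short for a 10-token target.
log_probs = torch.randn(5, 1, 33).log_softmax(dim=-1)
targets = torch.randint(1, 33, (1, 10))  # labels avoid the blank index 0
input_lengths = torch.tensor([5])
target_lengths = torch.tensor([10])

loss = nn.CTCLoss(zero_infinity=False)(log_probs, targets, input_lengths, target_lengths)
safe_loss = nn.CTCLoss(zero_infinity=True)(log_probs, targets, input_lengths, target_lengths)
print(loss)       # tensor(inf)
print(safe_loss)  # tensor(0.)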
runs/Feb06_16-03-20_job-0a778896-a7e2-46e9-bcf5-016f91f242cf/1644163569.9492478/events.out.tfevents.1644163569.job-0a778896-a7e2-46e9-bcf5-016f91f242cf.775427.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0662fb29ecffb8d0f2f9bd7362d875e72af431c0d4df692fb69ed1b647e640a
+size 4826

runs/Feb06_16-03-20_job-0a778896-a7e2-46e9-bcf5-016f91f242cf/events.out.tfevents.1644163569.job-0a778896-a7e2-46e9-bcf5-016f91f242cf.775427.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e1d89f34d1d109d3588a2fa0cdbfc551fc59a5e849ed79e813b55d20f17e4da
+size 5424
special_tokens_map.json CHANGED
@@ -1 +1 @@
-{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
+{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f8038e4755ffed0897ea06919cec764a4e3039a214a6e022867aaec6e399aba3
 size 3055