lucio committed
Commit ec78bff
1 Parent(s): e42ea01

Training in progress, step 200

.ipynb_checkpoints/eval-checkpoint.py CHANGED
@@ -2,6 +2,8 @@
 import argparse
 import functools
 import re
+import string
+import unidecode
 from typing import Dict
 
 from datasets import Audio, Dataset, DatasetDict, load_dataset, load_metric
@@ -50,9 +52,17 @@ def log_results(result: Dataset, args: Dict[str, str]):
 def normalize_text(text: str) -> str:
     """DO ADAPT FOR YOUR USE CASE. this function normalizes the target text."""
 
-    chars_to_ignore_regex = '[!"%,.:;?\\_|©«¬»،؛؟‒–—’“”„…‹›−☺♂�\\\\-]'  # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
+    chars_to_ignore_regex = f'[{re.escape(string.punctuation)}]'  # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
 
-    text = re.sub(chars_to_ignore_regex, "", text.lower())
+    text = re.sub(
+        chars_to_ignore_regex,
+        "",
+        re.sub("['`´]", "’",  # elsewhere probably meant as glottal stop
+            re.sub("([og])['`´]", "\g<1>‘",  # after o/g indicate modified char
+                unidecode.unidecode(text).lower()
+            )
+        )
+    ) + " "
 
     # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # note that order is important here!
@@ -107,7 +117,7 @@ def main(args):
     dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)
 
     # for testing: only process the first two examples as a test
-    dataset = dataset.select(range(10))
+    # dataset = dataset.select(range(10))
 
     # load processor
     feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
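For context, the rewritten normalize_text transliterates to ASCII with unidecode, lowercases, turns an apostrophe after o/g into the Uzbek modifier letter (oʻ/gʻ), maps any remaining apostrophe to a right single quote (probably meant as a glottal stop), and only then strips ASCII punctuation, so the inserted curly quotes survive. A minimal standalone sketch of the same pipeline (the sample sentence is made up):

import re
import string

import unidecode

def normalize_text(text: str) -> str:
    # Strip only ASCII punctuation; the curly quotes inserted below are kept.
    chars_to_ignore_regex = f"[{re.escape(string.punctuation)}]"
    return re.sub(
        chars_to_ignore_regex,
        "",
        re.sub("['`´]", "’",                     # elsewhere: likely a glottal stop
               re.sub("([og])['`´]", r"\g<1>‘",  # after o/g: modified character
                      unidecode.unidecode(text).lower())),
    ) + " "

print(normalize_text("Olmalar g'oyat shirin, a'lo!"))
# -> "olmalar g‘oyat shirin a’lo " (trailing space is intentional)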
.ipynb_checkpoints/mozilla-foundation_common_voice_8_0_uz_test_eval_results-checkpoint.txt ADDED
@@ -0,0 +1,2 @@
+WER: 0.6097560975609756
+CER: 0.12727272727272726
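The new checkpoint file records the current scores: roughly 61% word error rate and 13% character error rate. A hedged sketch of how such scores are computed with the same datasets metrics the eval script loads; the example sentences here are invented:

from datasets import load_metric

wer_metric = load_metric("wer")
cer_metric = load_metric("cer")

predictions = ["u yerga bordi", "salom dunyo"]
references = ["u yerga keldi", "salom dunyo"]

# WER = word-level edit operations / total reference words (here 1/5 = 0.2);
# CER is the same ratio computed over characters instead of words.
print("WER:", wer_metric.compute(predictions=predictions, references=references))
print("CER:", cer_metric.compute(predictions=predictions, references=references))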
.ipynb_checkpoints/run-checkpoint.sh CHANGED
@@ -6,8 +6,8 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
   --train_split_name="train[:30%]" \
   --eval_split_name="validation[50%:]" \
   --overwrite_output_dir \
-  --num_train_epochs="100" \
-  --per_device_train_batch_size="32" \
+  --num_train_epochs="50" \
+  --per_device_train_batch_size="64" \
   --per_device_eval_batch_size="8" \
   --gradient_accumulation_steps="4" \
   --learning_rate="3e-5" \
@@ -15,19 +15,19 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
   --length_column_name="input_length" \
   --evaluation_strategy="steps" \
   --text_column_name="sentence" \
-  --eval_metrics="cer" \
+  --eval_metrics wer cer \
   --save_steps="200" \
   --eval_steps="200" \
   --logging_steps="100" \
   --min_duration_in_seconds="0.2" \
-  --layerdrop="0.01" \
+  --layerdrop="0.05" \
   --activation_dropout="0.1" \
   --save_total_limit="3" \
   --freeze_feature_encoder \
-  --feat_proj_dropout="0.01" \
+  --feat_proj_dropout="0.05" \
   --mask_time_prob="0.50" \
   --mask_time_length="10" \
-  --mask_feature_prob="0.25" \
+  --mask_feature_prob="0.15" \
   --mask_feature_length="64" \
   --gradient_checkpointing \
   --use_auth_token \
.ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py CHANGED
@@ -557,7 +557,7 @@ def main():
         "gradient_checkpointing": training_args.gradient_checkpointing,
         "layerdrop": model_args.layerdrop,
         "ctc_loss_reduction": model_args.ctc_loss_reduction,
-        "zero_infinity": True,
+        "ctc_zero_infinity": True,
         "pad_token_id": tokenizer.pad_token_id,
         "vocab_size": len(tokenizer),
         "activation_dropout": model_args.activation_dropout,
config.json CHANGED
@@ -42,14 +42,14 @@
     2
   ],
   "ctc_loss_reduction": "mean",
-  "ctc_zero_infinity": false,
+  "ctc_zero_infinity": true,
   "diversity_loss_weight": 0.1,
   "do_stable_layer_norm": true,
   "eos_token_id": 2,
   "feat_extract_activation": "gelu",
   "feat_extract_dropout": 0.0,
   "feat_extract_norm": "layer",
-  "feat_proj_dropout": 0.01,
+  "feat_proj_dropout": 0.05,
   "feat_quantizer_dropout": 0.0,
   "final_dropout": 0.0,
   "hidden_act": "gelu",
@@ -58,10 +58,10 @@
   "initializer_range": 0.02,
   "intermediate_size": 4096,
   "layer_norm_eps": 1e-05,
-  "layerdrop": 0.01,
+  "layerdrop": 0.05,
   "mask_feature_length": 64,
   "mask_feature_min_masks": 0,
-  "mask_feature_prob": 0.25,
+  "mask_feature_prob": 0.15,
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
   "mask_time_prob": 0.5,
@@ -103,6 +103,5 @@
   "transformers_version": "4.16.2",
   "use_weighted_layer_sum": false,
   "vocab_size": 33,
-  "xvector_output_dim": 512,
-  "zero_infinity": true
+  "xvector_output_dim": 512
 }
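Two of these edits work together: the old zero_infinity key was stored on the config but never read, while ctc_zero_infinity is what Wav2Vec2ForCTC actually forwards to PyTorch's CTC loss, where it replaces infinite losses (from inputs too short to emit their targets) with zero instead of poisoning the gradients. A minimal sketch of that underlying PyTorch behavior, with illustrative shapes:

import torch

# zero_infinity=True zeroes out CTC terms that would otherwise be inf
# because the target is longer than the input can emit.
ctc = torch.nn.CTCLoss(blank=0, reduction="mean", zero_infinity=True)

log_probs = torch.randn(5, 2, 33).log_softmax(-1)  # (time, batch, vocab=33)
targets = torch.randint(1, 33, (2, 10))            # targets longer than inputs
input_lengths = torch.tensor([5, 5])
target_lengths = torch.tensor([10, 10])

print(ctc(log_probs, targets, input_lengths, target_lengths))  # finite, not inf

And a quick, equally hedged check that the retuned values load as intended (the local path is illustrative):

from transformers import Wav2Vec2Config

config = Wav2Vec2Config.from_pretrained("./xls-r-uzbek-cv8")  # illustrative path
assert config.ctc_zero_infinity is True
print(config.feat_proj_dropout, config.layerdrop, config.mask_feature_prob)  # 0.05 0.05 0.15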
log_mozilla-foundation_common_voice_8_0_uz_test_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
log_mozilla-foundation_common_voice_8_0_uz_test_targets.txt ADDED
The diff for this file is too large to render. See raw diff
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7063d4e144f587b5e67afd8d007b62551a94a76de9852bc5f372357bef8498e
+oid sha256:7d6216acb4d702e0e8583a087346b1ef1816c37949dcb8bdc13eeafb9338efa2
 size 1262058993
run.sh CHANGED
@@ -6,8 +6,8 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
   --train_split_name="train[:30%]" \
   --eval_split_name="validation[50%:]" \
   --overwrite_output_dir \
-  --num_train_epochs="100" \
-  --per_device_train_batch_size="32" \
+  --num_train_epochs="50" \
+  --per_device_train_batch_size="64" \
   --per_device_eval_batch_size="8" \
   --gradient_accumulation_steps="4" \
   --learning_rate="3e-5" \
@@ -15,19 +15,19 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
   --length_column_name="input_length" \
   --evaluation_strategy="steps" \
   --text_column_name="sentence" \
-  --eval_metrics="cer" \
+  --eval_metrics wer cer \
   --save_steps="200" \
   --eval_steps="200" \
   --logging_steps="100" \
   --min_duration_in_seconds="0.2" \
-  --layerdrop="0.01" \
+  --layerdrop="0.05" \
   --activation_dropout="0.1" \
   --save_total_limit="3" \
   --freeze_feature_encoder \
-  --feat_proj_dropout="0.01" \
+  --feat_proj_dropout="0.05" \
   --mask_time_prob="0.50" \
   --mask_time_length="10" \
-  --mask_feature_prob="0.25" \
+  --mask_feature_prob="0.15" \
   --mask_feature_length="64" \
   --gradient_checkpointing \
   --use_auth_token \
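The eval_metrics flag changes from a single quoted value to two space-separated values because the training script declares it as a list-valued argument. A minimal argparse sketch of the parsing behavior this relies on (the script itself goes through HfArgumentParser, which builds an equivalent multi-value argument for list fields):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--eval_metrics", nargs="+", default=["wer"])

args = parser.parse_args(["--eval_metrics", "wer", "cer"])
print(args.eval_metrics)  # ['wer', 'cer']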
run_speech_recognition_ctc.py CHANGED
@@ -557,7 +557,7 @@ def main():
         "gradient_checkpointing": training_args.gradient_checkpointing,
         "layerdrop": model_args.layerdrop,
         "ctc_loss_reduction": model_args.ctc_loss_reduction,
-        "zero_infinity": True,
+        "ctc_zero_infinity": True,
         "pad_token_id": tokenizer.pad_token_id,
         "vocab_size": len(tokenizer),
         "activation_dropout": model_args.activation_dropout,
runs/Feb06_16-03-20_job-0a778896-a7e2-46e9-bcf5-016f91f242cf/1644163569.9492478/events.out.tfevents.1644163569.job-0a778896-a7e2-46e9-bcf5-016f91f242cf.775427.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0662fb29ecffb8d0f2f9bd7362d875e72af431c0d4df692fb69ed1b647e640a
+size 4826
runs/Feb06_16-03-20_job-0a778896-a7e2-46e9-bcf5-016f91f242cf/events.out.tfevents.1644163569.job-0a778896-a7e2-46e9-bcf5-016f91f242cf.775427.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e1d89f34d1d109d3588a2fa0cdbfc551fc59a5e849ed79e813b55d20f17e4da
+size 5424
special_tokens_map.json CHANGED
@@ -1 +1 @@
-{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
+{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
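The additional_special_tokens list appears to grow by one <s>/</s> pair on each save (16 entries before, 18 after), a quirk of tokenizer serialization in this transformers version; the duplicates are harmless since both tokens already exist in the vocabulary. A hedged sketch of how the map is consumed (the path is illustrative):

from transformers import Wav2Vec2CTCTokenizer

tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("./xls-r-uzbek-cv8")  # illustrative path
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.pad_token)  # <s> </s> [PAD]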
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a661f5e8e620b98c6c94d0bb8c2ddd7376f2412096a9ce2ff8088711cea618b
+oid sha256:f8038e4755ffed0897ea06919cec764a4e3039a214a6e022867aaec6e399aba3
 size 3055