Training in progress, step 200
- .ipynb_checkpoints/eval-checkpoint.py +13 -3
- .ipynb_checkpoints/mozilla-foundation_common_voice_8_0_uz_test_eval_results-checkpoint.txt +2 -0
- .ipynb_checkpoints/run-checkpoint.sh +6 -6
- .ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py +1 -1
- config.json +5 -6
- log_mozilla-foundation_common_voice_8_0_uz_test_predictions.txt +0 -0
- log_mozilla-foundation_common_voice_8_0_uz_test_targets.txt +0 -0
- pytorch_model.bin +1 -1
- run.sh +6 -6
- run_speech_recognition_ctc.py +1 -1
- runs/Feb06_16-03-20_job-0a778896-a7e2-46e9-bcf5-016f91f242cf/1644163569.9492478/events.out.tfevents.1644163569.job-0a778896-a7e2-46e9-bcf5-016f91f242cf.775427.1 +3 -0
- runs/Feb06_16-03-20_job-0a778896-a7e2-46e9-bcf5-016f91f242cf/events.out.tfevents.1644163569.job-0a778896-a7e2-46e9-bcf5-016f91f242cf.775427.0 +3 -0
- special_tokens_map.json +1 -1
- training_args.bin +1 -1
.ipynb_checkpoints/eval-checkpoint.py CHANGED
@@ -2,6 +2,8 @@
 import argparse
 import functools
 import re
+import string
+import unidecode
 from typing import Dict
 
 from datasets import Audio, Dataset, DatasetDict, load_dataset, load_metric
@@ -50,9 +52,17 @@ def log_results(result: Dataset, args: Dict[str, str]):
 def normalize_text(text: str) -> str:
     """DO ADAPT FOR YOUR USE CASE. this function normalizes the target text."""
 
-    chars_to_ignore_regex = '[
+    chars_to_ignore_regex = f'[{re.escape(string.punctuation)}]'  # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
 
-    text = re.sub(
+    text = re.sub(
+        chars_to_ignore_regex,
+        "",
+        re.sub("['`´]", "’",  # elsewhere probably meant as glottal stop
+            re.sub("([og])['`´]", "\g<1>‘",  # after o/g indicate modified char
+                unidecode.unidecode(text).lower()
+            )
+        )
+    ) + " "
 
     # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # note that order is important here!
@@ -107,7 +117,7 @@ def main(args):
     dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)
 
     # for testing: only process the first two examples as a test
-    dataset = dataset.select(range(10))
+    # dataset = dataset.select(range(10))
 
     # load processor
     feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
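For readability, the same normalization logic as a standalone function, written linearly instead of nested. This is a sketch that assumes the `unidecode` package is installed; the example string is hypothetical, not taken from the test set.

import re
import string

import unidecode

def normalize_text(text: str) -> str:
    # Flatten to lowercase ASCII first, so Uzbek o‘/g‘ and ’ all become '.
    text = unidecode.unidecode(text).lower()
    # An apostrophe right after o/g marks the modified letters o‘/g‘ (U+2018).
    text = re.sub(r"([og])['`´]", r"\g<1>‘", text)
    # Any remaining apostrophe is most likely a glottal stop, written ’ (U+2019).
    text = re.sub(r"['`´]", "’", text)
    # Strip the punctuation that was ignored during training, then pad.
    return re.sub(f"[{re.escape(string.punctuation)}]", "", text) + " "

print(normalize_text("G'ayrat va mas'uliyat!"))  # "g‘ayrat va mas’uliyat "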
.ipynb_checkpoints/mozilla-foundation_common_voice_8_0_uz_test_eval_results-checkpoint.txt ADDED
@@ -0,0 +1,2 @@
+WER: 0.6097560975609756
+CER: 0.12727272727272726
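These numbers (WER 0.61 / CER 0.13 at step 200) come from eval.py, which uses `datasets.load_metric` as imported above. A minimal sketch of that computation; the toy strings are hypothetical, not from the Common Voice test set.

from datasets import load_metric

wer_metric = load_metric("wer")
cer_metric = load_metric("cer")  # requires the jiwer package

predictions = ["salom dunyo", "yaxshi kun"]
references = ["salom dunya", "yaxshi kun"]

# WER counts word-level edits; CER counts character-level edits.
print("WER:", wer_metric.compute(predictions=predictions, references=references))
print("CER:", cer_metric.compute(predictions=predictions, references=references))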
.ipynb_checkpoints/run-checkpoint.sh CHANGED
@@ -6,8 +6,8 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
     --train_split_name="train[:30%]" \
     --eval_split_name="validation[50%:]" \
     --overwrite_output_dir \
-    --num_train_epochs="
-    --per_device_train_batch_size="
+    --num_train_epochs="50" \
+    --per_device_train_batch_size="64" \
     --per_device_eval_batch_size="8" \
     --gradient_accumulation_steps="4" \
     --learning_rate="3e-5" \
@@ -15,19 +15,19 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
     --length_column_name="input_length" \
     --evaluation_strategy="steps" \
     --text_column_name="sentence" \
-    --eval_metrics
+    --eval_metrics wer cer \
     --save_steps="200" \
     --eval_steps="200" \
     --logging_steps="100" \
     --min_duration_in_seconds="0.2" \
-    --layerdrop="0.
+    --layerdrop="0.05" \
     --activation_dropout="0.1" \
     --save_total_limit="3" \
     --freeze_feature_encoder \
-    --feat_proj_dropout="0.
+    --feat_proj_dropout="0.05" \
     --mask_time_prob="0.50" \
     --mask_time_length="10" \
-    --mask_feature_prob="0.
+    --mask_feature_prob="0.15" \
     --mask_feature_length="64" \
     --gradient_checkpointing \
     --use_auth_token \
.ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py CHANGED
@@ -557,7 +557,7 @@ def main():
         "gradient_checkpointing": training_args.gradient_checkpointing,
         "layerdrop": model_args.layerdrop,
         "ctc_loss_reduction": model_args.ctc_loss_reduction,
-        "
+        "ctc_zero_infinity": True,
         "pad_token_id": tokenizer.pad_token_id,
         "vocab_size": len(tokenizer),
         "activation_dropout": model_args.activation_dropout,
config.json CHANGED
@@ -42,14 +42,14 @@
     2
   ],
   "ctc_loss_reduction": "mean",
-  "ctc_zero_infinity":
+  "ctc_zero_infinity": true,
   "diversity_loss_weight": 0.1,
   "do_stable_layer_norm": true,
   "eos_token_id": 2,
   "feat_extract_activation": "gelu",
   "feat_extract_dropout": 0.0,
   "feat_extract_norm": "layer",
-  "feat_proj_dropout": 0.
+  "feat_proj_dropout": 0.05,
   "feat_quantizer_dropout": 0.0,
   "final_dropout": 0.0,
   "hidden_act": "gelu",
@@ -58,10 +58,10 @@
   "initializer_range": 0.02,
   "intermediate_size": 4096,
   "layer_norm_eps": 1e-05,
-  "layerdrop": 0.
+  "layerdrop": 0.05,
   "mask_feature_length": 64,
   "mask_feature_min_masks": 0,
-  "mask_feature_prob": 0.
+  "mask_feature_prob": 0.15,
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
   "mask_time_prob": 0.5,
@@ -103,6 +103,5 @@
   "transformers_version": "4.16.2",
   "use_weighted_layer_sum": false,
   "vocab_size": 33,
-  "xvector_output_dim": 512,
-  "zero_infinity": true
+  "xvector_output_dim": 512
 }
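A quick way to sanity-check the new values, assuming this commit is checked out locally so config.json sits in the working directory (a sketch, not part of the repo):

from transformers import Wav2Vec2Config

# Read the freshly committed config and confirm the updated fields.
config = Wav2Vec2Config.from_json_file("config.json")
print(config.ctc_zero_infinity)   # True
print(config.layerdrop)           # 0.05
print(config.mask_feature_prob)   # 0.15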
log_mozilla-foundation_common_voice_8_0_uz_test_predictions.txt ADDED
(diff too large to render; see raw file)

log_mozilla-foundation_common_voice_8_0_uz_test_targets.txt ADDED
(diff too large to render; see raw file)
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7d6216acb4d702e0e8583a087346b1ef1816c37949dcb8bdc13eeafb9338efa2
 size 1262058993
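Only the Git LFS pointer changed here: the sha256 oid now identifies the step-200 weights, while the byte size is unchanged. A sketch of verifying a downloaded pytorch_model.bin against the pointer's oid; it assumes the real weights file, not the pointer, is on disk.

import hashlib

h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    # Hash in 1 MiB chunks so the 1.26 GB checkpoint never sits in memory at once.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

print(h.hexdigest() == "7d6216acb4d702e0e8583a087346b1ef1816c37949dcb8bdc13eeafb9338efa2")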
run.sh CHANGED
@@ -6,8 +6,8 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
     --train_split_name="train[:30%]" \
     --eval_split_name="validation[50%:]" \
     --overwrite_output_dir \
-    --num_train_epochs="
-    --per_device_train_batch_size="
+    --num_train_epochs="50" \
+    --per_device_train_batch_size="64" \
     --per_device_eval_batch_size="8" \
     --gradient_accumulation_steps="4" \
     --learning_rate="3e-5" \
@@ -15,19 +15,19 @@ python ~/xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
     --length_column_name="input_length" \
     --evaluation_strategy="steps" \
     --text_column_name="sentence" \
-    --eval_metrics
+    --eval_metrics wer cer \
     --save_steps="200" \
     --eval_steps="200" \
     --logging_steps="100" \
     --min_duration_in_seconds="0.2" \
-    --layerdrop="0.
+    --layerdrop="0.05" \
     --activation_dropout="0.1" \
     --save_total_limit="3" \
     --freeze_feature_encoder \
-    --feat_proj_dropout="0.
+    --feat_proj_dropout="0.05" \
     --mask_time_prob="0.50" \
     --mask_time_length="10" \
-    --mask_feature_prob="0.
+    --mask_feature_prob="0.15" \
     --mask_feature_length="64" \
     --gradient_checkpointing \
     --use_auth_token \
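The newly filled-in values interact: with gradient accumulation, the optimizer sees a larger effective batch than any single forward pass. A back-of-the-envelope sketch; the single-device assumption is mine, since the commit does not record the GPU count.

per_device_train_batch_size = 64
gradient_accumulation_steps = 4
num_devices = 1  # assumption: not recorded in this commit

# Samples contributing to each optimizer update.
effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps * num_devices
print(effective_batch_size)  # 256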
run_speech_recognition_ctc.py CHANGED
@@ -557,7 +557,7 @@ def main():
         "gradient_checkpointing": training_args.gradient_checkpointing,
         "layerdrop": model_args.layerdrop,
         "ctc_loss_reduction": model_args.ctc_loss_reduction,
-        "
+        "ctc_zero_infinity": True,
         "pad_token_id": tokenizer.pad_token_id,
         "vocab_size": len(tokenizer),
         "activation_dropout": model_args.activation_dropout,
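This one-line change passes `ctc_zero_infinity=True` into the model config. What it guards against: CTC loss is infinite whenever an utterance's input sequence is too short to align with its target transcript, and zero_infinity replaces those losses with 0 so one bad sample cannot poison the gradients. A minimal sketch using torch.nn.CTCLoss directly (not code from the repo):

import torch
import torch.nn as nn

# 5 time steps, batch of 1, 33 classes -- too short for a 10-token target.
log_probs = torch.randn(5, 1, 33).log_softmax(dim=-1)
targets = torch.randint(1, 33, (1, 10))  # labels avoid the blank index 0
input_lengths = torch.tensor([5])
target_lengths = torch.tensor([10])

loss = nn.CTCLoss(zero_infinity=False)(log_probs, targets, input_lengths, target_lengths)
safe_loss = nn.CTCLoss(zero_infinity=True)(log_probs, targets, input_lengths, target_lengths)
print(loss)       # tensor(inf)
print(safe_loss)  # tensor(0.)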
runs/Feb06_16-03-20_job-0a778896-a7e2-46e9-bcf5-016f91f242cf/1644163569.9492478/events.out.tfevents.1644163569.job-0a778896-a7e2-46e9-bcf5-016f91f242cf.775427.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0662fb29ecffb8d0f2f9bd7362d875e72af431c0d4df692fb69ed1b647e640a
+size 4826

runs/Feb06_16-03-20_job-0a778896-a7e2-46e9-bcf5-016f91f242cf/events.out.tfevents.1644163569.job-0a778896-a7e2-46e9-bcf5-016f91f242cf.775427.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e1d89f34d1d109d3588a2fa0cdbfc551fc59a5e849ed79e813b55d20f17e4da
+size 5424
special_tokens_map.json CHANGED
@@ -1 +1 @@
-{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
+{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f8038e4755ffed0897ea06919cec764a4e3039a214a6e022867aaec6e399aba3
 size 3055