Commit
·
ad160c6
1
Parent(s):
c9e72b5
up
Browse files- .gitignore +1 -0
- added_tokens.json +1 -0
- preprocessor_config.json +9 -0
- run.sh +33 -0
- runs/Nov12_13-21-38_ip-172-31-1-149/1636723590.2706728/events.out.tfevents.1636723590.ip-172-31-1-149.112667.1 +3 -0
- runs/Nov12_13-21-38_ip-172-31-1-149/events.out.tfevents.1636723590.ip-172-31-1-149.112667.0 +3 -0
- runs/Nov12_14-57-07_ip-172-31-1-149/1636729239.1256545/events.out.tfevents.1636729239.ip-172-31-1-149.114324.1 +3 -0
- runs/Nov12_14-57-07_ip-172-31-1-149/events.out.tfevents.1636729239.ip-172-31-1-149.114324.0 +3 -0
- runs/Nov12_15-16-34_ip-172-31-1-149/1636730406.7286696/events.out.tfevents.1636730406.ip-172-31-1-149.117359.1 +3 -0
- runs/Nov12_15-16-34_ip-172-31-1-149/events.out.tfevents.1636730406.ip-172-31-1-149.117359.0 +3 -0
- runs/Nov12_15-24-22_ip-172-31-1-149/1636730878.4524837/events.out.tfevents.1636730878.ip-172-31-1-149.121194.1 +3 -0
- runs/Nov12_15-24-22_ip-172-31-1-149/events.out.tfevents.1636730878.ip-172-31-1-149.121194.0 +3 -0
- runs/Nov12_15-33-27_ip-172-31-1-149/1636731418.5040016/events.out.tfevents.1636731418.ip-172-31-1-149.122905.1 +3 -0
- runs/Nov12_15-33-27_ip-172-31-1-149/events.out.tfevents.1636731418.ip-172-31-1-149.122905.0 +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
checkpoint-*/
|
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<s>": 39, "</s>": 40}
|
preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0,
|
7 |
+
"return_attention_mask": true,
|
8 |
+
"sampling_rate": 16000
|
9 |
+
}
|
run.sh
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
#
# Launches run_speech_recognition_ctc.py through torch.distributed.launch with
# two processes per node, running training and evaluation (--do_train --do_eval)
# of facebook/wav2vec2-xls-r-1b on the "tr" configuration of common_voice.
# Output goes to ./wav2vec2-xls-r-1b-common_voice-tr-ft and is pushed to the
# Hub (--push_to_hub, --use_auth_token).
#
# Every character given to --chars_to_ignore is single-quoted so the shell
# passes it through literally: a bare `?` is a pathname-expansion pattern and
# would be replaced by any one-character filename in the working directory.
#
# NOTE(review): if the training script joins these characters into a regex
# character class, the `-` sitting between `!` and `;` would act as a range
# rather than a literal hyphen - verify against run_speech_recognition_ctc.py.
python -m torch.distributed.launch \
	--nproc_per_node 2 run_speech_recognition_ctc.py \
	--dataset_name="common_voice" \
	--model_name_or_path="facebook/wav2vec2-xls-r-1b" \
	--dataset_config_name="tr" \
	--output_dir="./wav2vec2-xls-r-1b-common_voice-tr-ft" \
	--overwrite_output_dir \
	--num_train_epochs="100" \
	--per_device_train_batch_size="1" \
	--gradient_accumulation_steps="1" \
	--learning_rate="5e-5" \
	--warmup_steps="500" \
	--evaluation_strategy="steps" \
	--text_column_name="sentence" \
	--save_steps="500" \
	--eval_steps="500" \
	--logging_steps="1" \
	--layerdrop="0.0" \
	--eval_metrics wer cer \
	--save_total_limit="1" \
	--mask_time_prob="0.3" \
	--mask_time_length="10" \
	--mask_feature_prob="0.1" \
	--mask_feature_length="64" \
	--freeze_feature_extractor \
	--chars_to_ignore ',' '?' '.' '!' '-' ';' ':' '"' '“' '%' '‘' '”' '�' \
	--fp16 \
	--group_by_length \
	--push_to_hub \
	--do_train --do_eval \
	--gradient_checkpointing \
	--use_auth_token
|
runs/Nov12_13-21-38_ip-172-31-1-149/1636723590.2706728/events.out.tfevents.1636723590.ip-172-31-1-149.112667.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b9e046f0527e9a37482b1a3c660409e7390d9f970df1bee51d94077cdb7ac67
|
3 |
+
size 4605
|
runs/Nov12_13-21-38_ip-172-31-1-149/events.out.tfevents.1636723590.ip-172-31-1-149.112667.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79dee9d311ec93e9fe2716cd841993183adc9bf84b239966af6e24af8d3dc87a
|
3 |
+
size 4695
|
runs/Nov12_14-57-07_ip-172-31-1-149/1636729239.1256545/events.out.tfevents.1636729239.ip-172-31-1-149.114324.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:173b3e9ea7b3030e9990d653dbc696418a98c7be98b009c88c47c5345a2f03ae
|
3 |
+
size 4605
|
runs/Nov12_14-57-07_ip-172-31-1-149/events.out.tfevents.1636729239.ip-172-31-1-149.114324.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cfa8002c1c6937e1c625ee22d636ce24f541690e64fe273256436365c996195
|
3 |
+
size 4387
|
runs/Nov12_15-16-34_ip-172-31-1-149/1636730406.7286696/events.out.tfevents.1636730406.ip-172-31-1-149.117359.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d217da7ff98cf74214bb7f4e198901c083f2b6d6ec5469cdb13d9a440e8d3b0
|
3 |
+
size 4605
|
runs/Nov12_15-16-34_ip-172-31-1-149/events.out.tfevents.1636730406.ip-172-31-1-149.117359.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ea8e7135432a92fd91d6cf094a0ed8a08804138036ff9b1bc2be3098a81ff39
|
3 |
+
size 4233
|
runs/Nov12_15-24-22_ip-172-31-1-149/1636730878.4524837/events.out.tfevents.1636730878.ip-172-31-1-149.121194.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f6aabec00b31f8e6320909cdc835c478fba5534b89a698f0f132686fa9c4d82
|
3 |
+
size 4605
|
runs/Nov12_15-24-22_ip-172-31-1-149/events.out.tfevents.1636730878.ip-172-31-1-149.121194.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22763bdfa6e96bd42c0bb8ac197da78336bd8c05db4d33fadb3255daea0abe3b
|
3 |
+
size 4849
|
runs/Nov12_15-33-27_ip-172-31-1-149/1636731418.5040016/events.out.tfevents.1636731418.ip-172-31-1-149.122905.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd6f6c1a2b3a56a97e14d3a20be69956507f9429face765269b15c52737c79d1
|
3 |
+
size 4605
|
runs/Nov12_15-33-27_ip-172-31-1-149/events.out.tfevents.1636731418.ip-172-31-1-149.122905.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d277f5e7af974b0bd0f13dbffadd861a170247fd4c4cf3f4f0cb58bfbdee5776
|
3 |
+
size 4849
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./wav2vec2-xls-r-1b-common_voice-tr-ft", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "q": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "â": 27, "ç": 28, "ë": 29, "î": 30, "ö": 31, "ü": 32, "ğ": 33, "ı": 34, "ş": 35, "̇": 36, "|": 0, "[UNK]": 37, "[PAD]": 38}
|