patrickvonplaten committed on
Commit
ad160c6
1 Parent(s): c9e72b5
.gitignore ADDED
@@ -0,0 +1 @@
 
1
+ checkpoint-*/
added_tokens.json ADDED
@@ -0,0 +1 @@
 
1
+ {"<s>": 39, "</s>": 40}
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
run.sh ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ python -m torch.distributed.launch \
3
+ --nproc_per_node 2 run_speech_recognition_ctc.py \
4
+ --dataset_name="common_voice" \
5
+ --model_name_or_path="facebook/wav2vec2-xls-r-1b" \
6
+ --dataset_config_name="tr" \
7
+ --output_dir="./wav2vec2-xls-r-1b-common_voice-tr-ft" \
8
+ --overwrite_output_dir \
9
+ --num_train_epochs="100" \
10
+ --per_device_train_batch_size="1" \
11
+ --gradient_accumulation_steps="1" \
12
+ --learning_rate="5e-5" \
13
+ --warmup_steps="500" \
14
+ --evaluation_strategy="steps" \
15
+ --text_column_name="sentence" \
16
+ --save_steps="500" \
17
+ --eval_steps="500" \
18
+ --logging_steps="1" \
19
+ --layerdrop="0.0" \
20
+ --eval_metrics wer cer \
21
+ --save_total_limit="1" \
22
+ --mask_time_prob="0.3" \
23
+ --mask_time_length="10" \
24
+ --mask_feature_prob="0.1" \
25
+ --mask_feature_length="64" \
26
+ --freeze_feature_extractor \
27
+ --chars_to_ignore , ? . ! - \; \: \" “ % ‘ ” � \
28
+ --fp16 \
29
+ --group_by_length \
30
+ --push_to_hub \
31
+ --do_train --do_eval \
32
+ --gradient_checkpointing \
33
+ --use_auth_token
runs/Nov12_13-21-38_ip-172-31-1-149/1636723590.2706728/events.out.tfevents.1636723590.ip-172-31-1-149.112667.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b9e046f0527e9a37482b1a3c660409e7390d9f970df1bee51d94077cdb7ac67
3
+ size 4605
runs/Nov12_13-21-38_ip-172-31-1-149/events.out.tfevents.1636723590.ip-172-31-1-149.112667.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79dee9d311ec93e9fe2716cd841993183adc9bf84b239966af6e24af8d3dc87a
3
+ size 4695
runs/Nov12_14-57-07_ip-172-31-1-149/1636729239.1256545/events.out.tfevents.1636729239.ip-172-31-1-149.114324.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:173b3e9ea7b3030e9990d653dbc696418a98c7be98b009c88c47c5345a2f03ae
3
+ size 4605
runs/Nov12_14-57-07_ip-172-31-1-149/events.out.tfevents.1636729239.ip-172-31-1-149.114324.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cfa8002c1c6937e1c625ee22d636ce24f541690e64fe273256436365c996195
3
+ size 4387
runs/Nov12_15-16-34_ip-172-31-1-149/1636730406.7286696/events.out.tfevents.1636730406.ip-172-31-1-149.117359.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d217da7ff98cf74214bb7f4e198901c083f2b6d6ec5469cdb13d9a440e8d3b0
3
+ size 4605
runs/Nov12_15-16-34_ip-172-31-1-149/events.out.tfevents.1636730406.ip-172-31-1-149.117359.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ea8e7135432a92fd91d6cf094a0ed8a08804138036ff9b1bc2be3098a81ff39
3
+ size 4233
runs/Nov12_15-24-22_ip-172-31-1-149/1636730878.4524837/events.out.tfevents.1636730878.ip-172-31-1-149.121194.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f6aabec00b31f8e6320909cdc835c478fba5534b89a698f0f132686fa9c4d82
3
+ size 4605
runs/Nov12_15-24-22_ip-172-31-1-149/events.out.tfevents.1636730878.ip-172-31-1-149.121194.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22763bdfa6e96bd42c0bb8ac197da78336bd8c05db4d33fadb3255daea0abe3b
3
+ size 4849
runs/Nov12_15-33-27_ip-172-31-1-149/1636731418.5040016/events.out.tfevents.1636731418.ip-172-31-1-149.122905.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd6f6c1a2b3a56a97e14d3a20be69956507f9429face765269b15c52737c79d1
3
+ size 4605
runs/Nov12_15-33-27_ip-172-31-1-149/events.out.tfevents.1636731418.ip-172-31-1-149.122905.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d277f5e7af974b0bd0f13dbffadd861a170247fd4c4cf3f4f0cb58bfbdee5776
3
+ size 4849
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./wav2vec2-xls-r-1b-common_voice-tr-ft", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
1
+ {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "q": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "â": 27, "ç": 28, "ë": 29, "î": 30, "ö": 31, "ü": 32, "ğ": 33, "ı": 34, "ş": 35, "̇": 36, "|": 0, "[UNK]": 37, "[PAD]": 38}