diff --git a/config.json b/config.json index e2b341d..c6e8001 100644 --- a/config.json +++ b/config.json @@ -6,7 +6,7 @@ "add_adapter": false, "apply_spec_augment": true, "architectures": [ - "Wav2Vec2ForCTC" + "Wav2Vec2ForPreTraining" ], "attention_dropout": 0.0, "bos_token_id": 1, diff --git a/run.sh b/run.sh index f854ea3..24b2efe 100755 --- a/run.sh +++ b/run.sh @@ -4,6 +4,7 @@ python run_speech_recognition_ctc.py \ --dataset_config_name="48K_mp3" \ --output_dir="./" \ --overwrite_output_dir \ + --hub_model_id="NbAiLab/xls-npsc" \ --num_train_epochs="10" \ --per_device_train_batch_size="8" \ --per_device_eval_batch_size="8" \ @@ -31,4 +32,6 @@ python run_speech_recognition_ctc.py \ --fp16 \ --group_by_length \ --do_train --do_eval \ + --max_train_samples="1000" \ + --max_eval_samples="100" \ --push_to_hub diff --git a/special_tokens_map.json b/special_tokens_map.json index 623bcb0..59f35a2 100644 --- a/special_tokens_map.json +++ b/special_tokens_map.json @@ -1 +1 @@ -{"bos_token": "", "eos_token": "", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]} \ No newline at end of file +{"bos_token": "", "eos_token": "", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]} \ No newline at end of file diff --git a/vocab.json b/vocab.json index 6328bc8..5211c51 100644 --- a/vocab.json +++ b/vocab.json @@ -1 +1 @@ -{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "q": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "ä": 27, "å": 28, "é": 29, "ô": 30, "ö": 31, "ü": 32, "|": 0, "[UNK]": 33, "[PAD]": 34} \ No newline at end of file +{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "q": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "å": 27, "æ": 28, "è": 29, "ò": 30, "ô": 31, "ø": 32, "|": 0, "[UNK]": 33, "[PAD]": 34} \ No newline at end of file