pere committed on
Commit
9183420
1 Parent(s): 51c737f

Training in progress, step 500

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 45, "</s>": 46}
config.json CHANGED
@@ -6,7 +6,7 @@
6
  "add_adapter": false,
7
  "apply_spec_augment": true,
8
  "architectures": [
9
- "Wav2Vec2ForPreTraining"
10
  ],
11
  "attention_dropout": 0.094,
12
  "bos_token_id": 1,
@@ -76,7 +76,7 @@
76
  "num_hidden_layers": 48,
77
  "num_negatives": 100,
78
  "output_hidden_size": 1280,
79
- "pad_token_id": 51,
80
  "proj_codevector_dim": 1024,
81
  "tdnn_dilation": [
82
  1,
@@ -102,6 +102,6 @@
102
  "torch_dtype": "float32",
103
  "transformers_version": "4.17.0.dev0",
104
  "use_weighted_layer_sum": false,
105
- "vocab_size": 54,
106
  "xvector_output_dim": 512
107
  }
 
6
  "add_adapter": false,
7
  "apply_spec_augment": true,
8
  "architectures": [
9
+ "Wav2Vec2ForCTC"
10
  ],
11
  "attention_dropout": 0.094,
12
  "bos_token_id": 1,
 
76
  "num_hidden_layers": 48,
77
  "num_negatives": 100,
78
  "output_hidden_size": 1280,
79
+ "pad_token_id": 44,
80
  "proj_codevector_dim": 1024,
81
  "tdnn_dilation": [
82
  1,
 
102
  "torch_dtype": "float32",
103
  "transformers_version": "4.17.0.dev0",
104
  "use_weighted_layer_sum": false,
105
+ "vocab_size": 47,
106
  "xvector_output_dim": 512
107
  }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d7c805c06a74796efe316238c224e293665549d19fa4135249aa5af42f602c
3
+ size 3850553521
run.sh CHANGED
@@ -1,13 +1,13 @@
1
  python run_speech_recognition_ctc.py \
2
  --dataset_name="NbAiLab/NPSC" \
3
  --model_name_or_path="facebook/wav2vec2-xls-r-1b" \
4
- --hub_model_id="NbAiLab/wav2vec2-xlsr-1B-NPSC-NN-OH" \
5
  --dataset_config_name="16K_mp3" \
6
  --output_dir="./" \
7
  --overwrite_output_dir \
8
  --num_train_epochs="50" \
9
- --per_device_train_batch_size="16" \
10
- --per_device_eval_batch_size="16" \
11
  --gradient_accumulation_steps="2" \
12
  --learning_rate="6e-5" \
13
  --warmup_steps="2000" \
 
1
  python run_speech_recognition_ctc.py \
2
  --dataset_name="NbAiLab/NPSC" \
3
  --model_name_or_path="facebook/wav2vec2-xls-r-1b" \
4
+ --hub_model_id="NbAiLab/wav2vec2-xlsr-1B-NPSC-NN" \
5
  --dataset_config_name="16K_mp3" \
6
  --output_dir="./" \
7
  --overwrite_output_dir \
8
  --num_train_epochs="50" \
9
+ --per_device_train_batch_size="8" \
10
+ --per_device_eval_batch_size="8" \
11
  --gradient_accumulation_steps="2" \
12
  --learning_rate="6e-5" \
13
  --warmup_steps="2000" \
runs/Feb03_23-58-44_job-c93f32d8-97c5-48e7-b5ec-c6c950f627ba/1643932752.7292159/events.out.tfevents.1643932752.job-c93f32d8-97c5-48e7-b5ec-c6c950f627ba.3705903.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:664b412fcf3743a608fe8606e4274b27263b1bd005a97911789e4f2e61ca01d6
3
+ size 4781
runs/Feb03_23-58-44_job-c93f32d8-97c5-48e7-b5ec-c6c950f627ba/events.out.tfevents.1643932752.job-c93f32d8-97c5-48e7-b5ec-c6c950f627ba.3705903.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:445d5b20ca2f3076cdfdf78799449aa82779f181eff7cc34a8b33b73b0f9e228
3
+ size 4721
runs/Feb04_05-15-53_job-c93f32d8-97c5-48e7-b5ec-c6c950f627ba/1643951801.7137828/events.out.tfevents.1643951801.job-c93f32d8-97c5-48e7-b5ec-c6c950f627ba.3825650.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42b8d4eddf804d7de58a16311b8b28a916c96201117ddca175401c4312eb0827
3
+ size 4781
runs/Feb04_05-15-53_job-c93f32d8-97c5-48e7-b5ec-c6c950f627ba/events.out.tfevents.1643951801.job-c93f32d8-97c5-48e7-b5ec-c6c950f627ba.3825650.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98d001b226ad34ddb4c46cae4170fda46f52eb19bd5f87ef6fb63663b873d884
3
+ size 5819
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0abaa17e1c688234049e636981023a7e78d73306acdf189dd3aeaafa5e82e883
3
+ size 3055
vocab.json CHANGED
@@ -1 +1 @@
1
- {"-": 1, ".": 2, "/": 3, "0": 4, "1": 5, "2": 6, "4": 7, "6": 8, "7": 9, "8": 10, "9": 11, "_": 12, "a": 13, "b": 14, "c": 15, "d": 16, "e": 17, "f": 18, "g": 19, "h": 20, "i": 21, "j": 22, "k": 23, "l": 24, "m": 25, "n": 26, "o": 27, "p": 28, "q": 29, "r": 30, "s": 31, "t": 32, "u": 33, "v": 34, "w": 35, "x": 36, "y": 37, "z": 38, "\u00e5": 39, "\u00e6": 40, "\u00f8": 41, "\u2013": 42, "|": 0, "[UNK]": 43, "[PAD]": 44}
 
1
+ {"-": 1, ".": 2, "/": 3, "0": 4, "1": 5, "2": 6, "4": 7, "6": 8, "7": 9, "8": 10, "9": 11, "_": 12, "a": 13, "b": 14, "c": 15, "d": 16, "e": 17, "f": 18, "g": 19, "h": 20, "i": 21, "j": 22, "k": 23, "l": 24, "m": 25, "n": 26, "o": 27, "p": 28, "q": 29, "r": 30, "s": 31, "t": 32, "u": 33, "v": 34, "w": 35, "x": 36, "y": 37, "z": 38, "å": 39, "æ": 40, "ø": 41, "–": 42, "|": 0, "[UNK]": 43, "[PAD]": 44}