lIlBrother committed on
Commit
c73317b
1 Parent(s): 86185c1

Upload 4 files

Browse files
alphabet.json ADDED
@@ -0,0 +1 @@
 
1
+ {"labels": ["", "\u2047", "<s>", "</s>", " ", "\u110b", "\u1161", "\u1175", "\u1100", "\u1173", "\u11ab", "\u1165", "\u1169", "\u110c", "\u1103", "\u1109", "\u11af", "\u1102", "\u1105", "\u1112", "\u1166", "\u116e", "\u11bc", "\u1106", "\u1162", "\u1167", "\u11a8", "\u1107", "\u11b7", "\u116d", "\u11bb", "\u11b8", "\u110e", "\u116a", "\u1104", "\u116c", "\u1101", "\u11ba", "\u1174", "\u1110", "\u11c2", "\u116f", "\u1163", "\u1111", "\u1168", "\u11ad", "\u1172", "\u110f", "\u11c0", "\u110a", "\u1171", "0", "1", "\u110d", "\u116b", "\u11b9", "\u11bd", "\u11c1", "2", "\u11ae", "\u1164", "\u1108", "3", "5", "E", "4", "T", "6", "A", "I", "O", "S", "N", "\u11be", "9", "\u11a9", "R", "8", "C", "\u11b0", "M", "7", "P", "D", "L", "\u11b1", "%", "B", "\u11b6", "G", "U", "F", "\u11b2", "V", "H", "K", "\u1170", "\u11ac", "W", "Y", "X", "Q", "J", "Z", "\u11aa", "\u11bf", "\u11b4", "'", "\u11b5", "\u11b3", "\u00b0"], "is_bpe": false}
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "name_or_path": "/data_raid0/TADEV_BIG_DATA/ASR/STT/model/fine-tuning/spelling/wav2vec2-base-4data",
6
+ "pad_token": "<pad>",
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "replace_word_delimiter_char": " ",
9
+ "special_tokens_map_file": "/data/asr_proj/stt/bart_branch/output_dir/grapheme/special_tokens_map.json",
10
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
11
+ "unk_token": "<unk>",
12
+ "word_delimiter_token": "|"
13
+ }