lIlBrother committed on
Commit
9876cd0
1 Parent(s): 1fad393

Init: Model config

Browse files
alphabet.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"labels": ["", "\u2047", "<s>", "</s>", " ", "\u1171", "\u11b4", "\u1165", "\u11ae", "\u110c", "\u116a", "\u110e", "\u11b3", "\u11bf", "\u116b", "\u11c1", "\u1163", "\u11aa", "\u110d", "\u1173", "\u11ba", "\u1169", "\u1174", "\u1112", "\u11c2", "\u11ab", "\u11b5", "\u1167", "\u11b6", "\u1168", "\u1161", "\u11ad", "\u1170", "\u11bd", "\u11b8", "\u11b1", "\u1109", "\u11bb", "\u11af", "\u116d", "\u1103", "\u11a9", "\u1175", "\u1101", "\u1111", "\u1162", "\u1110", "\u1164", "\u1108", "\u116e", "\u1104", "\u1102", "\u116f", "\u110a", "\u1105", "\u11b7", "\u1106", "\u11b9", "\u116c", "\u1100", "\u11ac", "\u1107", "\u1166", "\u11b0", "\u11bc", "\u11b2", "\u11be", "\u110b", "\u11c0", "\u11a8", "\u110f", "\u1172"], "is_bpe": false}
language_model/attrs.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": false}
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "name_or_path": "/data_raid0/TADEV_BIG_DATA/ASR/STT/model/fine-tuning/42maru/wav2vec2-base-4data",
6
+ "pad_token": "<pad>",
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "replace_word_delimiter_char": " ",
9
+ "special_tokens_map_file": "/DATA01/bart/workspace/stt/output_dir/special_tokens_map.json",
10
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
11
+ "unk_token": "<unk>",
12
+ "word_delimiter_token": "|"
13
+ }
vocab.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<pad>": 0,
3
+ "<unk>": 1,
4
+ "<s>": 2,
5
+ "</s>": 3,
6
+ "|": 4,
7
+ "\u1171": 5,
8
+ "\u11b4": 6,
9
+ "\u1165": 7,
10
+ "\u11ae": 8,
11
+ "\u110c": 9,
12
+ "\u116a": 10,
13
+ "\u110e": 11,
14
+ "\u11b3": 12,
15
+ "\u11bf": 13,
16
+ "\u116b": 14,
17
+ "\u11c1": 15,
18
+ "\u1163": 16,
19
+ "\u11aa": 17,
20
+ "\u110d": 18,
21
+ "\u1173": 19,
22
+ "\u11ba": 20,
23
+ "\u1169": 21,
24
+ "\u1174": 22,
25
+ "\u1112": 23,
26
+ "\u11c2": 24,
27
+ "\u11ab": 25,
28
+ "\u11b5": 26,
29
+ "\u1167": 27,
30
+ "\u11b6": 28,
31
+ "\u1168": 29,
32
+ "\u1161": 30,
33
+ "\u11ad": 31,
34
+ "\u1170": 32,
35
+ "\u11bd": 33,
36
+ "\u11b8": 34,
37
+ "\u11b1": 35,
38
+ "\u1109": 36,
39
+ "\u11bb": 37,
40
+ "\u11af": 38,
41
+ "\u116d": 39,
42
+ "\u1103": 40,
43
+ "\u11a9": 41,
44
+ "\u1175": 42,
45
+ "\u1101": 43,
46
+ "\u1111": 44,
47
+ "\u1162": 45,
48
+ "\u1110": 46,
49
+ "\u1164": 47,
50
+ "\u1108": 48,
51
+ "\u116e": 49,
52
+ "\u1104": 50,
53
+ "\u1102": 51,
54
+ "\u116f": 52,
55
+ "\u110a": 53,
56
+ "\u1105": 54,
57
+ "\u11b7": 55,
58
+ "\u1106": 56,
59
+ "\u11b9": 57,
60
+ "\u116c": 58,
61
+ "\u1100": 59,
62
+ "\u11ac": 60,
63
+ "\u1107": 61,
64
+ "\u1166": 62,
65
+ "\u11b0": 63,
66
+ "\u11bc": 64,
67
+ "\u11b2": 65,
68
+ "\u11be": 66,
69
+ "\u110b": 67,
70
+ "\u11c0": 68,
71
+ "\u11a8": 69,
72
+ "\u110f": 70,
73
+ "\u1172": 71
74
+ }