paruwka committed on
Commit
840f698
1 Parent(s): b393e07

Upload tokenizer

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. added_tokens.json +2 -2
  3. tokenizer_config.json +4 -4
  4. vocab.json +68 -65
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  license: apache-2.0
3
- base_model: paruwka/wav2vec2-base-timit-demo-google-colab
4
  tags:
5
  - generated_from_trainer
 
6
  metrics:
7
  - wer
8
  model-index:
 
1
  ---
2
  license: apache-2.0
 
3
  tags:
4
  - generated_from_trainer
5
+ base_model: paruwka/wav2vec2-base-timit-demo-google-colab
6
  metrics:
7
  - wer
8
  model-index:
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 66,
3
- "<s>": 65
4
  }
 
1
  {
2
+ "</s>": 69,
3
+ "<s>": 68
4
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "63": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": false
10
  },
11
- "64": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": false
18
  },
19
- "65": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "66": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
 
1
  {
2
  "added_tokens_decoder": {
3
+ "66": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
 
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "67": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
 
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "68": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "69": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
vocab.json CHANGED
@@ -1,67 +1,70 @@
1
  {
2
- "&": 42,
3
- "'": 4,
4
- "(": 32,
5
- ")": 40,
6
- "=": 19,
7
- "[PAD]": 64,
8
- "[UNK]": 63,
9
- "a": 28,
10
- "b": 18,
11
- "c": 49,
12
- "d": 54,
13
- "e": 60,
14
- "f": 36,
15
- "g": 58,
16
- "h": 11,
17
- "i": 31,
18
- "j": 2,
19
- "k": 6,
20
- "l": 14,
21
- "m": 41,
22
- "n": 22,
23
- "o": 30,
24
- "p": 35,
25
- "q": 56,
26
- "r": 53,
27
- "s": 39,
28
- "t": 43,
29
- "u": 24,
30
- "v": 20,
31
- "w": 0,
32
- "x": 50,
33
- "y": 46,
34
- "z": 55,
35
- "|": 23,
36
- "«": 33,
37
- "»": 37,
38
- "ß": 12,
39
- "à": 44,
40
- "á": 27,
41
- "â": 61,
42
- "ç": 62,
43
- "è": 5,
44
- "é": 17,
45
- "ê": 26,
46
- "ë": 34,
47
- "î": 59,
48
- "ï": 25,
49
- "ó": 45,
50
- "ô": 13,
51
- "ö": 1,
52
- "ù": 52,
53
- "ú": 57,
54
- "û": 48,
55
- "ü": 9,
56
- "ć": 47,
57
- "ō": 15,
58
- "œ": 21,
59
- "ū": 16,
60
- "̀": 8,
61
- "́": 10,
62
- "̂": 7,
63
- "̧": 51,
64
- "—": 3,
65
- "’": 29,
66
- "…": 38
 
 
 
67
  }
 
1
  {
2
+ "&": 28,
3
+ "'": 3,
4
+ "(": 13,
5
+ ")": 49,
6
+ "=": 54,
7
+ "[PAD]": 67,
8
+ "[UNK]": 66,
9
+ "a": 38,
10
+ "b": 8,
11
+ "c": 43,
12
+ "d": 21,
13
+ "e": 62,
14
+ "f": 57,
15
+ "g": 52,
16
+ "h": 44,
17
+ "i": 39,
18
+ "j": 60,
19
+ "k": 48,
20
+ "l": 19,
21
+ "m": 26,
22
+ "n": 15,
23
+ "o": 11,
24
+ "p": 17,
25
+ "q": 23,
26
+ "r": 14,
27
+ "s": 63,
28
+ "t": 2,
29
+ "u": 41,
30
+ "v": 30,
31
+ "w": 51,
32
+ "x": 20,
33
+ "y": 36,
34
+ "z": 18,
35
+ "|": 42,
36
+ "«": 46,
37
+ "»": 29,
38
+ "ß": 53,
39
+ "à": 59,
40
+ "á": 32,
41
+ "â": 55,
42
+ "ç": 24,
43
+ "è": 27,
44
+ "é": 16,
45
+ "ê": 6,
46
+ "ë": 5,
47
+ "î": 7,
48
+ "ï": 45,
49
+ "ó": 56,
50
+ "ô": 33,
51
+ "ö": 61,
52
+ "ø": 40,
53
+ "ù": 58,
54
+ "ú": 9,
55
+ "û": 47,
56
+ "ü": 34,
57
+ "ć": 4,
58
+ "ē": 0,
59
+ "ō": 22,
60
+ "œ": 10,
61
+ "š": 31,
62
+ "ū": 37,
63
+ "̀": 65,
64
+ "́": 35,
65
+ "̂": 12,
66
+ "̧": 64,
67
+ "—": 25,
68
+ "’": 50,
69
+ "…": 1
70
  }