sanchit-gandhi HF staff committed on
Commit
e9bf05f
1 Parent(s): a076edc

add tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +1 -1
  2. vocab.json +29 -29
tokenizer_config.json CHANGED
@@ -2,7 +2,7 @@
2
  "bos_token": "<s>",
3
  "do_lower_case": false,
4
  "eos_token": "</s>",
5
- "name_or_path": "/tmp/tmpbt4kf3rk",
6
  "pad_token": "<pad>",
7
  "replace_word_delimiter_char": " ",
8
  "special_tokens_map_file": null,
 
2
  "bos_token": "<s>",
3
  "do_lower_case": false,
4
  "eos_token": "</s>",
5
+ "name_or_path": "/tmp/tmpi8f1bi5v",
6
  "pad_token": "<pad>",
7
  "replace_word_delimiter_char": " ",
8
  "special_tokens_map_file": null,
vocab.json CHANGED
@@ -1,36 +1,36 @@
1
  {
2
- "'": 12,
3
- "-": 8,
4
- "1": 23,
5
  "</s>": 2,
6
  "<pad>": 0,
7
  "<s>": 1,
8
  "<unk>": 3,
9
- "a": 22,
10
- "b": 20,
11
- "c": 5,
12
- "d": 25,
13
- "e": 15,
14
- "f": 29,
15
- "g": 14,
16
- "h": 24,
17
  "i": 26,
18
- "j": 33,
19
- "k": 19,
20
- "l": 7,
21
- "m": 30,
22
- "n": 13,
23
- "o": 6,
24
- "p": 10,
25
- "q": 31,
26
- "r": 9,
27
- "s": 11,
28
- "t": 4,
29
- "u": 28,
30
- "v": 16,
31
- "w": 17,
32
- "x": 32,
33
- "y": 21,
34
- "z": 18,
35
- "|": 27
36
  }
 
1
  {
2
+ "'": 29,
3
+ "-": 25,
4
+ "1": 18,
5
  "</s>": 2,
6
  "<pad>": 0,
7
  "<s>": 1,
8
  "<unk>": 3,
9
+ "a": 23,
10
+ "b": 33,
11
+ "c": 22,
12
+ "d": 21,
13
+ "e": 14,
14
+ "f": 11,
15
+ "g": 6,
16
+ "h": 9,
17
  "i": 26,
18
+ "j": 4,
19
+ "k": 15,
20
+ "l": 17,
21
+ "m": 8,
22
+ "n": 30,
23
+ "o": 20,
24
+ "p": 19,
25
+ "q": 12,
26
+ "r": 13,
27
+ "s": 10,
28
+ "t": 24,
29
+ "u": 7,
30
+ "v": 31,
31
+ "w": 32,
32
+ "x": 28,
33
+ "y": 16,
34
+ "z": 27,
35
+ "|": 5
36
  }