colesimmons committed on
Commit
75fadd9
1 Parent(s): 682b103

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +5 -12
  2. tokenizer_config.json +3 -4
special_tokens_map.json CHANGED
@@ -1,42 +1,35 @@
1
  {
2
  "additional_special_tokens": [
3
  {
4
- "content": "<RULING>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "<BLANK_SPACE>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
16
  },
17
  {
18
- "content": "<unk>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
  {
25
- "content": "\n",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
29
  "single_word": false
30
  },
31
  {
32
- "content": "<COLUMN>",
33
  "lstrip": false,
34
  "normalized": false,
35
  "rstrip": false,
36
  "single_word": false
37
  },
38
  {
39
- "content": "<SURFACE>",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
  {
4
+ "content": "<SURFACE>",
 
 
 
 
 
 
 
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
  {
11
+ "content": "<COLUMN>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
16
  },
17
  {
18
+ "content": "<BLANK_SPACE>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
  {
25
+ "content": "<RULING>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
29
  "single_word": false
30
  },
31
  {
32
+ "content": "\n",
33
  "lstrip": false,
34
  "normalized": false,
35
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -90,12 +90,11 @@
90
  }
91
  },
92
  "additional_special_tokens": [
93
- "<RULING>",
 
94
  "<BLANK_SPACE>",
95
- "<unk>",
96
  "\n",
97
- "<COLUMN>",
98
- "<SURFACE>",
99
  "..."
100
  ],
101
  "bos_token": "<s>",
 
90
  }
91
  },
92
  "additional_special_tokens": [
93
+ "<SURFACE>",
94
+ "<COLUMN>",
95
  "<BLANK_SPACE>",
96
+ "<RULING>",
97
  "\n",
 
 
98
  "..."
99
  ],
100
  "bos_token": "<s>",