cooperleong00 committed on
Commit
bb46145
1 Parent(s): a6fcd92
special_tokens_map.json CHANGED
@@ -9,7 +9,7 @@
9
  "cls_token": {
10
  "content": "<cls>",
11
  "lstrip": false,
12
- "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
@@ -23,21 +23,21 @@
23
  "mask_token": {
24
  "content": "<mask>",
25
  "lstrip": false,
26
- "normalized": false,
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
  "pad_token": {
31
  "content": "<pad>",
32
  "lstrip": false,
33
- "normalized": false,
34
  "rstrip": false,
35
  "single_word": false
36
  },
37
  "sep_token": {
38
  "content": "<sep>",
39
  "lstrip": false,
40
- "normalized": false,
41
  "rstrip": false,
42
  "single_word": false
43
  },
 
9
  "cls_token": {
10
  "content": "<cls>",
11
  "lstrip": false,
12
+ "normalized": true,
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
23
  "mask_token": {
24
  "content": "<mask>",
25
  "lstrip": false,
26
+ "normalized": true,
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
  "pad_token": {
31
  "content": "<pad>",
32
  "lstrip": false,
33
+ "normalized": true,
34
  "rstrip": false,
35
  "single_word": false
36
  },
37
  "sep_token": {
38
  "content": "<sep>",
39
  "lstrip": false,
40
+ "normalized": true,
41
  "rstrip": false,
42
  "single_word": false
43
  },
tokenizer.json CHANGED
@@ -36,7 +36,7 @@
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
- "normalized": false,
40
  "special": true
41
  },
42
  {
@@ -45,7 +45,7 @@
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
- "normalized": false,
49
  "special": true
50
  },
51
  {
@@ -54,7 +54,7 @@
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
- "normalized": false,
58
  "special": true
59
  },
60
  {
@@ -63,7 +63,7 @@
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
- "normalized": false,
67
  "special": true
68
  }
69
  ],
@@ -32170,7 +32170,11 @@
32170
  "왕": 31996,
32171
  "收": 31997,
32172
  "弘": 31998,
32173
- "给": 31999
 
 
 
 
32174
  },
32175
  "merges": [
32176
  "▁ t",
 
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
+ "normalized": true,
40
  "special": true
41
  },
42
  {
 
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
+ "normalized": true,
49
  "special": true
50
  },
51
  {
 
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
+ "normalized": true,
58
  "special": true
59
  },
60
  {
 
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
+ "normalized": true,
67
  "special": true
68
  }
69
  ],
 
32170
  "왕": 31996,
32171
  "收": 31997,
32172
  "弘": 31998,
32173
+ "给": 31999,
32174
+ "<pad>": 32000,
32175
+ "<sep>": 32001,
32176
+ "<cls>": 32002,
32177
+ "<mask>": 32003
32178
  },
32179
  "merges": [
32180
  "▁ t",
tokenizer_config.json CHANGED
@@ -29,7 +29,7 @@
29
  "32000": {
30
  "content": "<pad>",
31
  "lstrip": false,
32
- "normalized": false,
33
  "rstrip": false,
34
  "single_word": false,
35
  "special": true
@@ -37,7 +37,7 @@
37
  "32001": {
38
  "content": "<sep>",
39
  "lstrip": false,
40
- "normalized": false,
41
  "rstrip": false,
42
  "single_word": false,
43
  "special": true
@@ -45,7 +45,7 @@
45
  "32002": {
46
  "content": "<cls>",
47
  "lstrip": false,
48
- "normalized": false,
49
  "rstrip": false,
50
  "single_word": false,
51
  "special": true
@@ -53,7 +53,7 @@
53
  "32003": {
54
  "content": "<mask>",
55
  "lstrip": false,
56
- "normalized": false,
57
  "rstrip": false,
58
  "single_word": false,
59
  "special": true
 
29
  "32000": {
30
  "content": "<pad>",
31
  "lstrip": false,
32
+ "normalized": true,
33
  "rstrip": false,
34
  "single_word": false,
35
  "special": true
 
37
  "32001": {
38
  "content": "<sep>",
39
  "lstrip": false,
40
+ "normalized": true,
41
  "rstrip": false,
42
  "single_word": false,
43
  "special": true
 
45
  "32002": {
46
  "content": "<cls>",
47
  "lstrip": false,
48
+ "normalized": true,
49
  "rstrip": false,
50
  "single_word": false,
51
  "special": true
 
53
  "32003": {
54
  "content": "<mask>",
55
  "lstrip": false,
56
+ "normalized": true,
57
  "rstrip": false,
58
  "single_word": false,
59
  "special": true