faisaltareque committed on
Commit
f9b6141
1 Parent(s): e3d89d4

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +8 -0
  2. tokenizer.json +54 -0
special_tokens_map.json CHANGED
@@ -1,4 +1,12 @@
1
  {
 
 
 
 
 
 
 
 
2
  "bos_token": "<s>",
3
  "cls_token": "<cls>",
4
  "eos_token": "</s>",
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<url>",
4
+ "<email>",
5
+ "<number>",
6
+ "<digit>",
7
+ "<punc>",
8
+ "<foreign>"
9
+ ],
10
  "bos_token": "<s>",
11
  "cls_token": "<cls>",
12
  "eos_token": "</s>",
tokenizer.json CHANGED
@@ -65,6 +65,60 @@
65
  "rstrip": false,
66
  "normalized": false,
67
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  }
69
  ],
70
  "normalizer": null,
 
65
  "rstrip": false,
66
  "normalized": false,
67
  "special": true
68
+ },
69
+ {
70
+ "id": 32000,
71
+ "content": "<url>",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": false,
76
+ "special": true
77
+ },
78
+ {
79
+ "id": 32001,
80
+ "content": "<email>",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
+ },
87
+ {
88
+ "id": 32002,
89
+ "content": "<number>",
90
+ "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
+ },
96
+ {
97
+ "id": 32003,
98
+ "content": "<digit>",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
+ {
106
+ "id": 32004,
107
+ "content": "<punc>",
108
+ "single_word": false,
109
+ "lstrip": false,
110
+ "rstrip": false,
111
+ "normalized": false,
112
+ "special": true
113
+ },
114
+ {
115
+ "id": 32005,
116
+ "content": "<foreign>",
117
+ "single_word": false,
118
+ "lstrip": false,
119
+ "rstrip": false,
120
+ "normalized": false,
121
+ "special": true
122
  }
123
  ],
124
  "normalizer": null,