anyiwang committed on
Commit
8dbd840
1 Parent(s): a123a01

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +33 -1
  2. tokenizer_config.json +1 -1
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
@@ -62,6 +67,12 @@
62
  "id": "A",
63
  "type_id": 0
64
  }
 
 
 
 
 
 
65
  }
66
  ],
67
  "pair": [
@@ -77,6 +88,12 @@
77
  "type_id": 0
78
  }
79
  },
 
 
 
 
 
 
80
  {
81
  "SpecialToken": {
82
  "id": "<s>",
@@ -88,9 +105,24 @@
88
  "id": "B",
89
  "type_id": 1
90
  }
 
 
 
 
 
 
91
  }
92
  ],
93
  "special_tokens": {
 
 
 
 
 
 
 
 
 
94
  "<s>": {
95
  "id": "<s>",
96
  "ids": [
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 1024,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
67
  "id": "A",
68
  "type_id": 0
69
  }
70
+ },
71
+ {
72
+ "SpecialToken": {
73
+ "id": "</s>",
74
+ "type_id": 0
75
+ }
76
  }
77
  ],
78
  "pair": [
 
88
  "type_id": 0
89
  }
90
  },
91
+ {
92
+ "SpecialToken": {
93
+ "id": "</s>",
94
+ "type_id": 0
95
+ }
96
+ },
97
  {
98
  "SpecialToken": {
99
  "id": "<s>",
 
105
  "id": "B",
106
  "type_id": 1
107
  }
108
+ },
109
+ {
110
+ "SpecialToken": {
111
+ "id": "</s>",
112
+ "type_id": 1
113
+ }
114
  }
115
  ],
116
  "special_tokens": {
117
+ "</s>": {
118
+ "id": "</s>",
119
+ "ids": [
120
+ 2
121
+ ],
122
+ "tokens": [
123
+ "</s>"
124
+ ]
125
+ },
126
  "<s>": {
127
  "id": "<s>",
128
  "ids": [
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": true,
3
- "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
1
  {
2
  "add_bos_token": true,
3
+ "add_eos_token": true,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",