AlexWortega committed on
Commit
2f88a56
1 Parent(s): c7106f2
Files changed (3) hide show
  1. special_tokens_map.json +28 -4
  2. tokenizer.json +29 -1
  3. tokenizer_config.json +2 -0
special_tokens_map.json CHANGED
@@ -4,8 +4,32 @@
4
  "<s>",
5
  "</s>"
6
  ],
7
- "bos_token": "<s>",
8
- "eos_token": "</s>",
9
- "pad_token": "<unk>",
10
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  }
 
4
  "<s>",
5
  "</s>"
6
  ],
7
+ "bos_token": {
8
+ "content": "<s>",
9
+ "lstrip": false,
10
+ "normalized": false,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "eos_token": {
15
+ "content": "</s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "pad_token": {
22
+ "content": "<unk>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ "unk_token": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ }
35
  }
tokenizer.json CHANGED
@@ -51,6 +51,12 @@
51
  "post_processor": {
52
  "type": "TemplateProcessing",
53
  "single": [
 
 
 
 
 
 
54
  {
55
  "Sequence": {
56
  "id": "A",
@@ -59,12 +65,24 @@
59
  }
60
  ],
61
  "pair": [
 
 
 
 
 
 
62
  {
63
  "Sequence": {
64
  "id": "A",
65
  "type_id": 0
66
  }
67
  },
 
 
 
 
 
 
68
  {
69
  "Sequence": {
70
  "id": "B",
@@ -72,7 +90,17 @@
72
  }
73
  }
74
  ],
75
- "special_tokens": {}
 
 
 
 
 
 
 
 
 
 
76
  },
77
  "decoder": {
78
  "type": "Sequence",
 
51
  "post_processor": {
52
  "type": "TemplateProcessing",
53
  "single": [
54
+ {
55
+ "SpecialToken": {
56
+ "id": "<s>",
57
+ "type_id": 0
58
+ }
59
+ },
60
  {
61
  "Sequence": {
62
  "id": "A",
 
65
  }
66
  ],
67
  "pair": [
68
+ {
69
+ "SpecialToken": {
70
+ "id": "<s>",
71
+ "type_id": 0
72
+ }
73
+ },
74
  {
75
  "Sequence": {
76
  "id": "A",
77
  "type_id": 0
78
  }
79
  },
80
+ {
81
+ "SpecialToken": {
82
+ "id": "<s>",
83
+ "type_id": 1
84
+ }
85
+ },
86
  {
87
  "Sequence": {
88
  "id": "B",
 
90
  }
91
  }
92
  ],
93
+ "special_tokens": {
94
+ "<s>": {
95
+ "id": "<s>",
96
+ "ids": [
97
+ 1
98
+ ],
99
+ "tokens": [
100
+ "<s>"
101
+ ]
102
+ }
103
+ }
104
  },
105
  "decoder": {
106
  "type": "Sequence",
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
1
  {
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",