bullerwins commited on
Commit
5ec4aaa
1 Parent(s): a9bb02f

Fixed tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +54 -0
  2. tokenizer_config.json +48 -0
tokenizer.json CHANGED
@@ -29,6 +29,60 @@
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
 
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[INST]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": true,
40
+ "special": false
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "[/INST]",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": true,
49
+ "special": false
50
+ },
51
+ {
52
+ "id": 10,
53
+ "content": "[IMG]",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": true,
58
+ "special": false
59
+ },
60
+ {
61
+ "id": 11,
62
+ "content": "[PREFIX]",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": true,
67
+ "special": false
68
+ },
69
+ {
70
+ "id": 12,
71
+ "content": "[MIDDLE]",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": true,
76
+ "special": false
77
+ },
78
+ {
79
+ "id": 13,
80
+ "content": "[SUFFIX]",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": true,
85
+ "special": false
86
  }
87
  ],
88
  "normalizer": {
tokenizer_config.json CHANGED
@@ -26,6 +26,54 @@
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
  },
31
  "bos_token": "<s>",
 
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
29
+ },
30
+ "3": {
31
+ "content": "[INST]",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": false
37
+ },
38
+ "4": {
39
+ "content": "[/INST]",
40
+ "lstrip": false,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": false
45
+ },
46
+ "10": {
47
+ "content": "[IMG]",
48
+ "lstrip": false,
49
+ "normalized": true,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": false
53
+ },
54
+ "11": {
55
+ "content": "[PREFIX]",
56
+ "lstrip": false,
57
+ "normalized": true,
58
+ "rstrip": false,
59
+ "single_word": false,
60
+ "special": false
61
+ },
62
+ "12": {
63
+ "content": "[MIDDLE]",
64
+ "lstrip": false,
65
+ "normalized": true,
66
+ "rstrip": false,
67
+ "single_word": false,
68
+ "special": false
69
+ },
70
+ "13": {
71
+ "content": "[SUFFIX]",
72
+ "lstrip": false,
73
+ "normalized": true,
74
+ "rstrip": false,
75
+ "single_word": false,
76
+ "special": false
77
  }
78
  },
79
  "bos_token": "<s>",