Text Generation
Transformers
Safetensors
Finnish
llama
finnish
conversational
text-generation-inference
aapot committed on
Commit
615b2a3
1 Parent(s): ce7ff6d

fix autotokenizer

Browse files
Files changed (2)
  1. tokenizer.json +20 -9
  2. tokenizer_config.json +5 -5
tokenizer.json CHANGED
@@ -37,7 +37,7 @@
37
  "lstrip": false,
38
  "rstrip": false,
39
  "normalized": false,
40
- "special": false
41
  },
42
  {
43
  "id": 4,
@@ -46,7 +46,7 @@
46
  "lstrip": false,
47
  "rstrip": false,
48
  "normalized": false,
49
- "special": false
50
  },
51
  {
52
  "id": 5,
@@ -55,7 +55,7 @@
55
  "lstrip": false,
56
  "rstrip": false,
57
  "normalized": false,
58
- "special": false
59
  },
60
  {
61
  "id": 6,
@@ -64,15 +64,26 @@
64
  "lstrip": false,
65
  "rstrip": false,
66
  "normalized": false,
67
- "special": false
68
  }
69
  ],
70
- "normalizer": null,
 
 
 
 
 
 
71
  "pre_tokenizer": {
72
- "type": "Metaspace",
73
- "replacement": "▁",
74
- "prepend_scheme": "first",
75
- "split": false
 
 
 
 
 
76
  },
77
  "post_processor": {
78
  "type": "TemplateProcessing",
 
37
  "lstrip": false,
38
  "rstrip": false,
39
  "normalized": false,
40
+ "special": true
41
  },
42
  {
43
  "id": 4,
 
46
  "lstrip": false,
47
  "rstrip": false,
48
  "normalized": false,
49
+ "special": true
50
  },
51
  {
52
  "id": 5,
 
55
  "lstrip": false,
56
  "rstrip": false,
57
  "normalized": false,
58
+ "special": true
59
  },
60
  {
61
  "id": 6,
 
64
  "lstrip": false,
65
  "rstrip": false,
66
  "normalized": false,
67
+ "special": true
68
  }
69
  ],
70
+ "normalizer": {
71
+ "type": "Replace",
72
+ "pattern": {
73
+ "Regex": " {2,}"
74
+ },
75
+ "content": "▁"
76
+ },
77
  "pre_tokenizer": {
78
+ "type": "Sequence",
79
+ "pretokenizers": [
80
+ {
81
+ "type": "Metaspace",
82
+ "replacement": "▁",
83
+ "prepend_scheme": "first",
84
+ "split": false
85
+ }
86
+ ]
87
  },
88
  "post_processor": {
89
  "type": "TemplateProcessing",
tokenizer_config.json CHANGED
@@ -33,7 +33,7 @@
33
  "normalized": false,
34
  "rstrip": false,
35
  "single_word": false,
36
- "special": false
37
  },
38
  "4": {
39
  "content": "[/INST]",
@@ -41,7 +41,7 @@
41
  "normalized": false,
42
  "rstrip": false,
43
  "single_word": false,
44
- "special": false
45
  },
46
  "5": {
47
  "content": "<<SYS>>",
@@ -49,7 +49,7 @@
49
  "normalized": false,
50
  "rstrip": false,
51
  "single_word": false,
52
- "special": false
53
  },
54
  "6": {
55
  "content": "<</SYS>>",
@@ -57,7 +57,7 @@
57
  "normalized": false,
58
  "rstrip": false,
59
  "single_word": false,
60
- "special": false
61
  }
62
  },
63
  "bos_token": "<s>",
@@ -69,7 +69,7 @@
69
  "pad_token": null,
70
  "sp_model_kwargs": {},
71
  "spaces_between_special_tokens": false,
72
- "tokenizer_class": "LlamaTokenizer",
73
  "unk_token": "<unk>",
74
  "use_default_system_prompt": false
75
  }
 
33
  "normalized": false,
34
  "rstrip": false,
35
  "single_word": false,
36
+ "special": true
37
  },
38
  "4": {
39
  "content": "[/INST]",
 
41
  "normalized": false,
42
  "rstrip": false,
43
  "single_word": false,
44
+ "special": true
45
  },
46
  "5": {
47
  "content": "<<SYS>>",
 
49
  "normalized": false,
50
  "rstrip": false,
51
  "single_word": false,
52
+ "special": true
53
  },
54
  "6": {
55
  "content": "<</SYS>>",
 
57
  "normalized": false,
58
  "rstrip": false,
59
  "single_word": false,
60
+ "special": true
61
  }
62
  },
63
  "bos_token": "<s>",
 
69
  "pad_token": null,
70
  "sp_model_kwargs": {},
71
  "spaces_between_special_tokens": false,
72
+ "tokenizer_class": "PreTrainedTokenizerFast",
73
  "unk_token": "<unk>",
74
  "use_default_system_prompt": false
75
  }