selcuktekgoz committed
Commit 80bf2b6
Parent: 3562cbc

Upload tokenizer

merges.txt CHANGED
@@ -49741,4 +49741,3 @@ Cihaz ın
 Ġistek te
 Ġkendimiz in
 Su eno
-Ġolm u
 
special_tokens_map.json CHANGED
@@ -7,6 +7,13 @@
     "single_word": false
   },
   "eos_token": {
+    "content": "<|stop|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
     "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": true,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -9,14 +9,23 @@
       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "1": {
+      "content": "<|stop|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
     }
   },
   "bos_token": "<|endoftext|>",
+  "chat_template": "{% set message_count = messages|length %}{% if message_count >= 4 and messages[-4].role == 'user' %}{% set recent_messages = messages[-4:] %}{% elif message_count > 3 %}{% set recent_messages = messages[-3:] %}{% else %}{% set recent_messages = messages %}{% endif %}{% for message in recent_messages %}{% if message.role == 'user' %}### Kullanıcı:\n{{ message.content }}\n{% elif message.role == 'assistant' %}### Asistan:\n{{ message.content }}\n{% endif %}{% endfor %}{% if messages[-1]['role'] == 'user' %}### Asistan:\n{% endif %}",
   "clean_up_tokenization_spaces": true,
-  "eos_token": "<|endoftext|>",
+  "eos_token": "<|stop|>",
   "errors": "replace",
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": null,
+  "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
 }
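
The new chat_template keeps only the most recent turns (the last four messages when the fourth-from-last is a user turn, otherwise the last three, or everything when there are three or fewer) and renders them as ### Kullanıcı: / ### Asistan: blocks, appending a trailing ### Asistan: header when the conversation ends on a user message. A minimal rendering sketch with transformers; the repo id is hypothetical:

from transformers import AutoTokenizer

# Hypothetical repo id; substitute the actual model repository.
tokenizer = AutoTokenizer.from_pretrained("selcuktekgoz/example-model")

messages = [
    {"role": "user", "content": "Merhaba!"},
    {"role": "assistant", "content": "Merhaba, nasıl yardımcı olabilirim?"},
    {"role": "user", "content": "Bana Ankara hakkında bilgi ver."},
]

# The template itself appends the trailing "### Asistan:" header when the
# last message is from the user, so add_generation_prompt is not needed.
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
print(prompt)
# ### Kullanıcı:
# Merhaba!
# ### Asistan:
# Merhaba, nasıl yardımcı olabilirim?
# ### Kullanıcı:
# Bana Ankara hakkında bilgi ver.
# ### Asistan: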
vocab.json CHANGED
The diff for this file is too large to render. See raw diff