interstellarninja committed
Commit 89012d4
1 Parent(s): 5526113

Training in progress, step 100

adapter_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "/home/interstellarninja/ai_projects/axolotl/stablelm-1_6b-tool-calling-1/merged",
+   "bias": "none",
+   "fan_in_fan_out": null,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": [
+     "embed_tokens",
+     "lm_head"
+   ],
+   "peft_type": "LORA",
+   "r": 16,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "v_proj",
+     "down_proj",
+     "up_proj",
+     "o_proj",
+     "k_proj",
+     "q_proj",
+     "gate_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
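
Note: the file above is a standard PEFT LoRA adapter config (rank 16, alpha 16, dropout 0.05 on the attention and MLP projections, with `embed_tokens` and `lm_head` additionally saved in full). As a hedged illustration only, the sketch below shows the usual way such an adapter is attached to its base model with the `peft` library; the base-model path and adapter directory are placeholders, not values confirmed by this commit.

```python
# Minimal sketch (assumed paths): attach the saved LoRA adapter to its base model.
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

base_path = "/path/to/stablelm-1_6b-tool-calling-1/merged"  # placeholder for base_model_name_or_path
adapter_dir = "./checkpoint-100"                            # placeholder dir with adapter_config.json + adapter_model.safetensors

base = AutoModelForCausalLM.from_pretrained(
    base_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,  # StableLMEpoch ships custom modeling code
)
model = PeftModel.from_pretrained(base, adapter_dir)  # reads the adapter_config.json shown above
model = model.merge_and_unload()                      # optionally fold the LoRA deltas back into the base weights
```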
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dac13d8ea70c951f34d886785caa5ad1c9454ed7379c6b592324dea15ecf3932
+ size 852407184
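
Note: `adapter_model.safetensors` is stored as a Git LFS pointer; the three lines above record only the pointer spec version, the sha256 object id, and the byte size of the real file. A generic sketch for checking a downloaded copy against such a pointer (the local filename is an assumption):

```python
# Sketch: verify a downloaded LFS object against the pointer's oid and size.
import hashlib
import os

expected_oid = "dac13d8ea70c951f34d886785caa5ad1c9454ed7379c6b592324dea15ecf3932"
expected_size = 852407184
path = "adapter_model.safetensors"  # assumed local path of the downloaded file

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
print("file matches its LFS pointer")
```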
arcade100k.tiktoken ADDED
The diff for this file is too large to render. See raw diff
 
config.json CHANGED
@@ -1,31 +1,45 @@
  {
- "_name_or_path": "/home/interstellarninja/ai_projects/axolotl/stablelm-func-calling-3/merged",
+ "_name_or_path": "/home/interstellarninja/ai_projects/axolotl/stablelm-1_6b-tool-calling-1/merged",
  "architectures": [
    "StableLMEpochForCausalLM"
  ],
+ "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_stablelm_epoch.StableLMEpochConfig",
    "AutoModelForCausalLM": "modeling_stablelm_epoch.StableLMEpochForCausalLM"
  },
- "bos_token_id": 0,
- "eos_token_id": 0,
+ "bos_token_id": 100257,
+ "eos_token_id": 100257,
  "hidden_act": "silu",
- "hidden_size": 2560,
+ "hidden_size": 2048,
  "initializer_range": 0.02,
- "intermediate_size": 6912,
+ "intermediate_size": 5632,
  "max_position_embeddings": 4096,
  "model_type": "stablelm_epoch",
  "norm_eps": 1e-05,
  "num_attention_heads": 32,
  "num_heads": 32,
- "num_hidden_layers": 32,
+ "num_hidden_layers": 24,
  "num_key_value_heads": 32,
+ "quantization_config": {
+   "bnb_4bit_compute_dtype": "bfloat16",
+   "bnb_4bit_quant_type": "nf4",
+   "bnb_4bit_use_double_quant": true,
+   "llm_int8_enable_fp32_cpu_offload": false,
+   "llm_int8_has_fp16_weight": false,
+   "llm_int8_skip_modules": null,
+   "llm_int8_threshold": 6.0,
+   "load_in_4bit": true,
+   "load_in_8bit": false,
+   "quant_method": "bitsandbytes"
+ },
  "rope_pct": 0.25,
  "rope_theta": 10000,
  "rotary_scaling_factor": 1.0,
  "tie_word_embeddings": false,
- "torch_dtype": "float16",
+ "torch_dtype": "bfloat16",
  "transformers_version": "4.37.0.dev0",
  "use_cache": false,
- "vocab_size": 50304
+ "use_qkv_bias": true,
+ "vocab_size": 100352
  }
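
Note: the new config embeds a bitsandbytes `quantization_config` (4-bit NF4 with double quantization and bfloat16 compute), consistent with QLoRA-style fine-tuning of a 4-bit base. As a hedged sketch, the same settings are normally expressed at load time like this; the model path is a placeholder:

```python
# Sketch: the quantization_config block above, expressed as a BitsAndBytesConfig.
# Requires the bitsandbytes package and a CUDA GPU; the model path is a placeholder.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "/path/to/stablelm-1_6b-tool-calling-1/merged",  # placeholder
    quantization_config=bnb_config,
    trust_remote_code=True,  # custom StableLMEpoch architecture
    device_map="auto",
)
```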
configuration_stablelm_epoch.py CHANGED
@@ -1,4 +1,3 @@
- # coding=utf-8
  # Copyright 2023 Stability and The HuggingFace Inc. team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -62,8 +61,12 @@ class StableLMEpochConfig(PretrainedConfig):
          use_cache (`bool`, *optional*, defaults to `True`):
              Whether or not the model should return the last key/values attentions
              (not used by all models). Only relevant if `config.is_decoder=True`.
+         use_qkv_bias (`bool`, *optional*, defaults to `True`):
+             Whether or not the model should use bias for qkv layers.
          tie_word_embeddings(`bool`, *optional*, defaults to `False`):
              Whether to tie weight embeddings
+         attention_dropout (`float`, *optional*, defaults to 0.0):
+             The dropout ratio for the attention probabilities.
      """
      model_type = "stablelm_epoch"
      keys_to_ignore_at_inference = ["past_key_values"]
@@ -83,9 +86,11 @@
          initializer_range=0.02,
          norm_eps=1.0e-5,
          use_cache=True,
+         use_qkv_bias=True,
          bos_token_id=0,
          eos_token_id=2,
          tie_word_embeddings=False,
+         attention_dropout: float = 0.0,
          **kwargs,
      ):
          self.vocab_size = vocab_size
@@ -101,7 +106,9 @@
          self.initializer_range = initializer_range
          self.norm_eps = norm_eps
          self.use_cache = use_cache
+         self.use_qkv_bias = use_qkv_bias
          self.tie_word_embeddings = tie_word_embeddings
+         self.attention_dropout = attention_dropout
          super().__init__(
              bos_token_id=bos_token_id,
              eos_token_id=eos_token_id,
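
Note: the diff adds two fields to the custom config class, `use_qkv_bias` and `attention_dropout`. Below is a small sketch of constructing the config with these fields set explicitly; it assumes `configuration_stablelm_epoch.py` from this repo is on the import path, and the parameter names other than the two new ones are taken from config.json above and assumed to match the constructor.

```python
# Sketch: instantiate the custom config with the two newly added fields.
# Assumes configuration_stablelm_epoch.py from this repository is importable.
from configuration_stablelm_epoch import StableLMEpochConfig

config = StableLMEpochConfig(
    vocab_size=100352,
    hidden_size=2048,
    intermediate_size=5632,
    num_hidden_layers=24,
    num_attention_heads=32,
    use_qkv_bias=True,       # new: bias on the q/k/v projection layers
    attention_dropout=0.0,   # new: dropout on the attention probabilities
    bos_token_id=100257,
    eos_token_id=100257,
)
print(config.use_qkv_bias, config.attention_dropout)
```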
runs/Jan24_03-21-57_WIN-2NFIRFU3FSK/events.out.tfevents.1706084518.WIN-2NFIRFU3FSK.39284.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:481fe540b63adb0ccdd50f5e247a7b41dbd07984cecc33cc491a113b0d29b5fb
+ size 67513
special_tokens_map.json CHANGED
@@ -1,30 +1,5 @@
  {
- "bos_token": {
-   "content": "<|endoftext|>",
-   "lstrip": false,
-   "normalized": false,
-   "rstrip": false,
-   "single_word": false
- },
- "eos_token": {
-   "content": "<|endoftext|>",
-   "lstrip": false,
-   "normalized": false,
-   "rstrip": false,
-   "single_word": false
- },
- "pad_token": {
-   "content": "[PAD]",
-   "lstrip": false,
-   "normalized": false,
-   "rstrip": false,
-   "single_word": false
- },
- "unk_token": {
-   "content": "<|endoftext|>",
-   "lstrip": false,
-   "normalized": false,
-   "rstrip": false,
-   "single_word": false
- }
+ "bos_token": "<|endoftext|>",
+ "eos_token": "<|endoftext|>",
+ "pad_token": "<|endoftext|>"
  }
tokenization_arcade100k.py ADDED
@@ -0,0 +1,290 @@
+ # coding=utf-8
+ # Copyright (c) 2023 Alibaba Cloud & Stability AI.
+ #
+ # Tongyi Qianwen LICENSE AGREEMENT:
+ # https://github.com/QwenLM/Qwen/blob/5aa84bdfd3237b37f01bc88cd49b3279b9a71d0b/Tongyi%20Qianwen%20LICENSE%20AGREEMENT
+ """Tokenization classes for Arcade100k."""
+
+ import base64
+ import os
+ import unicodedata
+ from typing import Collection, Dict, List, Set, Tuple, Union
+
+ import tiktoken
+ from transformers.utils import logging
+ from transformers import PreTrainedTokenizer, AddedToken
+
+ logger = logging.get_logger(__name__)
+
+ VOCAB_FILES_NAMES = {"vocab_file": "arcade100k.tiktoken"}
+ NAME = "arcade100k"
+
+
+ def _load_tiktoken_bpe(tiktoken_bpe_file: str) -> Dict[bytes, int]:
+     with open(tiktoken_bpe_file, "rb") as f:
+         contents = f.read()
+     return {
+         base64.b64decode(token): int(rank)
+         for token, rank in (line.split() for line in contents.splitlines() if line)
+     }
+
+
+ ENDOFTEXT = "<|endoftext|>"
+ FIM = [
+     "<|fim_prefix|>",
+     "<|fim_middle|>",
+     "<|fim_suffix|>",
+     "<|fim_pad|>",
+ ]
+ # `StarCoder` Tokens
+ CODE = [
+     "<gh_stars>",
+     "<filename>",
+     "<issue_start>",
+     "<issue_comment>",
+     "<issue_closed>",
+     "<jupyter_start>",
+     "<jupyter_text>",
+     "<jupyter_code>",
+     "<jupyter_output>",
+     "<empty_output>",
+     "<commit_before>",
+     "<commit_msg>",
+     "<commit_after>",
+     "<reponame>",
+ ]
+ CHAT = [
+     "<|im_start|>",  # Chat: Input message start
+     "<|im_end|>",  # Chat: Input message end
+ ]
+ PAUSE = "<|pause|>"  # Think before you speak (https://arxiv.org/abs/2310.02226)
+ REGISTERS = [
+     f"<|reg{i}|>" for i in range(0, 8)
+ ]  # Register 0 sink token (https://arxiv.org/abs/2309.17453)
+ ENDOFPROMPT = "<|endofprompt|>"
+ SPECIAL_TOKENS_NAMES = (
+     [ENDOFTEXT]
+     + FIM
+     + CODE
+     + [ENDOFPROMPT]
+     + CHAT
+     + [PAUSE]
+     + REGISTERS
+     + ["<|extra0|>"]
+ )
+ START_ID = 100257
+ SPECIAL_TOKENS = {t: START_ID + i for i, t in enumerate(SPECIAL_TOKENS_NAMES)}
+
+
+ def _arcade100k(vocab_file: str):
+     mergeable_ranks = _load_tiktoken_bpe(vocab_file)
+
+     return {
+         "name": NAME,
+         "pat_str": r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+""",
+         "mergeable_ranks": mergeable_ranks,
+         "special_tokens": SPECIAL_TOKENS,
+     }
+
+
+ class Arcade100kTokenizer(PreTrainedTokenizer):
+     """
+     Construct a Arcade100k tokenizer backed by `tiktoken`.
+
+     Args:
+         vocab_file (`str`):
+             Path to the vocabulary file.
+         errors (`str`, *optional*, defaults to `"replace"`):
+             How to handle errors in decoding UTF-8 byte sequences.
+             WARNING: the default behaviour of this function is lossy, since decoded bytes are not
+             guaranteed to be valid UTF-8. You can control this behaviour using the `errors` parameter,
+             for instance, setting `errors=strict`.
+     """
+
+     vocab_files_names = VOCAB_FILES_NAMES
+     model_input_names = ["input_ids", "attention_mask"]
+
+     def __init__(
+         self,
+         vocab_file: str,
+         errors: str = "replace",
+         **kwargs,
+     ):
+         super().__init__(errors=errors, **kwargs)
+         self._tiktoken_config = _arcade100k(vocab_file)
+         self.tokenizer = tiktoken.Encoding(**self._tiktoken_config)
+
+         # TODO: Remove this assertion
+         assert (
+             len(self.tokenizer._mergeable_ranks)
+             + len(self.tokenizer._special_tokens)
+             + 1
+             == self.tokenizer.n_vocab
+         ), f"{len(self.tokenizer._mergeable_ranks) + len(self.tokenizer._special_tokens)} != {self.tokenizer.n_vocab} in encoding"
+
+         self.decoder = {i: n for n, i in self.tokenizer._mergeable_ranks.items()}
+         self.decoder.update({i: n for n, i in self.tokenizer._special_tokens.items()})
+         # Provide default `eos_token` and `pad_token`
+         if self.eos_token is None:
+             self.eos_token = self.decoder[self.tokenizer.eot_token]
+         if self.pad_token is None:
+             self.pad_token = self.decoder[self.tokenizer.pad_token]
+
+         # Expose for convenience
+         self.mergeable_ranks = self.tokenizer._mergeable_ranks
+         self.special_tokens = self.tokenizer._special_tokens
+
+     def __len__(self):
+         return self.tokenizer.n_vocab
+
+     def __getstate__(self):
+         # Required for `pickle` support
+         state = self.__dict__.copy()
+         del state["tokenizer"]
+         return state
+
+     def __setstate__(self, state):
+         self.__dict__.update(state)
+         self.tokenizer = tiktoken.Encoding(**self._tiktoken_config)
+
+     @property
+     def vocab_size(self):
+         return self.tokenizer.n_vocab
+
+     def get_vocab(self) -> Dict[bytes, int]:
+         return self.tokenizer._mergeable_ranks
+
+     def convert_tokens_to_ids(
+         self, tokens: Union[bytes, str, List[Union[bytes, str]]]
+     ) -> List[int]:
+         ids = []
+         if isinstance(tokens, (str, bytes)):
+             if tokens in self.tokenizer._special_tokens:
+                 return self.tokenizer._special_tokens[tokens]
+             else:
+                 return self.tokenizer._mergeable_ranks.get(tokens)
+         for token in tokens:
+             if token in self.tokenizer._special_tokens:
+                 ids.append(self.tokenizer._special_tokens[token])
+             else:
+                 ids.append(self.tokenizer._mergeable_ranks.get(token))
+         return ids
+
+     def _add_tokens(
+         self,
+         new_tokens: Union[List[str], List[AddedToken]],
+         special_tokens: bool = False,
+     ) -> int:
+         if not special_tokens and new_tokens:
+             raise ValueError("Adding regular tokens is not supported")
+         for token in new_tokens:
+             surface_form = token.content if isinstance(token, AddedToken) else token
+             if surface_form not in SPECIAL_TOKENS:
+                 raise ValueError("Adding unknown special tokens is not supported")
+         return 0
+
+     def save_vocabulary(self, save_directory: str, **kwargs) -> Tuple[str]:
+         """
+         Save only the vocabulary of the tokenizer (vocabulary).
+
+         Returns:
+             `Tuple(str)`: Paths to the files saved.
+         """
+         file_path = os.path.join(save_directory, "arcade100k.tiktoken")
+         with open(file_path, "w", encoding="utf8") as w:
+             for k, v in self.tokenizer._mergeable_ranks.items():
+                 line = base64.b64encode(k).decode("utf8") + " " + str(v) + "\n"
+                 w.write(line)
+         return (file_path,)
+
+     def tokenize(
+         self,
+         text: str,
+         allowed_special: Union[Set, str] = "all",
+         disallowed_special: Union[Collection, str] = (),
+         **kwargs,
+     ) -> List[Union[bytes, str]]:
+         """
+         Converts a string in a sequence of tokens.
+
+         Args:
+             text (`str`):
+                 The sequence to be encoded.
+             allowed_special (`Literal["all"]` or `set`):
+                 The surface forms of the tokens to be encoded as special tokens in regular texts.
+                 Default to "all".
+             disallowed_special (`Literal["all"]` or `Collection`):
+                 The surface forms of the tokens that should not be in regular texts and trigger errors.
+                 Default to an empty tuple.
+
+             kwargs (additional keyword arguments, *optional*):
+                 Will be passed to the underlying model specific encode method.
+
+         Returns:
+             `List[bytes|str]`: The list of tokens.
+         """
+         tokens = []
+         text = unicodedata.normalize("NFC", text)
+
+         # this implementation takes a detour: text -> token id -> token surface forms
+         for t in self.tokenizer.encode(
+             text, allowed_special=allowed_special, disallowed_special=disallowed_special
+         ):
+             tokens.append(self.decoder[t])
+         return tokens
+
+     def convert_tokens_to_string(self, tokens: List[Union[bytes, str]]) -> str:
+         """
+         Converts a sequence of tokens in a single string.
+         """
+         text = ""
+         temp = b""
+         for t in tokens:
+             if isinstance(t, str):
+                 if temp:
+                     text += temp.decode("utf-8", errors=self.errors)
+                     temp = b""
+                 text += t
+             elif isinstance(t, bytes):
+                 temp += t
+             else:
+                 raise TypeError("token should only be of type types or str")
+         if temp:
+             text += temp.decode("utf-8", errors=self.errors)
+         return text
+
+     def _convert_id_to_token(self, index: int) -> Union[bytes, str]:
+         """Converts an id to a token, special tokens included"""
+         if index in self.decoder:
+             return self.decoder[index]
+         raise ValueError("unknown ids")
+
+     def _convert_token_to_id(self, token: Union[bytes, str]) -> int:
+         """Converts a token to an id using the vocab, special tokens included"""
+         if token in self.tokenizer._special_tokens:
+             return self.tokenizer._special_tokens[token]
+         if token in self.tokenizer._mergeable_ranks:
+             return self.tokenizer._mergeable_ranks[token]
+         raise ValueError("unknown token")
+
+     def _tokenize(self, text: str, **kwargs):
+         """
+         Converts a string in a sequence of tokens (string), using the tokenizer. Split in words for word-based
+         vocabulary or sub-words for sub-word-based vocabularies (BPE/SentencePieces/WordPieces).
+
+         Do NOT take care of added tokens.
+         """
+         raise NotImplementedError
+
+     def _decode(
+         self,
+         token_ids: Union[int, List[int]],
+         skip_special_tokens: bool = False,
+         errors: str = None,
+         **kwargs,
+     ) -> str:
+         if isinstance(token_ids, int):
+             token_ids = [token_ids]
+         if skip_special_tokens:
+             token_ids = [i for i in token_ids if i < self.tokenizer.eot_token]
+         return self.tokenizer.decode(token_ids)
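
Note: because `tokenizer_config.json` (below) registers `Arcade100kTokenizer` through `auto_map`, the class above is normally loaded via `AutoTokenizer` with `trust_remote_code=True`. A hedged usage sketch; the repo id is a placeholder:

```python
# Sketch: load and exercise the Arcade100k tokenizer defined above.
# The repo id is a placeholder; trust_remote_code pulls in tokenization_arcade100k.py.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-username/your-repo", trust_remote_code=True)

text = "<|im_start|>user\nHello!<|im_end|>"
ids = tok(text)["input_ids"]  # special tokens such as <|im_start|> map to ids >= 100257
print(ids)
print(tok.decode(ids))        # decodes back to the original string
```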
tokenizer_config.json CHANGED
@@ -1,221 +1,17 @@
  {
- "add_prefix_space": false,
- "added_tokens_decoder": {
-   "0": {
-     "content": "<|endoftext|>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false,
-     "special": true
-   },
-   "1": {
-     "content": "<|padding|>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false,
-     "special": true
-   },
-   "50254": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50255": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50256": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50257": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50258": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50259": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50260": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50261": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50262": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50263": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50264": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50265": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50266": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50267": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50268": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50269": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50270": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50271": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50272": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50273": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50274": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50275": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50276": {
-     "content": " ",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false,
-     "special": false
-   },
-   "50277": {
-     "content": "[PAD]",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false,
-     "special": true
-   }
+ "added_tokens_decoder": {},
+ "auto_map": {
+   "AutoTokenizer": [
+     "tokenization_arcade100k.Arcade100kTokenizer",
+     null
+   ]
  },
  "bos_token": "<|endoftext|>",
  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|endoftext|>",
+ "errors": "replace",
  "model_max_length": 2048,
- "pad_token": "[PAD]",
- "tokenizer_class": "GPTNeoXTokenizer",
- "unk_token": "<|endoftext|>"
+ "pad_token": "<|endoftext|>",
+ "tokenizer_class": "Arcade100kTokenizer"
  }
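
Note: the `chat_template` kept in this file wraps each turn in `<|user|>` / `<|system|>` / `<|assistant|>` markers terminated by `<|endoftext|>` and appends a bare `<|assistant|>` when a generation prompt is requested. A hedged sketch of rendering a conversation with it (the repo id is a placeholder):

```python
# Sketch: render a conversation with the chat_template from tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-username/your-repo", trust_remote_code=True)  # placeholder repo id

messages = [
    {"role": "system", "content": "You are a helpful tool-calling assistant."},
    {"role": "user", "content": "What's the weather in Kathmandu?"},
]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)  # ends with '<|assistant|>' so the model continues with its reply
```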
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:661b08b166c876c9c6d5f90b918f55da075201b27d1bd2861ed89c5500df2bf7
+ size 4411