ToastyPigeon committed (verified)
Commit 9f9bf08 · 1 Parent(s): 084ad88

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,322 @@
---
library_name: peft
tags:
- axolotl
- base_model:adapter:apertus-12b-nonzero-trained/cpt-part2-instruct-part1
- lora
- transformers
datasets:
- grimulkan/LimaRP-augmented
- ToastyPigeon/mixed-medical-reasoning-formatted
- ToastyPigeon/kimi-stories-instruct
- allura-org/fujin-instruct-v2
- ToastyPigeon/some-rp-extended
- allura-forge/koto-instruct-sft-nothink
base_model: apertus-12b-nonzero-trained/cpt-part2-instruct-part1
pipeline_tag: text-generation
model-index:
- name: apertus-12b-nonzero-trained/part2-instruct
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
<details><summary>See axolotl config</summary>

axolotl version: `0.13.0.dev0`
```yaml
# === Model Configuration ===
base_model: apertus-12b-nonzero-trained/cpt-part2-instruct-part1
load_in_8bit: false
load_in_4bit: false

# === HF Configuration ===
#hub_model_id: ToastyPigeon/apertus-12b-try-again-s1
#hub_strategy: "every_save"
output_dir: apertus-12b-nonzero-trained/part2-instruct
# === Wandb Tracking ===
wandb_project: ApertusV3
# wandb_entity: [WANDB_ENTITY]
wandb_name: 12b-part2-instruct

# === Training Setup ===
num_epochs: 1
micro_batch_size: 2
gradient_accumulation_steps: 16
sequence_len: 4096
#sequence_parallel_degree: 2
#heads_k_stride: 1
sample_packing: true
#pad_to_sequence_len: true
#temperature: 0.7
#max_steps: 10
# === Evaluation ===
val_set_size: 200
evals_per_epoch: 10
#eval_steps: 20
#max_steps: 60
#eval_table_size:
eval_max_new_tokens: 128
#eval_sample_packing: true
#eval_strategy: "no"

# === LoRA Configuration ===
adapter: lora
lora_model_dir:
lora_r: 128
lora_alpha: 16
lora_dropout: 0
lora_target_linear:
lora_target_modules:
# - up_proj
  - down_proj
# - gate_proj
  - q_proj
  - v_proj
  - k_proj
  - o_proj
# - input_layernorm
# - post_attention_layernorm
# - embed_tokens
# - lm_head

lora_fan_in_fan_out:
peft_use_rslora: true
lora_modules_to_save:
# - embed_tokens
# - lm_head
#fix_untrained_tokens: true
#lora_mlp_kernel: true
#lora_qkv_kernel: true
#lora_o_kernel: true
#unfrozen_parameters:
# - model.layers.(2[4-9]|3[0-9]).*
# - model.layers.[0-9+].mlp.up_proj
# - model.layers.[0-9]+.mlp.down_proj
# - model.layers.[0-9+].feedforward_layernorm
# - embed_tokens
# - lm_head
# - model.layers.[0-9]+.self_attn.(q|k|v|o)_proj
# === Hyperparameter Configuration ===
#optimizer: apollo_adamw_layerwise
#warmup_steps: 0
warmup_ratio: 0.025
#optimizer: adamw_8bit
optimizer: adamw_torch_fused
#optimizer: paged_adamw_8bit
#optim_args:
# enable_stochastic_rounding: true
# enable_cautious: true
# enable_8bit: true
# Apollo-mini configuration:
#optim_args: "proj=random,rank=128,scale=128.0,scale_type=tensor,update_proj_gap=100"
# Regular Apollo configuration:
# optim_args:
#optim_target_modules: all_linear
learning_rate: 2e-5
lr_scheduler: cosine
#cosine_min_lr_ratio: 0.2
#lr_scheduler: cosine_with_min_lr
#lr_scheduler_kwargs:
# cosine_min_lr: 1e-6
weight_decay: 0.01
max_grad_norm: 2.0
#warmup_steps: 0
#warmup_ratio: 0.025


# === Data Configuration ===
#
#chat_template: jinja
chat_template: chatml
special_tokens:
  eos_token: "<|im_end|>"
# eos_token: "</s>"
#tokenizer_use_mistral_common: true
shuffle_merged_datasets: true
datasets:
# - path: allura-org/the-anarchist-library
#   type: completion
#   split: train[:20%]
  - path: grimulkan/LimaRP-augmented
    type: chat_template
    field_messages: conversations
    message_property_mappings:
      role: from
      content: value
# - path: allenai/tulu-3-sft-personas-instruction-following
#   type: chat_template
#   split: train[:10%]
  - path: ToastyPigeon/mixed-medical-reasoning-formatted
    type: chat_template
    data_files: mixed-medical-nothink.json
#   split: train[:10%]
# - path: ToastyPigeon/steve-and-marvin
#   type: completion
#   data_files: marvin.json
  - path: ToastyPigeon/kimi-stories-instruct
    type: chat_template
#   type: completion
# - path: ToastyPigeon/new-story-dataset
#   type: customcompletion-regex
#   type: completion
#   data_files: new-story-dataset-v2.json
  - path: allura-org/fujin-instruct-v2
#   type: customchatml-regex
    type: chat_template
    field_messages: conversations
    message_property_mappings:
      role: from
      content: value
  - path: ToastyPigeon/some-rp-extended
#   type: customchatml-regex
    type: chat_template
    field_messages: conversations
    message_property_mappings:
      role: from
      content: value
    roles_to_train: ["user","assistant"]
    split: train[:30%]
# - path: Alfitaria/rosier-inf
#   type: completion
#   split: train[70%:]
  - path: allura-forge/koto-instruct-sft-nothink
#   type: customchatml-regex
    type: chat_template
#   split: train[:50%]
#   field_messages: conversations
#   message_property_mappings:
#     role: from
#     content: value
# - path: ToastyPigeon/SpringDragon
#   type: customcompletion-regex
#   type: completion
#   split: train
# - path: ToastyPigeon/erotic-books-clone
#   type: customcompletion-regex
#   type: completion
#   split: train[:50%]
#   split: train[35%:45%]
# - path: ToastyPigeon/tulu-mini
#   type: chat_template
dataset_prepared_path: last_run_prepared


# === Plugins ===
plugins:
  - axolotl.integrations.liger.LigerPlugin
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin

# === Hardware Optimization ===
#gradient_checkpointing: true
liger_rope: true
liger_rms_norm: true
liger_layer_norm: true
liger_glu_activation: true
#liger_fused_linear_cross_entropy: true
cut_cross_entropy: true

#deepspeed: ../axolotl/deepspeed_configs/zero2.json

# === FSDP Config ===
fsdp:
  - full_shard
  - auto_wrap
fsdp_config:
  fsdp_limit_all_gathers: true
  fsdp_sync_module_states: true
  fsdp_offload_params: true
  fsdp_activation_checkpointing: true
  fsdp_use_orig_params: true
  fsdp_cpu_ram_efficient_loading: true
  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
  fsdp_transformer_layer_cls_to_wrap: ApertusDecoderLayer
  fsdp_state_dict_type: FULL_STATE_DICT
  fsdp_sharding_strategy: FULL_SHARD
#fsdp_stage: 2
#fsdp_final_state_dict_type: FULL_STATE_DICT

# === Checkpointing ===
#save_steps: 2
saves_per_epoch: 4
save_total_limit: 4

# === Advanced Settings ===
bf16: true
flash_attention: true
train_on_inputs: false
group_by_length: false
save_safetensors: true
logging_steps: 1
seed: 420
gc_steps: 10

```

</details><br>

# apertus-12b-nonzero-trained/part2-instruct

This model is a PEFT LoRA adapter trained on top of apertus-12b-nonzero-trained/cpt-part2-instruct-part1 using the grimulkan/LimaRP-augmented, ToastyPigeon/mixed-medical-reasoning-formatted, ToastyPigeon/kimi-stories-instruct, allura-org/fujin-instruct-v2, ToastyPigeon/some-rp-extended, and allura-forge/koto-instruct-sft-nothink datasets.
It achieves the following results on the evaluation set:
- Loss: 1.1911
- Memory/max Active (GiB): 6.89
- Memory/max Allocated (GiB): 6.88
- Memory/device Reserved (GiB): 8.18

## Model description

More information needed
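
Because this repository contains only a LoRA adapter (see `adapter_config.json` below), it has to be applied on top of the base model at load time. A minimal loading sketch follows; the base-model path is the local path recorded in the config, the adapter repo id is a placeholder, and `trust_remote_code=True` is needed because `config.json` resolves the custom Apertus classes through `auto_map`. Substitute whatever locations you actually have.

```python
# Minimal sketch of loading this adapter with PEFT; paths/ids are placeholders.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE = "apertus-12b-nonzero-trained/cpt-part2-instruct-part1"  # local path from the config
ADAPTER = "ToastyPigeon/apertus-12b-part2-instruct"            # hypothetical repo id

tokenizer = AutoTokenizer.from_pretrained(ADAPTER)
base = AutoModelForCausalLM.from_pretrained(
    BASE,
    torch_dtype=torch.bfloat16,  # training ran in bf16
    trust_remote_code=True,      # config.json maps the Apertus classes via auto_map
)
model = PeftModel.from_pretrained(base, ADAPTER)  # attach the LoRA weights
model.eval()
```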

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training (the derived values are checked in the sketch after this list):
- learning_rate: 2e-05
- train_batch_size: 2
- eval_batch_size: 2
- seed: 420
- distributed_type: multi-GPU
- num_devices: 2
- gradient_accumulation_steps: 16
- total_train_batch_size: 64
- total_eval_batch_size: 4
- optimizer: ADAMW_TORCH_FUSED with betas=(0.9, 0.999), epsilon=1e-08, and no additional optimizer arguments
- lr_scheduler_type: cosine
- lr_scheduler_warmup_steps: 9
- training_steps: 372
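
The derived numbers follow directly from the base values; a quick arithmetic check using only figures reported above:

```python
# Sanity-check the derived hyperparameters from the values reported above.
micro_batch_size = 2                 # train_batch_size per device
gradient_accumulation_steps = 16
num_devices = 2

total_train_batch_size = micro_batch_size * gradient_accumulation_steps * num_devices
assert total_train_batch_size == 64  # matches the reported total_train_batch_size

training_steps = 372
warmup_ratio = 0.025                 # from the axolotl config above
assert int(training_steps * warmup_ratio) == 9  # matches lr_scheduler_warmup_steps
```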

### Training results

| Training Loss | Epoch  | Step | Validation Loss | Active (GiB) | Allocated (GiB) | Reserved (GiB) |
|:-------------:|:------:|:----:|:---------------:|:------------:|:---------------:|:--------------:|
| No log        | 0      | 0    | 1.4636          | 6.87         | 6.87            | 8.13           |
| 1.2677        | 0.1020 | 38   | 1.3280          | 6.89         | 6.88            | 8.18           |
| 1.1286        | 0.2041 | 76   | 1.2605          | 6.89         | 6.88            | 8.18           |
| 1.159         | 0.3061 | 114  | 1.2275          | 6.89         | 6.88            | 8.18           |
| 1.0281        | 0.4081 | 152  | 1.2122          | 6.89         | 6.88            | 8.18           |
| 1.0781        | 0.5102 | 190  | 1.2033          | 6.89         | 6.88            | 8.18           |
| 1.0296        | 0.6122 | 228  | 1.1976          | 6.89         | 6.88            | 8.18           |
| 1.0756        | 0.7142 | 266  | 1.1939          | 6.89         | 6.88            | 8.18           |
| 1.1134        | 0.8162 | 304  | 1.1921          | 6.89         | 6.88            | 8.18           |
| 1.0437        | 0.9183 | 342  | 1.1911          | 6.89         | 6.88            | 8.18           |


### Framework versions

- PEFT 0.17.1
- Transformers 4.56.1
- Pytorch 2.7.1+cu126
- Datasets 4.0.0
- Tokenizers 0.22.1
adapter_config.json ADDED
@@ -0,0 +1,41 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "apertus-12b-nonzero-trained/cpt-part2-instruct-part1",
  "bias": "none",
  "corda_config": null,
  "eva_config": null,
  "exclude_modules": null,
  "fan_in_fan_out": null,
  "inference_mode": false,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 256,
  "lora_bias": false,
  "lora_dropout": 0.2,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "qalora_group_size": 16,
  "r": 256,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "down_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "up_proj",
    "q_proj"
  ],
  "target_parameters": [],
  "task_type": "CAUSAL_LM",
  "trainable_token_indices": null,
  "use_dora": false,
  "use_qalora": false,
  "use_rslora": false
}
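
Note that this saved adapter config does not match the LoRA section of the axolotl YAML above (r=256, lora_alpha=256, dropout 0.2, up_proj included, rsLoRA disabled, versus r=128, alpha 16, dropout 0, and rsLoRA enabled in the YAML), so the uploaded adapter appears to come from different settings than the posted config. For reference, a sketch of the peft LoraConfig equivalent to the JSON as saved:

```python
# Sketch: peft LoraConfig mirroring the saved adapter_config.json above.
from peft import LoraConfig

lora_config = LoraConfig(
    r=256,
    lora_alpha=256,  # scaling = alpha / r = 1.0
    lora_dropout=0.2,
    bias="none",
    use_rslora=False,
    target_modules=["down_proj", "k_proj", "v_proj", "o_proj", "up_proj", "q_proj"],
    task_type="CAUSAL_LM",
)
```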
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:288cad2b87d032fa17cda386356fcd8220315fce83e91da5f037d143fbcb4113
size 1283523112
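
What is committed here is a Git LFS pointer, not the ~1.28 GB weights file itself; the oid and size identify the blob that LFS fetches on checkout. Once the real file is present locally, the adapter tensors can be inspected without loading a model, e.g. with the safetensors library (a sketch, assuming the file sits in the working directory):

```python
# Sketch: inspect the LoRA tensors once the LFS blob has been fetched.
from safetensors import safe_open

with safe_open("adapter_model.safetensors", framework="pt") as f:
    names = list(f.keys())
    print(len(names), "tensors")  # LoRA A/B matrices for each targeted module
    print(names[0], f.get_tensor(names[0]).shape)
```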
chat_template.jinja ADDED
@@ -0,0 +1,4 @@
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '
' + message['content'] + '<|im_end|>' + '
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
' }}{% endif %}
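
This is the standard ChatML template, matching `chat_template: chatml` in the training config. With the tokenizer from this repo, transformers renders conversations through it; a small sketch, with the repo id again a placeholder:

```python
# Sketch: render a conversation through the ChatML template above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("ToastyPigeon/apertus-12b-part2-instruct")  # placeholder id

messages = [{"role": "user", "content": "Hello!"}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant
```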
config.json ADDED
@@ -0,0 +1,43 @@
{
  "architectures": [
    "ApertusForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_apertus.ApertusConfig",
    "AutoModel": "modeling_apertus.ApertusModel",
    "AutoModelForCausalLM": "modeling_apertus.ApertusForCausalLM"
  },
  "bos_token_id": 1,
  "dtype": "bfloat16",
  "eos_token_id": 68,
  "hidden_act": "xielu",
  "hidden_dropout": 0.0,
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 21504,
  "max_position_embeddings": 65536,
  "mlp_bias": false,
  "model_type": "apertus",
  "num_attention_heads": 32,
  "num_hidden_layers": 48,
  "num_key_value_heads": 8,
  "pad_token_id": 3,
  "post_norm": false,
  "qk_norm": true,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 8.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3",
    "type": "llama3"
  },
  "rope_theta": 12000000,
  "tie_word_embeddings": false,
  "transformers_version": "4.56.1",
  "use_cache": false,
  "vocab_size": 131072
}
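
The `rope_scaling` block stretches RoPE from the original 8192-token window to the 65536-token `max_position_embeddings` using the llama3 rule: high-frequency components are kept as-is, low-frequency ones are divided by `factor`, and a smooth blend covers the band in between. A sketch of that rule with this config's values, following the formula used by recent transformers versions:

```python
# Sketch of llama3-style RoPE frequency scaling with this config's values.
import math

FACTOR, LOW_FF, HIGH_FF, ORIG_MAX = 8.0, 1.0, 4.0, 8192

def scale_inv_freq(inv_freq: float) -> float:
    low_freq_wavelen = ORIG_MAX / LOW_FF    # 8192: longer wavelengths fully scaled
    high_freq_wavelen = ORIG_MAX / HIGH_FF  # 2048: shorter wavelengths untouched
    wavelen = 2 * math.pi / inv_freq
    if wavelen < high_freq_wavelen:
        return inv_freq                      # high-frequency: unchanged
    if wavelen > low_freq_wavelen:
        return inv_freq / FACTOR             # low-frequency: divided by factor
    # mid band: linear blend between scaled and unscaled
    smooth = (ORIG_MAX / wavelen - LOW_FF) / (HIGH_FF - LOW_FF)
    return (1 - smooth) * inv_freq / FACTOR + smooth * inv_freq
```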
debug.log ADDED
@@ -0,0 +1,23 @@
[2025-11-26 03:25:26,747] [INFO] [axolotl.utils.data.sft._load_raw_datasets:320] [PID:64100] Loading raw datasets...
[2025-11-26 03:25:29,268] [INFO] [axolotl.utils.data.wrappers.get_dataset_wrapper:87] [PID:64100] Loading dataset: ToastyPigeon/limarp-augmented-train-last-only with base_type: chat_template and prompt_style: None
[2025-11-26 03:25:29,972] [WARNING] [huggingface_hub.repocard.content:108] [PID:64100] Repo card metadata block was not found. Setting CardData to empty.
[2025-11-26 03:25:30,754] [INFO] [axolotl.utils.data.wrappers.get_dataset_wrapper:87] [PID:64100] Loading dataset: ToastyPigeon/mixed-medical-reasoning-formatted with base_type: chat_template and prompt_style: None
[2025-11-26 03:25:32,584] [WARNING] [huggingface_hub.repocard.content:108] [PID:64100] Repo card metadata block was not found. Setting CardData to empty.
[2025-11-26 03:25:33,613] [INFO] [axolotl.utils.data.wrappers.get_dataset_wrapper:87] [PID:64100] Loading dataset: ToastyPigeon/kimi-stories-instruct with base_type: chat_template and prompt_style: None
[2025-11-26 03:25:35,627] [INFO] [axolotl.utils.data.wrappers.get_dataset_wrapper:87] [PID:64100] Loading dataset: allura-forge/koto-instruct-sft-nothink with base_type: chat_template and prompt_style: None
[2025-11-26 03:25:36,310] [WARNING] [huggingface_hub.repocard.content:108] [PID:64100] Repo card metadata block was not found. Setting CardData to empty.
[2025-11-26 03:25:37,341] [INFO] [axolotl.utils.data.wrappers.get_dataset_wrapper:87] [PID:64100] Loading dataset: ToastyPigeon/SpringDragon-Instruct with base_type: chat_template and prompt_style: None

[2025-11-26 03:25:52,171] [WARNING] [huggingface_hub.repocard.content:108] [PID:64100] Repo card metadata block was not found. Setting CardData to empty.
[2025-11-26 03:25:53,391] [INFO] [axolotl.utils.data.wrappers.get_dataset_wrapper:87] [PID:64100] Loading dataset: ToastyPigeon/tulu-mini with base_type: chat_template and prompt_style: None

Using Liger RMSNorm!

[2025-11-26 14:54:12,836] [WARNING] [py.warnings._showwarnmsg:110] [PID:64100] /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:680: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
warnings.warn(
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|im_end|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
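
Note that `eos_token` here is `<|im_end|>` rather than the base model's default, matching the `special_tokens` override in the training config. With the tokenizer loaded (placeholder id again), this is visible directly:

```python
# Sketch: confirm the ChatML EOS override on the loaded tokenizer.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("ToastyPigeon/apertus-12b-part2-instruct")  # placeholder id
print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)
# expected: <s> <|im_end|> <pad> <unk>
```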
tokenizer.json ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:355d635f647f0513296e01963d876dc5a10df2853296b11d5be7d00296070faf
size 17078466
tokenizer_config.json ADDED
The diff for this file is too large to render.