C10X committed
Commit 1cce6c0 · verified · 1 Parent(s): 4ace306

Upload 6 files

chat_template.jinja ADDED
@@ -0,0 +1,5 @@
+{% for message in messages %}<|im_start|>{{ message['role'] }}
+{% if message['role'] == 'assistant' %}{% generation %}{{ message['content'] }}<|im_end|>
+{% endgeneration %}{% else %}{{ message['content'] }}<|im_end|>
+{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+{% endif %}
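The {% generation %} / {% endgeneration %} markers are what let transformers' apply_chat_template return an assistant-token mask for SFT loss masking. A minimal sketch, not part of the commit, assuming a local clone of this repo:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkout")  # path to a local clone (assumption)
messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
]
enc = tok.apply_chat_template(
    messages,
    return_dict=True,
    return_assistant_tokens_mask=True,  # requires the {% generation %} markers above
)
# enc["assistant_masks"] is 1 for tokens inside {% generation %}...{% endgeneration %},
# so a trainer can compute loss on assistant tokens only.
print(enc["assistant_masks"])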
config.json CHANGED
@@ -4,56 +4,38 @@
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 1,
+  "bos_token_id": 0,
   "dtype": "bfloat16",
-  "eos_token_id": 2,
-  "head_dim": 32,
+  "eos_token_id": 6,
+  "head_dim": 64,
   "hidden_act": "silu",
-  "hidden_size": 64,
+  "hidden_size": 512,
   "initializer_range": 0.02,
   "intermediate_size": 2048,
   "layer_types": [
+    "sliding_attention",
     "full_attention",
+    "sliding_attention",
     "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
+    "sliding_attention",
     "full_attention"
   ],
-  "max_position_embeddings": 2048,
-  "max_window_layers": 24,
+  "max_position_embeddings": 8192,
+  "max_window_layers": 5,
   "model_type": "qwen3",
-  "num_attention_heads": 4,
-  "num_hidden_layers": 24,
+  "num_attention_heads": 8,
+  "num_hidden_layers": 6,
   "num_key_value_heads": 2,
-  "pad_token_id": 2,
+  "pad_token_id": 6,
   "rms_norm_eps": 1e-06,
   "rope_parameters": {
    "rope_theta": 10000.0,
    "rope_type": "default"
   },
-  "sliding_window": null,
+  "sliding_window": 512,
   "tie_word_embeddings": true,
   "transformers_version": "5.8.0.dev0",
   "use_cache": false,
-  "use_sliding_window": false,
-  "vocab_size": 2048
+  "use_sliding_window": true,
+  "vocab_size": 16384
 }
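For orientation: the edit replaces a 24-layer, all-full-attention toy config with a 6-layer model that alternates sliding-window (window 512) and full attention, and grows the vocabulary to 16384. A sketch of the same shape built in code, assuming a transformers version whose Qwen3Config accepts layer_types:

from transformers import Qwen3Config, Qwen3ForCausalLM

cfg = Qwen3Config(
    vocab_size=16384,
    hidden_size=512,
    num_hidden_layers=6,
    num_attention_heads=8,
    num_key_value_heads=2,       # GQA: 4 query heads share each KV head
    head_dim=64,                 # 8 heads x 64 = 512 = hidden_size
    intermediate_size=2048,
    max_position_embeddings=8192,
    layer_types=["sliding_attention", "full_attention"] * 3,
    sliding_window=512,
    use_sliding_window=True,
    tie_word_embeddings=True,
)
model = Qwen3ForCausalLM(cfg)
print(sum(p.numel() for p in model.parameters()))  # ~31.2M; cf. the safetensors size below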
generation_config.json CHANGED
@@ -1,11 +1,11 @@
 {
   "_from_model_config": true,
-  "bos_token_id": 1,
+  "bos_token_id": 0,
   "eos_token_id": [
-    2,
-    4
+    6,
+    2
   ],
-  "pad_token_id": 2,
+  "pad_token_id": 6,
   "transformers_version": "5.8.0.dev0",
   "use_cache": false
 }
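Passing eos_token_id as a list makes generate() stop at whichever stop id appears first; here id 6 (presumably <|im_end|>, per the tokenizer config below) plus id 2 as a second stop. Continuing the sketch above:

import torch

prompt = torch.tensor([[0]])  # just the BOS id, for illustration
out = model.generate(
    prompt,
    eos_token_id=[6, 2],       # stop on either id
    pad_token_id=6,
    max_new_tokens=16,
)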
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44fc1d7661d5e478a6c2b6cd82f877ac2246915f368a3b579e03bf61afcc6054
-size 20354200
+oid sha256:bba7b17a06ff8bb8ff2dab9a672e22b4cec6c67fe0d1fbf075753ec0c7c164e5
+size 62412552
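The new size is consistent with a bfloat16 checkpoint of the config above. A back-of-envelope count (assuming standard Qwen3 shapes with tied embeddings and per-head q/k norms) lands within a few KB of 62,412,552 bytes, the remainder being the safetensors header:

h, layers, vocab, inter, kv_dim = 512, 6, 16384, 2048, 2 * 64
emb = vocab * h                               # 8,388,608 (tied with lm_head)
attn = 2 * h * h + 2 * h * kv_dim             # q,o projections + k,v projections
mlp = 3 * h * inter                           # gate, up, down
per_layer = attn + mlp + 2 * h + 2 * 64       # + input/post-attn norms + q/k norms
total = emb + layers * per_layer + h          # + final norm
print(total, total * 2)                       # ≈31.2M params, ≈62.4 MB in bf16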
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,15 +1,220 @@
 {
-  "add_prefix_space": null,
   "backend": "tokenizers",
-  "bos_token": "<|start_story|>",
+  "bos_token": "<|startoftext|>",
+  "chatml_role_tokens": [
+    "<|system|>",
+    "<|user|>",
+    "<|assistant|>",
+    "<|developer|>",
+    "<|tool|>",
+    "<|function|>"
+  ],
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|end_story|>",
-  "is_local": false,
+  "eos_token": "<|im_end|>",
+  "expected_vocab_size": 16384,
+  "falcon_latex_tokens": [
+    "\\blindtext",
+    "\\newpage",
+    "\\boxed",
+    "\\framebox",
+    "\\fbox",
+    "\\tag",
+    "\\nonumber",
+    "\\item",
+    "\\centering",
+    "\\caption",
+    "\\includegraphics",
+    "\\label",
+    "\\multicolumn",
+    "\\cline",
+    "\\hline",
+    "\\end",
+    "\\begin",
+    "\\tableofcontents",
+    "\\maketitle",
+    "\\date",
+    "\\author",
+    "\\title",
+    "\\chapter",
+    "\\subsubsection",
+    "\\subsection",
+    "\\section",
+    "\\noindent",
+    "\\newline",
+    "\\par",
+    "\\ddot",
+    "\\dot",
+    "\\bar",
+    "\\iff",
+    "\\implies",
+    "\\neg",
+    "\\lor",
+    "\\land",
+    "\\qquad",
+    "\\quad",
+    "\\arctan",
+    "\\arccos",
+    "\\arcsin",
+    "\\cot",
+    "\\sec",
+    "\\csc",
+    "\\tan",
+    "\\cos",
+    "\\sin",
+    "\\widetilde",
+    "\\widehat",
+    "\\complement",
+    "\\varnothing",
+    "\\tilde",
+    "\\hat",
+    "\\vec",
+    "\\mathfrak",
+    "\\mathcal",
+    "\\mathbb",
+    "\\emptyset",
+    "\\bot",
+    "\\aleph",
+    "\\Re",
+    "\\Im",
+    "\\ell",
+    "\\hbar",
+    "\\exists",
+    "\\forall",
+    "\\partial",
+    "\\nabla",
+    "\\infty",
+    "\\binom",
+    "\\overline",
+    "\\sqrt",
+    "\\tfrac",
+    "\\dfrac",
+    "\\frac",
+    "\\textsc",
+    "\\textsf",
+    "\\texttt",
+    "\\emph",
+    "\\underline",
+    "\\textit",
+    "\\textbf",
+    "\\bigoplus",
+    "\\bigcap",
+    "\\bigcup",
+    "\\prod",
+    "\\sum",
+    "\\oint",
+    "\\iiint",
+    "\\iint",
+    "\\int",
+    "\\right",
+    "\\left",
+    "\\Bigr",
+    "\\Bigl",
+    "\\bigr",
+    "\\bigl",
+    "\\rceil",
+    "\\lceil",
+    "\\rfloor",
+    "\\lfloor",
+    "\\rangle",
+    "\\langle",
+    "\\overleftarrow",
+    "\\overleftrightarrow",
+    "\\overrightarrow",
+    "\\Longleftrightarrow",
+    "\\longleftrightarrow",
+    "\\longrightarrow",
+    "\\longleftarrow",
+    "\\Longrightarrow",
+    "\\Longleftarrow",
+    "\\gets",
+    "\\to",
+    "\\mapsto",
+    "\\Updownarrow",
+    "\\Downarrow",
+    "\\Uparrow",
+    "\\updownarrow",
+    "\\downarrow",
+    "\\uparrow",
+    "\\Leftrightarrow",
+    "\\Rightarrow",
+    "\\Leftarrow",
+    "\\leftrightarrow",
+    "\\rightarrow",
+    "\\leftarrow",
+    "\\perp",
+    "\\propto",
+    "\\ni",
+    "\\notin",
+    "\\in",
+    "\\supseteq",
+    "\\supset",
+    "\\sqsupseteq",
+    "\\sqsubseteq",
+    "\\subseteq",
+    "\\subset",
+    "\\cong",
+    "\\approx",
+    "\\simeq",
+    "\\sim",
+    "\\equiv",
+    "\\neq",
+    "\\geq",
+    "\\leq",
+    "\\oslash",
+    "\\otimes",
+    "\\ominus",
+    "\\oplus",
+    "\\wedge",
+    "\\vee",
+    "\\sqcup",
+    "\\sqcap",
+    "\\uplus",
+    "\\cup",
+    "\\cap",
+    "\\cdot",
+    "\\bullet",
+    "\\circ",
+    "\\star",
+    "\\ast",
+    "\\div",
+    "\\times",
+    "\\mp",
+    "\\pm",
+    "\\omega",
+    "\\psi",
+    "\\chi",
+    "\\varphi",
+    "\\varsigma",
+    "\\phi",
+    "\\upsilon",
+    "\\tau",
+    "\\sigma",
+    "\\rho",
+    "\\pi",
+    "\\xi",
+    "\\nu",
+    "\\mu",
+    "\\lambda",
+    "\\kappa",
+    "\\iota",
+    "\\theta",
+    "\\eta",
+    "\\zeta",
+    "\\epsilon",
+    "\\delta",
+    "\\gamma",
+    "\\beta",
+    "\\alpha"
+  ],
+  "fresh_training_vocab_note": "This tokenizer intentionally repurposes obsolete Harmony/control token IDs 2,3,4,7,8,9 as ChatML role tokens. It is intended for from-scratch base-model training; do not use it with old checkpoints or old tokenized caches.",
+  "hf_compatibility_note": "Set tokenizer_class to PreTrainedTokenizerFast for vanilla Hugging Face loading; original source used a custom TokenizersBackend name.",
+  "is_local": true,
   "local_files_only": false,
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<|end_story|>",
-  "sp_model_kwargs": {},
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": false
+  "pad_token": "<|im_end|>",
+  "tokenizer_class": "TokenizersBackend"
 }
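As the hf_compatibility_note says, "TokenizersBackend" is not a stock transformers class, so AutoTokenizer may refuse this config as committed. One workaround is loading the tokenizer.json directly as a fast tokenizer; a sketch, with the token-to-id mapping unverified:

from transformers import PreTrainedTokenizerFast

tok = PreTrainedTokenizerFast(
    tokenizer_file="tokenizer.json",  # from this repo
    bos_token="<|startoftext|>",
    eos_token="<|im_end|>",
    pad_token="<|im_end|>",
)
# Expect 6 for <|im_end|> per config.json; the role-token ids come from the
# repurposed 2,3,4,7,8,9 range described in fresh_training_vocab_note.
print(tok.convert_tokens_to_ids(["<|im_end|>", "<|user|>"]))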