Add files using upload-large-folder tool

Browse files

Files changed (8) hide show

.gitattributes +1 -0
chat_template.jinja +5 -0
config.json +83 -0
generation_config.json +9 -0
model.safetensors +3 -0
recipe.yaml +7 -0
tokenizer.json +3 -0
tokenizer_config.json +18 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,5 @@

+{#- Chat template. Emits bos_token, then a system section (empty markers when no system message is given), then a developer section carrying "Deliberation" and "Tool Capabilities" settings (both default to disabled), then each user/assistant turn wrapped in the <SPECIAL_61>..<SPECIAL_72> markers defined below; add_generation_prompt appends the assistant start marker. Assistant block lists switch from inner_token to outer_token before a 'response' block or a lone 'display_answers' tool call. Error messages are built with the ~ operator so non-string content is stringified; Jinja has no built-in str() global. -#}{{ bos_token }}{%- set system_token = '<SPECIAL_61>' -%}{%- set end_system_token = '<SPECIAL_62>' -%}{%- set developer_token = '<SPECIAL_63>' -%}{%- set end_developer_token = '<SPECIAL_64>' -%}{%- set user_token = '<SPECIAL_65>' -%}{%- set end_user_token = '<SPECIAL_66>' -%}{%- set assistant_token = '<SPECIAL_67>' -%}{%- set end_assistant_token = '<SPECIAL_68>' -%}{%- set inner_token = '<SPECIAL_69>' -%}{%- set outer_token = '<SPECIAL_70>' -%}{%- set tool_calls_token = '<SPECIAL_71>' -%}{%- set end_tool_calls_token = '<SPECIAL_72>' -%}{%- if messages and messages[0].role == 'system' -%}   {%- set system_content = messages[0].content -%}    {{ system_token }}    {%- if system_content is string -%}        {{ system_content }}    {%- elif system_content is mapping and "text" in system_content -%}        {{ system_content.text }}    {%- else -%}        {{- raise_exception("Invalid system content: " ~ system_content) -}}    {%- endif -%}    {{ end_system_token }}    {%- set messages_without_system = messages[1:] -%}{%- else -%}    {{ system_token + end_system_token }}    {%- set messages_without_system = messages -%}{%- endif -%}{%- if messages_without_system and messages_without_system[0].role == 'developer' -%}    {%- set developer_content = messages_without_system[0].content -%}    {{ developer_token }}    {%- if "has_thinking" in developer_content -%}        {{ 'Deliberation: ' }}        {%- if developer_content.has_thinking -%}            {{ 'enabled' }}        {%- else -%}            {{ 'disabled' }}        {%- endif -%}        {{ '
+' }}    {%- else -%}        {{ 'Deliberation: disabled
+' }}    {%- endif -%}    {%- if "formatted_tools" in developer_content and developer_content.formatted_tools -%}        {{ 'Tool Capabilities:
+' + developer_content.formatted_tools }}    {%- else -%}        {{ 'Tool Capabilities: disabled' }}    {%- endif -%}    {{ end_developer_token }}    {%- set loop_messages = messages_without_system[1:] -%}{%- else -%}    {{ developer_token + 'Deliberation: disabled
+Tool Capabilities: disabled' + end_developer_token }}    {%- set loop_messages = messages_without_system -%}{%- endif -%}{%- for message in loop_messages -%}    {%- set content = message.content -%}    {%- if message.role == 'user' -%}        {{ user_token }}        {%- if content is string -%}            {{ content }}        {%- elif content is sequence -%}            {%- for part in content.parts -%}                {%- if part.type == 'text' -%}                    {{ part.text }}                {%- endif -%}            {%- endfor -%}        {%- else -%}            {{- raise_exception("Invalid user content: " ~ content) -}}        {%- endif -%}        {{ end_user_token }}    {%- elif message.role == 'assistant' -%}        {{ assistant_token }}        {%- if content is string -%}            {{ content }}        {%- elif content is sequence -%}            {%- set ns = namespace(in_inner=false) -%}            {%- for block in content.blocks -%}                {%- if block.type == 'thoughts' -%}                    {%- if not ns.in_inner -%}                        {%- set ns.in_inner = true -%}                        {{ inner_token }}                    {%- endif -%}                    {{ block.text }}                {%- elif block.type == 'tool_calls' -%}                    {%- if ns.in_inner and not loop.first and block.calls|length == 1 and block.calls[0].name == 'display_answers' -%}                        {%- set ns.in_inner = false -%}                        {{ outer_token }}                    {%- endif -%}                    {{ tool_calls_token + '[' }}                    {%- for tool_call in block.calls -%}                        {{- '{"' + tool_call.name + '": ' + tool_call.arguments + '}' }}                        {%- if not loop.last -%}                            {{- ", " }}                        {%- endif -%}                    {%- endfor -%}                    {{ ']' + end_tool_calls_token }}                {%- elif block.type == 'tool_outputs' -%}                    {{ '[' }}                    {%- for tool_output in block.outputs -%}                        {{- tool_output.output }}                        {%- if not loop.last -%}                            {{- ", " }}                        {%- endif -%}                    {%- endfor -%}                    {{- ']' }}                    {%- if not loop.last -%}                        {{- ' ' }}                    {%- endif -%}                {%- elif block.type == 'response' -%}                    {%- if not loop.first and ns.in_inner -%}                        {%- set ns.in_inner = false -%}                        {{ outer_token }}                    {%- endif -%}                    {{ block.text }}                {%- else -%}                    {{- raise_exception("Invalid block type: " + block.type) -}}                {%- endif -%}            {%- endfor -%}        {%- else -%}            {{- raise_exception("Invalid assistant content: " ~ content) -}}        {%- endif -%}        {{ end_assistant_token }}    {%- else -%}        {{- raise_exception("Invalid message role: " + message.role) -}}    {%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}    {{ assistant_token }}{%- endif -%}

config.json ADDED Viewed

	@@ -0,0 +1,83 @@

+{
+  "architectures": [
+    "ApertusForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "dtype": "float32",
+  "eos_token_id": 68,
+  "hidden_act": "xielu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 6144,
+  "max_position_embeddings": 4096,
+  "mlp_bias": false,
+  "model_type": "apertus",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 20,
+  "num_key_value_heads": 4,
+  "pad_token_id": 10,
+  "post_norm": false,
+  "qk_norm": true,
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "format": "nvfp4-pack-quantized",
+        "input_activations": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": "local",
+          "group_size": 16,
+          "num_bits": 4,
+          "observer": "static_minmax",
+          "observer_kwargs": {},
+          "scale_dtype": "torch.float8_e4m3fn",
+          "strategy": "tensor_group",
+          "symmetric": true,
+          "type": "float",
+          "zp_dtype": null
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": 16,
+          "num_bits": 4,
+          "observer": "memoryless_minmax",
+          "observer_kwargs": {},
+          "scale_dtype": "torch.float8_e4m3fn",
+          "strategy": "tensor_group",
+          "symmetric": true,
+          "type": "float",
+          "zp_dtype": null
+        }
+      }
+    },
+    "format": "nvfp4-pack-quantized",
+    "global_compression_ratio": null,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed",
+    "sparsity_config": {},
+    "transform_config": {},
+    "version": "0.15.1.a20260428"
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_parameters": {
+    "rope_theta": 500000.0,
+    "rope_type": "default"
+  },
+  "tie_word_embeddings": true,
+  "transformers_version": "5.7.0",
+  "use_cache": false,
+  "vocab_size": 131072
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": [
+    68
+  ],
+  "pad_token_id": 10,
+  "transformers_version": "5.7.0"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57d39813bda9c01b6494c5abfcb153e0b76d21793cfbe8364b89a33b6826dc7f
+size 708173256

recipe.yaml ADDED Viewed

	@@ -0,0 +1,7 @@

+default_stage:
+  default_modifiers:
+    QuantizationModifier:
+      targets: [Linear]
+      ignore: [lm_head]
+      scheme: NVFP4
+      bypass_divisibility_checks: false

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa400cc62603ca19e8e2f4a6451f8508c8d68866a362ac644fe74c0026243129
+size 17078467

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<SPECIAL_68>",
+  "is_local": true,
+  "local_files_only": false,
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "tokenizer_class": "TokenizersBackend",
+  "unk_token": "<unk>"
+}