Commit 4a65b34
Parent(s): f16a072

Add files using upload-large-folder tool
- .DS_Store +0 -0
- README.md +0 -44
- chat_template.jinja +4 -4
- config.json +0 -0
- model-00001-of-00027.safetensors +3 -0
- model-00002-of-00027.safetensors +3 -0
- model-00003-of-00027.safetensors +3 -0
- model-00004-of-00027.safetensors +3 -0
- model-00005-of-00027.safetensors +3 -0
- model-00006-of-00027.safetensors +3 -0
- model-00007-of-00027.safetensors +3 -0
- model-00008-of-00027.safetensors +3 -0
- model-00009-of-00027.safetensors +3 -0
- model-00010-of-00027.safetensors +3 -0
- model-00011-of-00027.safetensors +3 -0
- model-00012-of-00027.safetensors +3 -0
- model-00013-of-00027.safetensors +3 -0
- model-00014-of-00027.safetensors +3 -0
- model-00015-of-00027.safetensors +3 -0
- model-00016-of-00027.safetensors +3 -0
- model-00017-of-00027.safetensors +3 -0
- model-00018-of-00027.safetensors +3 -0
- model-00019-of-00027.safetensors +3 -0
- model-00020-of-00027.safetensors +3 -0
- model-00021-of-00027.safetensors +3 -0
- model-00022-of-00027.safetensors +3 -0
- model-00023-of-00027.safetensors +3 -0
- model-00024-of-00027.safetensors +3 -0
- model-00025-of-00027.safetensors +3 -0
- model-00026-of-00027.safetensors +3 -0
- model-00027-of-00027.safetensors +3 -0
- model.safetensors.index.json +21 -21
.DS_Store ADDED
Binary file (6.15 kB).
README.md CHANGED
@@ -8,47 +8,3 @@ tags:
 - mlx
 base_model: MiniMaxAI/MiniMax-M2.7
 ---
-
-[MiniMax-M2.7](MiniMaxAI/MiniMax-M2.7) optimized for MLX. A mixed-precision quant that balances speed, memory, and accuracy.
-
-# Usage
-
-```sh
-# Start server at http://localhost:8080/chat/completions
-uvx --from mlx-lm mlx_lm.server \
-  --host 127.0.0.1 \
-  --port 8080 \
-  --model spicyneuron/MiniMax-M2.7-MLX-4.6bit
-```
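The server started by the usage snippet above speaks an OpenAI-style chat-completions protocol. A minimal client sketch, assuming the server is running at the URL given in the README's comment; `build_request` is an illustrative helper, not part of mlx-lm:

```python
import json
import urllib.request

# URL and model name taken from the README snippet above.
URL = "http://localhost:8080/chat/completions"
MODEL = "spicyneuron/MiniMax-M2.7-MLX-4.6bit"

def build_request(prompt: str, max_tokens: int = 256) -> urllib.request.Request:
    """Build (but do not send) a chat-completions POST request."""
    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
    }
    return urllib.request.Request(
        URL,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

req = build_request("What is mixed-precision quantization?")
print(req.full_url, json.loads(req.data)["model"])
```

Actually sending it is then `urllib.request.urlopen(req)`, which of course requires the server to be up.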
-
-# Methodology
-
-Quantized with an [mlx-lm fork](https://github.com/ml-explore/mlx-lm/pull/922), drawing inspiration from Unsloth/AesSedai/ubergarm-style mixed-precision GGUFs.
-MLX quantization options differ from llama.cpp's, but the principles are the same:
-
-- Sensitive layers like MoE routing, attention, and output embeddings get higher precision
-- More tolerant layers like MoE experts get lower precision
-
-# Benchmarks
-
-metric | mlx-community_MiniMax-M2.7-4bit | baa-ai_MiniMax-M2.7-RAM-155GB-MLX | 4.6 bit (this model)
---- | --- | --- | ---
-bpw | 4.501 | 5.4278 | 4.5987
-peak memory (1024/512) | 129.632 | 156.051 | 132.442
-prompt tok/s (1024) | 739.996 ± 1.565 | 708.147 ± 0.818 | 740.409 ± 0.268
-gen tok/s (512) | 48.703 ± 0.116 | 40.253 ± 0.077 | 48.038 ± 0.099
-perplexity | 9.120 ± 0.047 | 8.835 ± 0.045 | 4.462 ± 0.019
-hellaswag | 0.504 ± 0.011 | 0.509 ± 0.011 | 0.505 ± 0.011
-piqa | 0.786 ± 0.01 | 0.787 ± 0.01 | 0.793 ± 0.009
-winogrande | 0.636 ± 0.014 | 0.661 ± 0.013 | 0.645 ± 0.013
-
-Tested on a Mac Studio M3 Ultra with:
-
-```
-mlx_lm.perplexity --sequence-length 2048 --seed 123
-mlx_lm.benchmark --prompt-tokens 1024 --generation-tokens 512 --num-trials 5
-mlx_lm.evaluate --tasks hellaswag --seed 123 --num-shots 0 --limit 2000
-mlx_lm.evaluate --tasks piqa --seed 123 --num-shots 0 --limit 2000
-mlx_lm.evaluate --tasks winogrande --seed 123 --num-shots 0 --limit 2000
-```
-
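The methodology bullets in the README diff above amount to a parameter-weighted average of per-group bit widths. A toy sketch with made-up group sizes and precisions (not the actual MiniMax-M2.7 recipe) showing how an effective bpw figure emerges:

```python
# Toy illustration of mixed-precision averaging. The parameter counts and bit
# widths below are hypothetical, chosen only to show the arithmetic: effective
# bits-per-weight (bpw) is the parameter-weighted mean of per-group precision.
groups = {
    "moe_experts": (200e9, 4),   # tolerant: low precision
    "attention":   (20e9, 8),    # sensitive: higher precision
    "moe_routing": (0.5e9, 8),   # very sensitive: high precision
    "embeddings":  (8e9, 8),
}

total_params = sum(p for p, _ in groups.values())
total_bits = sum(p * b for p, b in groups.values())
bpw = total_bits / total_params
print(f"effective bpw: {bpw:.3f}")
```

Because the expert weights dominate the parameter count, the effective bpw lands close to the experts' low precision even though several groups are kept at 8 bits.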
chat_template.jinja CHANGED
@@ -32,7 +32,7 @@
 {%- endif -%}
 {{- model_identity }}
 {%- endif -%}
-
+
 {#- Handle current_date -#}
 {%- if system_message and system_message.current_date -%}
 {{- '\n' ~ 'Current date: ' + system_message.current_date }}
@@ -116,14 +116,14 @@
 {% endfor %}
 {{- '</invoke>' ~ '\n' }}
 {%- endfor -%}
-
+
 {{- toolcall_end_token}}
 {%- set last_tool_call.name = message.tool_calls[-1].name -%}
 {%- else -%}
 {%- set last_tool_call.name = none -%}
 {%- endif -%}
 {{- '[e~[' ~ '\n' }}
-
+
 {%- elif message.role == 'tool' -%}
 {%- if last_tool_call.name is none -%}
 {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
@@ -145,7 +145,7 @@
 {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}
 {{- '[e~[\n' -}}
 {%- endif -%}
-
+
 {%- elif message.role == 'user' -%}
 {{- ']~b]user' ~ '\n' }}
 {{- visible_text(message.content) }}
config.json CHANGED
The diff for this file is too large to render; see the raw diff.
model-00001-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1453e1d2176aba8f7d7f0c8ec41d36b190260cc5e09bf882b2dee461022747fe
+size 5177037304

model-00002-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0572ba9610d5c45fb51db9d8592308fb18be7ab7150ed511bbe7f197e17f64ce
+size 5155124140

model-00003-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fbe7d62fbf61cd4bcf7948b8015bb360e22df0c026908c1c089a60569081c8a
+size 5155124138

model-00004-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3835a700f3a2cbeb744bfc7f91890b5ccf2ed7676d7e5d03d5ab72bea4458819
+size 5354514350

model-00005-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cbf2f0c5b55f15e761ee0d438623bb1fce4cff9bde11b99318e25a7735b49b8
+size 5155124199

model-00006-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd9cbdf4378822059bf3e4604a437a2121fa2157d2416fa2e10e35f1cda9c8af
+size 5155124193

model-00007-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acb259ba550c178ec5a85743eadb2b0b58eecc869eb1fba35e2dbd4b25618b11
+size 5354514427

model-00008-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11d784a38f27356319cbfe85ff52f436bd5b3589735d36552e801fd6d99921e9
+size 5155124193

model-00009-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fae71b190a4c7dc0cb412bdf6196d24fe322f19e78e989ec050674cf4c16a7f
+size 5155124193

model-00010-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c4d0e903bbe9e982d5992b8bf991a359dd2b1a552e97ee0a6ebc0a7161ab16
+size 5354514409

model-00011-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4f7f63241f0af1d07ad61f6298debb9325bbec2775a324b4b27abff385506e3
+size 5155124223

model-00012-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c34685375b3c296dd32df39a4e258952de296f76a859713a808800e6f2bf595
+size 5155124171

model-00013-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e3d490a5a1136dff06ba6d2ac3c2b7ae8eaac1e43a042f4a5a51d8eb829db80
+size 5354514431

model-00014-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a64552bb6eb6f5ccf456fbe6976caac8dbd0322c5fde6b5d985954134ae69c16
+size 5155124203

model-00015-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc5ba31651da410e39d6b9f431a8ba6a7638749ad40acc3bc6690c4badd74a6f
+size 5155124215

model-00016-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f1b942b84d35e5b8dff79d16db0f42d4c2775564e1e378f33b7e3f5b6bb375c
+size 5354514393

model-00017-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:648e440326f40ec50b1d0d9f5087ee2972888429c9b06c1e9dfb1f295468bed4
+size 5155124211

model-00018-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b89bf73e3bab098ba58694a01aa3014b742ba78d00acc42e22346e07dc01c86
+size 5155124175

model-00019-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba8b22b79b1a719c33d6c63b6de8e348b4c2579421c9d3f3fccc30f349cec194
+size 5354514449

model-00020-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f29c27c8a574f102d6d1ca6cda1958bf4989724be734e1dd8f516d5ecbe6f516
+size 5155124219

model-00021-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e79f793f81df310666d32d5767ffdb0e3027e03cb0650f0f40bceaba36028a8
+size 5155124199

model-00022-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:beb0be8dacf73419602d54995d28ef911c60436c0ba46a0416aa0a660e8954ff
+size 5354514415

model-00023-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34a1eadb3a850bc07ccb99c50b55aae61906c1a7c376bbe1ef9cecf921e11c48
+size 5155124205

model-00024-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3236bdd8c77646e820a6fc0e063ad6eb2773e620112caa46aa8744bb7c85dd1f
+size 5155124215

model-00025-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c96268c2039d21d786afcb7c82fbca99355fa1112863cc6778bc0a0e51553b6
+size 5354514437

model-00026-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45730088b256deabd29422cf786faa69b94f529080c3671048552dfc2a41ea52
+size 5306119123

model-00027-of-00027.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92f46277657390e18f42fca767fa4ede4dd871ab72c779db18abb333b9f37774
+size 4702792504
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 140503842816,
     "total_parameters": 228689748992
   },
   "weight_map": {
@@ -1036,10 +1036,10 @@
     "model.layers.42.self_attn.v_proj.biases": "model-00019-of-00027.safetensors",
     "model.layers.42.self_attn.v_proj.scales": "model-00019-of-00027.safetensors",
     "model.layers.42.self_attn.v_proj.weight": "model-00019-of-00027.safetensors",
-    "model.layers.43.block_sparse_moe.e_score_correction_bias": "model-
+    "model.layers.43.block_sparse_moe.e_score_correction_bias": "model-00019-of-00027.safetensors",
     "model.layers.43.block_sparse_moe.gate.weight": "model-00019-of-00027.safetensors",
-    "model.layers.43.block_sparse_moe.switch_mlp.down_proj.biases": "model-
-    "model.layers.43.block_sparse_moe.switch_mlp.down_proj.scales": "model-
+    "model.layers.43.block_sparse_moe.switch_mlp.down_proj.biases": "model-00019-of-00027.safetensors",
+    "model.layers.43.block_sparse_moe.switch_mlp.down_proj.scales": "model-00019-of-00027.safetensors",
     "model.layers.43.block_sparse_moe.switch_mlp.down_proj.weight": "model-00019-of-00027.safetensors",
     "model.layers.43.block_sparse_moe.switch_mlp.gate_proj.biases": "model-00019-of-00027.safetensors",
     "model.layers.43.block_sparse_moe.switch_mlp.gate_proj.scales": "model-00019-of-00027.safetensors",
@@ -1047,8 +1047,8 @@
     "model.layers.43.block_sparse_moe.switch_mlp.up_proj.biases": "model-00019-of-00027.safetensors",
     "model.layers.43.block_sparse_moe.switch_mlp.up_proj.scales": "model-00019-of-00027.safetensors",
     "model.layers.43.block_sparse_moe.switch_mlp.up_proj.weight": "model-00019-of-00027.safetensors",
-    "model.layers.43.input_layernorm.weight": "model-
-    "model.layers.43.post_attention_layernorm.weight": "model-
+    "model.layers.43.input_layernorm.weight": "model-00019-of-00027.safetensors",
+    "model.layers.43.post_attention_layernorm.weight": "model-00019-of-00027.safetensors",
     "model.layers.43.self_attn.k_norm.weight": "model-00019-of-00027.safetensors",
     "model.layers.43.self_attn.k_proj.biases": "model-00019-of-00027.safetensors",
     "model.layers.43.self_attn.k_proj.scales": "model-00019-of-00027.safetensors",
@@ -1064,7 +1064,7 @@
     "model.layers.43.self_attn.v_proj.scales": "model-00019-of-00027.safetensors",
     "model.layers.43.self_attn.v_proj.weight": "model-00019-of-00027.safetensors",
     "model.layers.44.block_sparse_moe.e_score_correction_bias": "model-00020-of-00027.safetensors",
-    "model.layers.44.block_sparse_moe.gate.weight": "model-
+    "model.layers.44.block_sparse_moe.gate.weight": "model-00019-of-00027.safetensors",
     "model.layers.44.block_sparse_moe.switch_mlp.down_proj.biases": "model-00020-of-00027.safetensors",
     "model.layers.44.block_sparse_moe.switch_mlp.down_proj.scales": "model-00020-of-00027.safetensors",
     "model.layers.44.block_sparse_moe.switch_mlp.down_proj.weight": "model-00020-of-00027.safetensors",
@@ -1076,20 +1076,20 @@
     "model.layers.44.block_sparse_moe.switch_mlp.up_proj.weight": "model-00020-of-00027.safetensors",
     "model.layers.44.input_layernorm.weight": "model-00020-of-00027.safetensors",
     "model.layers.44.post_attention_layernorm.weight": "model-00020-of-00027.safetensors",
-    "model.layers.44.self_attn.k_norm.weight": "model-
-    "model.layers.44.self_attn.k_proj.biases": "model-
-    "model.layers.44.self_attn.k_proj.scales": "model-
-    "model.layers.44.self_attn.k_proj.weight": "model-
-    "model.layers.44.self_attn.o_proj.biases": "model-
-    "model.layers.44.self_attn.o_proj.scales": "model-
-    "model.layers.44.self_attn.o_proj.weight": "model-
-    "model.layers.44.self_attn.q_norm.weight": "model-
-    "model.layers.44.self_attn.q_proj.biases": "model-
-    "model.layers.44.self_attn.q_proj.scales": "model-
-    "model.layers.44.self_attn.q_proj.weight": "model-
-    "model.layers.44.self_attn.v_proj.biases": "model-
-    "model.layers.44.self_attn.v_proj.scales": "model-
-    "model.layers.44.self_attn.v_proj.weight": "model-
+    "model.layers.44.self_attn.k_norm.weight": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.k_proj.biases": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.k_proj.scales": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.k_proj.weight": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.o_proj.biases": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.o_proj.scales": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.o_proj.weight": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.q_norm.weight": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.q_proj.biases": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.q_proj.scales": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.q_proj.weight": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.v_proj.biases": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.v_proj.scales": "model-00019-of-00027.safetensors",
+    "model.layers.44.self_attn.v_proj.weight": "model-00019-of-00027.safetensors",
     "model.layers.45.block_sparse_moe.e_score_correction_bias": "model-00020-of-00027.safetensors",
     "model.layers.45.block_sparse_moe.gate.weight": "model-00020-of-00027.safetensors",
     "model.layers.45.block_sparse_moe.switch_mlp.down_proj.biases": "model-00020-of-00027.safetensors",