gemma-2b-it-q0f16-MLC / ndarray-cache.json
CharlieFRuan's picture
Initial commit
5314108 verified
{
"metadata": {
"ParamSize": 110,
"ParamBytes": 5012344832.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1048576000,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
256000,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576000,
"byteOffset": 0
}
],
"md5sum": "206485d87a22f62128d5b2494bafe7db"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "0af16f8f82b18456b292f26c2e72f63b"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "2ba31ef002c5e642855d91dbaf959171"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "c7d1d36c62d66bea4696e965fff1917e"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "362ff38d501c9881a63569d0cfa88605"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 4096
},
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8192
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10493952
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18882560
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18886656
},
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18890752
}
],
"md5sum": "5d30e8d1452ab145a0564794c26cd791"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "fe63b3dbe2543296f8112fac2ade5290"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "629331e7d43d063ae03d036060febb5f"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "cd3f99f63fc39b1170222cb6f4717b72"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "c9193e83b6a383f990b1d7d703a6f80f"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8396800
},
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18882560
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27271168
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27275264
}
],
"md5sum": "59d0b4f1bd55ceeda6e5ab96633e9451"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "820b534e89d40aa248659ca0081f0674"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "256dabcc684ebacdfdbbcfa0adc035e1"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "7d790e2cfa6142474ac62a0ec5aea10c"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "908e1855eb654380dfcd3cfd85a1e631"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "ea87b38d7fde1e77b55f18aa11c0ed31"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "f795202d6edb9f663561fa0cf03fa27a"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "88378b8b2905d8b91937c001c73403cb"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8396800
},
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18882560
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27271168
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27275264
}
],
"md5sum": "071c71fc33170a2d3a1a2ae1f03b140d"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "b58913c0a9d042f1469b2c4929efb8d3"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "7f3594a3e54595cf7f92b5901e0f6465"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "b7c100b48dd50547c5c74f03a68863aa"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "497e575ba76a1ae6e33393fd7c9c4c12"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "7e13a326187148b0ea54878998af1dbe"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "3e8d823c128094965bc0b63996bd30e0"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.16.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8396800
},
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18882560
}
],
"md5sum": "c51345fdd445c9c55206b86ee31d704a"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "708872d176d1deddd88435c817e93d2f"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "0eef3929b28890b79faf9d02dfa2487f"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "6328df0ffee1d1160e0fc6fa300f4c9a"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "30f748096a7ec18a6e349c49a7d14822"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "a15a4fe50c6c5b78f36ec6351732a345"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "5b697bbb99453187d0eee6b6127d2008"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "76327282d65a9c80614e17129cb9e5f2"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8396800
},
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18882560
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27271168
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27275264
}
],
"md5sum": "1f73ecde85ffc44fa98fba7534c55356"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "c983dfde03cb18ee85a7d80d50f4d165"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "afb45d7056b90f804e2c75489be5792e"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "a8c9cd2cc4d6179ffa0d181051ef4f7e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "89e90749de11be2bc1ee29c5450a4f3d"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "bde36e490b2c3a905f40e1285f4873dd"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "eefc32a2ccf311c1a95e01ca979aaa9f"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "c31b3371a8c2294135e993653a25d153"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8396800
},
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18882560
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27271168
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27275264
}
],
"md5sum": "a9a738d8b541d5d1517049e641a72c6f"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "b4d55621d295f8693b39a501ea581847"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "1031e7308cb30a67747b65b4fdaa94af"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "f74050232fb038c2c717ca99af7cdada"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "2639e2fc57901dac06e9d4183ce2ee34"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 134217728,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
32768,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 134217728,
"byteOffset": 0
}
],
"md5sum": "cdcce0c795b9df58125fb24aa22f62e8"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
2048,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "135969d29acd56ae5e9ac553cb476528"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 27283456,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8396800
},
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18882560
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27271168
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27275264
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27279360
}
],
"md5sum": "7bca8fca53ef5ba8a977e4733d66824a"
}
]
}