{ "metadata": { "ParamSize": 325, "ParamBytes": 4140998656.0, "BitsPerParam": 4.070120983102826 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262668288, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262668288, "byteOffset": 0 } ], "md5sum": "9e9e2b56db00817b08c2ef9fd2763c3e" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9fd4d7af17e1a9e31f86aac1b9f817c8" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4a93084db5b92e73c80365f04ac8df70" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 262668288, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262668288, "byteOffset": 0 } ], "md5sum": "bc148f252d3475e5b5a57c68fb1bf74a" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a9f4b022c1957559660f89cfcd54bb1c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2f22c71478eadc79535c36096982c8c8" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 21962752, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8208384, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8208384 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 8216576 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 9134080 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10969088 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10977280 }, { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8208384, "byteOffset": 10985472 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19193856 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 19202048 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 20119552 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21954560 } ], "md5sum": "723489083c966c8c1ee1ce344a152812" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "51cabac1d4aa63924750b6014c3f6093" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "44aeb8397954f7497009879349433f1c" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "cbd6b1e521bb4d85022a2b75364250ba" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0dc6817b6f31dcdef3253a164215789f" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "55b589c4861b4180ad5ffa41cd3ef610" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "b2cd838b9fdefa83a69f3988941ecaad" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "989c236147ffa628a74e5810291ef7a2" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "15ac49629bb7f7fdac7ae11ed3ea0a32" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "237ad93162c40fb5d7a5d7e5835efc70" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5f4e675864bd1ad058dc0c3716967d26" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "08ec259d6ce1c799c4ef654e18aa2495" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "0de024f3aeca85027a30a24eb3537294" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f776be88400c140633197323e877221e" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4695585276b4f3c5fb15b5259ff337ab" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "e44f8875388d096cdd3bd6a79382d005" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "44d1ae9ae388a59e4ffd6e2cb79dd7cf" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "693d6670541d0b2dd817aad7e6a9878c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "dbad84ea33fe02f5a97febca45244626" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b61cd2279f84362fd415a3358f7b45f5" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9a8608fc7faa98bdd1587b34f9819377" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "bef091d5981d6b08967579a372f5a81e" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1354e506cff2c9a0c79061dde137c044" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c379dfc3f454925ac1c5a9d9c8adeb8a" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 24379392, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21626880 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22544384 } ], "md5sum": "d67c13120522bd6b592c82e827a32f92" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4c14cfae4718365f8f36100ca60c0fcb" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f70186a395d9aa6249502117b658614f" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "33a664877e87771cf993a3e614d7ea0d" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f42fcb10425a7fd843812cf83ce9f2b7" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "051743383325737cad59c42cf80efe47" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "b738a625fad07f5d190543dd4c47217b" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7fb3e73b437098a34593e990fba10fb8" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "582c16c30be8b3ff9ca502bb9c2e1d65" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "8a17b68036b92430f689bccf9662c214" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "543e87f6acbf842d60a0d95a49ffca6e" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b59a3a9b908680feef91966e0a5723ea" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "db7b8b51d5a6899b8344dfedff96e07c" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a1f495b759dcfa1cb61a7ef426f48613" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3eee4b1b7f4ed7f095071a847c86dbc6" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "52bbf20099d84eb3cd6461704715dba2" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "81010b85f8f451d4c82959b1f8e3b4f0" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c74b96ff36aa4c77f0cc02b975cce273" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "a875b381499b24eec2ea987ca185147d" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "45f96c1dcde8d6c74ef0205d6db5fc67" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a8e345d6ce40567dfb0e73a4cbb3a676" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "8f07e1162b6168d89c600f82ebe2a711" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d6c82d8d88a1656b285d62e69cf1ad70" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f56e4bddf1d32893a84ed0876788c15f" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "1b31badefa07b73c7d8101f762350a4f" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "818eef07efe06f3ba0da10b7b5ebdf85" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "e670294356b1e8abdb2760868961a921" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "58ca663ad85376aafe08e062a11f12c1" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d0299daee5af0159fceea76bdec316da" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0945bce503b7623baee25756ae864242" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "f234a20637d869a23373761ab4ebdb6e" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5d625e72bf7f816cacbdc61d778ac32a" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a27e363fe44b386510f6b01d09786a27" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 24412160, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21635072 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21643264 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21651456 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22568960 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24403968 } ], "md5sum": "788a033e90be2cddbd2cf583aef1ead6" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b5cf829ebacbf318cfc66504da62f4bc" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fef871f090fea01f0d87898702af72a0" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "0b801a1fbbe1088b36f70c64ea9a6b65" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "554c0359f11a33ce015fb79bd02d9c43" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "62f762331b335225f1168c4888e433ee" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "31e98699cdcd413e0315dddc389ea24f" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d78498e4023f23da08dde042005d5594" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c88c67c87e2b078fbb7703f50b96e594" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "6643031708fdf3e25adb28bf05fc9c21" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "85f8d9e9fa40054f1598efa75942764e" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fd431ac08bcc179250e47c7fafae7754" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "75c6d6243a3c088b6ca249038149055c" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bf418b4bd8772b9c798b04e8656adab5" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "60c422cf0756433d917851893a6a26bf" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "42ffd36fdf7e00957fc065b42e952999" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d846658e1bd424998b38c7f2935e1525" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "70d23d1bc3dc4588eb2a99697df96a6b" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "713bf10e7b89e6587789717564f0965a" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0a026952ed20fce72107d5f992d489c3" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "81bd2214861f623f194e84c77f018e04" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "15c3a6fcd1e505a1468c18bf5ae14a56" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c03c91e3a55f364153fa638422b8ff4e" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "33fd788dc0865488b60e8ce0799377b8" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "de76cf3cabacc76423306e0444b0653f" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8230a40018274415bc68b554cb830e6e" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "58f8f78d02880bb0839ade841a0b6971" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "ac46936006d0cb2e64a322ecdcccdd10" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7895a183d2570ccc8c87f1e4a8a80682" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "536429158d2bad488aa6af1e190ddd35" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "ba830a94badc4c05c0ebabe124988215" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ed05d4143a576f695f512da1f8c3adc2" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "002ebe7c8b2dfbaffdbfb53d55e67faa" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "4c584e719d7b1fc99b2db53867b05003" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 21626880, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 } ], "md5sum": "c171039446345bb9e687553004a3ad30" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 21626880, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 } ], "md5sum": "55442e9c233ba784ab1319b1070befdf" } ] }