{ "metadata": { "ParamSize": 355, "ParamBytes": 4515962880.0, "BitsPerParam": 4.500193625511424 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 524288000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 256000, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288000, "byteOffset": 0 } ], "md5sum": "5eab270b7c370dbda6264c4b19d5d8b1" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 256000, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "04f1dcd53e7242bfbfa171f980e15450" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9ae923347cf291c5395783d7a276f0d4" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33038336, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 8192 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 29368320 } ], "md5sum": "7189ad3f00ffa3b01afc940600e1b1e1" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.0.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.0.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "194ef1ba06d37ca4f2a9a496362f9058" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d252bba6de05e578aa9636d6c8675781" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bfc45677598f2a38f2ebda41338aa129" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.0.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.0.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.0.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "84bf067f7c53c1ad5393b7ff25c78761" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.1.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.1.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "e97d180ee9ef47c8cf8ef0d6a95f3c89" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "76b46c748287f1d387e977adbc1f3c9f" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bba061a65698bfb2e133e27ccdf97167" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.1.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.1.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.1.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "bb4349612242196d24c2424a37a78836" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.2.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.2.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "3fdc7ac89571602c2074826717995642" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1417c7914fea8dfeb1cdc9ae860b00aa" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3d86316a0d6a11b3295011fbb180ca12" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.2.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.2.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.2.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "a2a30ea335e7ad9b97fc347bf53aea41" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.3.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.3.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "36a0a023ac5576e3fe5c2a9ac8b85fd6" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6f2d280220a9bb4272e4c31e88f9de67" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d8ff5913bfb0f67e5b44f3838c1c11ba" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.3.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.3.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.3.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "efd97f65b063ba8a4fde2e6c0248e9fa" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.4.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.4.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "a8f4b140d2c7046ef77e8730f229b837" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "393231b253082d7f5814edc5bbef7765" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2778d3088abfd8aa22da14428352f3e3" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.4.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.4.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.4.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "3b18e41d4f5c4b13609c2b439d007832" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.5.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.5.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "382add8e5f0dce52af713adaf2f99d4a" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "43e8730c31b6dfc9a4adae074a6f52c4" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 30932992, "records": [ { "name": "model.layers.5.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.5.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.5.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.6.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27262976 } ], "md5sum": "b3dd0a6547e3e34d0722042c1935cd5d" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7731be01c7fde020c67434a303dd9e40" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6fa9ae02f180311ed9ddfb08f9753d31" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4e39be084b43d705030ab39199ed1cd1" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.6.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 }, { "name": "model.layers.10.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "9b9593584fc1fb9f58b12dd68b854578" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0432498bf339cf63fcbb512529b04568" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "20867305455256607534cac048ae3640" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.10.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.10.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.10.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "9b9e70b483078d0e0f474874077b6b5b" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.11.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.11.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "484e077735aab492214daad6cedcf0cc" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a4110ed30c9563819f639e113d9a6a27" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "99576f36b82a356df61dc4ae12934c84" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.11.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.11.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.11.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "8c554f9625509fea2b92ff997b40be29" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.12.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.12.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "2abbf38a5de84bd8614fc3a7c0a16f09" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5b3d7d782fb5fb5e0dfe6a754c5ff268" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1ea103bb1273957ebfb64309e75aa5e0" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.12.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.12.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.12.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "8c4aac198b853a2c38c6757b98a01503" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.13.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.13.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "998f67849bea9af00d8cbfae5791a7d5" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "782fa0ac5d16861b7846094ab736aa86" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7ecc5c9502268fdf853bd9ed42782df9" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.13.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.13.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.13.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "cf85f68ae9d1961c3b433a8b44747e34" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.14.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.14.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "8b8cd85a21701e64b5c7cbc15953323e" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1ef42cbfcc3cff2b32676ab29e7858d8" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dc54040bf4642e69c9f2066b4b913199" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.14.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.14.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.14.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "88feab30fdef750f380ca3bab492c169" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.15.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.15.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "a3cd9360098d8a85e7cc34f64b31c20e" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "946386deb976eff3e2062330399dc1d9" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "411bd9ec57668a10b922c567076677de" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.15.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.15.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.15.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "c3a7e5a5ba31aef5f649c4c9af317eb5" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.16.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.16.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "b9279ef3455f45982e68572d8b2a288c" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1f13c7d5571d1a5c872fa00771224b4f" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cd12621c09b96671f1c5ff08f82afe9b" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 30932992, "records": [ { "name": "model.layers.16.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.16.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.16.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.17.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27262976 } ], "md5sum": "c856bd745ac32df1fa523c9246f38bed" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c6bbefac6f74bd88671be722fb5eff08" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ae6329c687559987f2676376500d84f0" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.17.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.17.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.17.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "1e7c57d7d054f711842aaad49194f4ec" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 33038336, "records": [ { "name": "model.layers.6.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3678208 } ], "md5sum": "371096349d552d188955b48fcbb49c17" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "9b58b05a86d5ce8bfbb75b6c4c1d7a52" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.7.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.7.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "d89ede6cdffbb6953a201f8f51c624d5" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bd9959dfff9eea0dc32ccf4f31f4ac5c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "96dfcb9c4cf852e21672151d7351f449" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.7.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.7.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.7.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "c7c2080f48fa09ea7e0b78f12b00059a" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.8.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.8.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "caa7476d9507c42114cfaff0995b9ac0" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2520e28f5b73fb21577f449b419c3492" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "340ce58803258822e074e1daa6749489" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.8.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.8.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.8.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "3fff950e907abe8800b034e6dc82994b" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.9.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.9.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "7f88c06610d1fdf2213ab1931cef8a1a" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "520286ff959ea3b32f4b1a058b5c2692" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "258f0b2e6f05f00f5b5bf6875440de98" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.9.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.9.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.9.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "23b1f9dcbaafb7565741fd618a696cfb" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "8ca7ef240177340628ef911fc927be7a" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.18.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.18.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "d622d96846f3029f1df2250a73a62b21" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2d76ec32b7530aee65508b90d90e7857" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cece555a241b19f0616861af74bb6822" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.18.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.18.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.18.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "b76b89a390c85b397d9ecc7dc65a39da" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.19.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.19.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "5feb20c704db1d230b27f14570e90808" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e2b522a63aa1f55eceb247e678128512" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cd196a805f36ef12332b92d742ed2761" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.19.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.19.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.19.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "f397781e28131d28c58c4502476d9445" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.20.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.20.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "caabbb8889a2cc48dd2f27a1e0750f91" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6fe62c8aa70603b86f2034a50ae46f07" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "55fcd54739a55d8e794573dc36d63b95" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.20.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.20.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.20.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "3e0339a18066415d9571128d218ba977" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.21.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.21.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "954d3fac358ad2cc7e8ec02608fa555a" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "629198d03045262a387f78cd85d77242" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fb29f826b593060c24096ad0541e1fb9" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.21.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.21.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.21.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "ab2d9bef3018bcd1ad9880fb58394d96" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.22.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.22.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "d4b49c1954103d3a1b3015938542e7c0" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a02e4bdc7a71f69f0899e7910dc70ef5" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "acbb9d08c1d75e68ddbee7deaeddcfe8" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.22.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.22.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.22.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "9a591c57974e32c1a02b4dd9871c958a" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.23.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.23.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "7df3c8a60a2ab5464182c7b07754888f" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dc6c9cf3e40533e0e41e138f9024d525" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cb3be97a250aa97e9977fd651fe4bb1b" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.23.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.23.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.23.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "77ff655030025080dfed83fde35e124d" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.24.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.24.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "622640214533af601c44c163ad8ed64e" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b55472ce470abdedd7b5ae55ec23e536" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8f857d69bb83b04e68791e485c28ca44" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.24.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.24.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.24.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "a40c49025c850c55de0a382e9120a010" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.25.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.25.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "92dc3882e2f2817ac3ca8a7791ebe4f6" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0f08d4f1d7f01ade510a4b9eaa4f20eb" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "db1ab9d3f1fa714fefa51ae7af8fb1c1" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.25.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.25.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.25.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "7103262c66e8b9e5553e49de88a31cde" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.26.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.26.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "33033f108c2c3db6899714f3833a3837" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f057e9e5bae6d3ccfa4e9fffd69a2528" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fb8769b0266683af9e197932cd0f444f" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.26.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.26.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.26.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "e877b54738e17f0ae517407e761c2865" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.27.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.27.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "cd4aa9bf2f82d228592a073323c93f01" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b03375ec26f32649166a008c98ead00e" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "10ed900e7d1db4d895c0a6e4783fd021" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.27.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.27.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.27.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "85138e357fcc4cea31c02d7146f6fc1d" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.28.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.28.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "128448bdfdf580d92cc403323875b9f8" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 27262976, "records": [ { "name": "model.layers.28.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.28.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.28.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 } ], "md5sum": "7a14a957e0c60372dd79b6ea804bdf61" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3bd23928f9d32a8af8bf36c18e1d4be9" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cdcd19df2fd768bce74b8e2291622b33" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2b8c55376406b9c961d18930dd5de889" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.29.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.29.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 }, { "name": "model.layers.29.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "c107852313c026e3789f78ad15de6fa6" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 33038336, "records": [ { "name": "model.layers.29.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3678208 } ], "md5sum": "87021edacdb055f39366c6f0542c2d90" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "5dc7cce6f967fb88585adcd35e70c340" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.30.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.30.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "f740c71106891b6460775ab33e0962c8" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e1cdabda8dc6fbd5a1aab37023e549b3" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.gate_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f16a84a58cc5bd287ce387c6494a99f5" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.30.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.30.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.30.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27271168 } ], "md5sum": "fff30f4c279a52fb4c5877810b6ac80d" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.31.mlp.gate_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.31.mlp.up_proj.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3670016 } ], "md5sum": "7456700e755c31bc995de88944efcc6d" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.31.mlp.up_proj.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3670016 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 16252928 }, { "name": "model.layers.31.self_attn.out_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17825792 }, { "name": "model.layers.31.self_attn.out_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26214400 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27262976 } ], "md5sum": "2881d86656c20f515e3b724234a9000c" } ] }