{ "metadata": { "ParamSize": 170, "ParamBytes": 17075361792.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1572864000, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 256000, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864000, "byteOffset": 0 } ], "md5sum": "c60082db0e4d8f35e7289fd7de2d6953" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "0f4e2e963889085e8afe6bc6c49f5572" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "14fb73c20a944f08d1ca3fbc249a4dcb" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "86a00af176de654eddef55171a294f02" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "f95cbe4ce39a600c2e2ea3b4a79d3506" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "58c5b02dbdfb6f4ad7dc07a987487d81" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f824e83e1c09800fed5d9e501cbef9ed" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6b4b1df277cfbc3b70d620be9b0fd82f" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "aa05d7ae7d4eb220f37747d841c39f41" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "a04a7a8dfba3225e5c9e4d08f5d991c8" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "69642bf15d15e8e3d0a50df2c87b3c79" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1bd0562941d5344f2f2981de3eda76b5" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "5241c2d095d69394ec4ea68ba6f23f9e" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "1b1b08abf4a0d04a65bf68736d8673f7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6510c5f58389931a2d5e1943a4f6aa2e" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4ba676881606031a5ee0b74e33fb88de" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "40d0b41abee65ea73e3a4c82a6fa65c3" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "f3187d1dc182a8f95ceab7df293cb152" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0a1d0e2b8358cd2b4856c73c250dd3a3" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "aa2041a5903e8b1079fa61a8cd603429" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "09e715068b771738ffa53ace091b7f0b" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "4054e3e3ba3296ba506ab84288192da9" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "194504b7b8ab52be14b2581a772ba2d1" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "19d6c58020599c295a7c72322240d15f" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "02f69e4867691d3b47d95927a56854ab" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6ae680ce108a1e6d457056c82ae18bcb" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "547cbecf7fd4ee5e09053084470211c8" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "78a402b92e7a0b524531fc7229cfe3eb" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bbe991c38a3544a1a32286d300efdf3d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "eff50d4dc40d27ff7f0b45d9361ee8fa" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "1e65975a0acf68aa3dead36688fae7c6" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "66d5d789c9a68fc43f0432dee2025af1" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "21b8e6c6330437b35c12bac84fc5aa55" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "297b687fc59ee54860e1e086a189d049" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "64c4395080e11b49919f611fac8f5bd0" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "2168bb7b517618736468495b64de6492" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7601fdd9f5c4172ad568a88b92780d4f" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6dced480e397d0e4b0f4e229e59168b3" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "b7a2901c6cb2f4e1a84b2559bee2c205" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "668d44bcf32609f674422885adbb2568" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9f7f06099c20e9561b85bbbd71083a9d" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "36e8659919fcc7c0e9cd15bf7ee20638" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "69272f810a63454b7e493ca8db2e4e01" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "97ec6c6098706829215b9775c002b32b" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1b79bfda3ec65213084ac12ceee29f33" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b807f3ab603cd37a846676d193f9d277" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e5dd0bf094a56e3d175d2fdf163b8b7d" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b5bcc5621ca7a57eb12c483feeb43307" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "8217f638eeb1e46fe3868fd8e8bf100e" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "dba591e829aac359033b6e918382810e" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "dbc527f7fa51b6884d6653380f293d2a" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "f1d1a0bb4f98bd51126c57fe77684dce" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f58da53e30d94e45a0a369e0a1240ee2" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1e27f4032aabc28bbe3cd547f8e186d7" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "408fc336ff7f18a7c83fb7fa603a4998" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "538df5b59596e2902efe2bc0af1568b6" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e814405b553d7ec957cad8f3d616ebc7" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3f81f8bc3e1646d20574a7bf8d96b0d5" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "e1a1953eb2ccac5af2d7590e6d2b5ec5" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "ce270223bc558db0a5d27af2116fba53" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "59e777a792b4913307bfda4cf4075a71" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c7a7d717f3b9f832dcaef9749a97c74f" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "385607fea47c6f19a9fd39de55901040" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "c7c9818761d650fea693ab80b98a9d1d" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "48ea87d4d016f945999069724103aac9" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "0ff42527fb75bd5513d8c18ab7c59d98" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2d87584e9446fb1ebf02c1f1709f3ab1" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a952eddcef19e87acba07a874b8f37e7" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "12fda1b30881783dbe2e19b782ad94fa" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "2b7afa8bf48f008e0ebed5abcd2f15ea" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "260f73d27ed48ba8a642b899eec416bc" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9d65ef1f2796ef68e935d6014ca5ba1a" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "3171884795345165c875daa55a17fe94" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "2cfb567bc6879de46040a7f8680d8066" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7c1f33a25629a88883fd6c1526fda4a8" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dcd7f2a5e7045ff5cf5f581832219172" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "fbaa14547db7f71b1885972de54cf52a" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "b73203d60d4898d6f198982b94ef17f3" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "206abc6ecd1f274098f872f4881e3cca" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4428c7ab551722a23e8d07042b9f88d1" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "00c3310f6c68029e3681e6e29acea1f3" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "6e8f193ec13343dd3282e3c72cf16498" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "84e566f57f2c1c687fe22e5a709c75d4" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0b4e188c7aab9c318bc26bc00fa6eaa7" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "6b956e65718e312b8f64d65480a7eb73" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "aa1eba42e21c4a3a209ed123807a5cc5" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9c593249fc8574d8d11888f36a4ebc8f" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9a9767c0741c14fb34c278847729ca41" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "fefa508ad96e075de33755d3e77c0b02" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "272780eda760a1b632ee0898bd822bed" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0c1787fdf9ada9a3cbc2615a3557dc6e" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0549b7e823aae895993f740fb5f92d1f" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "947ff6f419800cdad216c95bb6eae64b" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "c03fc0850c2564e0c0b7c2420601f1e9" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a6ff8555eede8310413c7f0edc374b2f" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3f9577771286399bee6784def87c1a18" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cf1fbf37bec4183e93eef194cc87cc26" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "31edc9bcc3145aece6c8ee2d7b0583d7" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "a90e87939fe6ca58ddf0de8ebd52c62e" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "c0b0e83fa9c6d85c2b92efc97bb9dcd6" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "74f19307deb63ec97ff3aa8f5e1ac00a" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "e9d88cc3c3265e7b073f039e020e3fba" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c3832849eba8cbf73f87dead864a76d0" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a51bb7fd741c29ef91dd4fadc787e03d" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "2ea4aeb190ee39979e1099d1856a93c9" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "23e6ae0d6b3145458e7285adee8c25e3" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cd7a693ca49d3883fb57d5064f5aaa5d" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4e8b4c437c8a8c1893756bfe27dd50f8" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "b592e53fa6d6c73c73924b4ba2944b56" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "c6d781f9c489a4bab30b084193c37b1c" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "984e89674ec1c07fc87a9a8b3a3e72bb" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ad1ef7e668a5d8ded355ef2621df9998" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25516032, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6144 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 12288 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25178112 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25184256 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25190400 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25196544 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25202688 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25208832 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25214976 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25221120 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25227264 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25233408 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25239552 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25245696 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25251840 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25257984 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25264128 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25270272 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25276416 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25282560 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25288704 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25294848 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25300992 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25307136 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25313280 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25319424 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25325568 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25331712 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25337856 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25344000 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25350144 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25356288 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25362432 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25368576 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25374720 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25380864 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25387008 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25393152 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25399296 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25405440 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25411584 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25417728 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25423872 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25430016 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25436160 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25442304 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25448448 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25454592 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25460736 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25466880 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25473024 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25479168 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25485312 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25491456 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25497600 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25503744 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25509888 } ], "md5sum": "e8b4f06390ce04a29734c710dfe87aa4" } ] }