{ "metadata": { "ParamSize": 325, "ParamBytes": 11118735360.0, "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "lm_head.linear.weight", "shape": [ 51200, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "72c3fae3b776fc40e15f87604b845ed5" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.30.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "9ff99441a514882fb2ee64e16beb76fd" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.30.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "bb2aec140fac21c669528366591e3a77" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.30.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "48717e462ab585b46fb895b627126bb8" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.31.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "eb3db13732d5c817dcafb8191d43df3f" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.31.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f65ce63a5d7845311db1673d2fda37ba" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.31.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b54fc4f41a60a0b845e31c4e64666b8c" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "transformer.embd.weight", "shape": [ 51200, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "fe07943feda29e33311211330c358d79" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.0.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "1b700f8db3506f0cf91f415001848b2e" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 26460160, "records": [ { "name": "lm_head.linear.bias", "shape": [ 51200 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 0 }, { "name": "lm_head.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 102400 }, { "name": "lm_head.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 107520 }, { "name": "transformer.h.30.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 112640 }, { "name": "transformer.h.30.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 128000 }, { "name": "transformer.h.30.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 133120 }, { "name": "transformer.h.30.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13240320 }, { "name": "transformer.h.30.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13260800 }, { "name": "transformer.h.31.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13265920 }, { "name": "transformer.h.31.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13271040 }, { "name": "transformer.h.31.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13276160 }, { "name": "transformer.h.31.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13291520 }, { "name": "transformer.h.31.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13296640 }, { "name": "transformer.h.31.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26403840 }, { "name": "transformer.h.31.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26424320 }, { "name": "transformer.h.0.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26429440 }, { "name": "transformer.h.0.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26434560 }, { "name": "transformer.h.0.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26439680 }, { "name": "transformer.h.0.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26455040 } ], "md5sum": "8e8bafe6dbfb85948d1e68f7c3d331c9" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.0.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6a8666b57943ae5779046eabcedd4368" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.0.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ea4bd6378ac24fb06732f934f2784c6b" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.1.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "3962256eaa8bc206e895285d1a7f0357" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.1.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c47d34f9b6060eb7e784fc82805257d9" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.1.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "803a0d20c3434028bf50e092ae2cae55" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.10.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b919c749bebf96f18855c8bd5ad234a0" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.0.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.0.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.0.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.1.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.1.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.1.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.1.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.1.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.1.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.1.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.10.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.10.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.10.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.10.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "998a1ceb620ee309630d73822543a765" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.10.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "9bee5a1c846f66c76c3b5bdd291350cd" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.10.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ea8f51008ef282313341d459d6164340" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.11.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "814c34ea78a434728cf836b7e777ed06" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.11.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "233903939a554b11fb26250d67d8002f" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.11.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "65a81f6fccd0d05aae50650dab2c04de" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.12.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "035f036dc34d5b95f6503d581def7920" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.10.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.10.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.10.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.11.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.11.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.11.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.11.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.11.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.11.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.11.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.12.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.12.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.12.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.12.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "6314df114c8bc449accbdfb66c7f3dac" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.12.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1822c71b27b43fdb3433c133e4ed28e2" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.12.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "eab022f5ced01b2c774aeed32b59e635" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.13.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ff5209be3b95203add8c4b8df46a92fb" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.13.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c3801b278ff0b1288f0a20a5d1810d6e" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.13.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1b88e21871c66345d3e541eb3e5d05c3" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.14.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "52f7eca32be0c3468e3fe0114d7df31a" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.12.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.12.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.12.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.13.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.13.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.13.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.13.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.13.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.13.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.13.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.14.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.14.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.14.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.14.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "aebd2a0198dbcdd7d7c690c39c6417f5" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.14.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f87815773490d1b0c222fe60f96b72ba" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.14.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d665e86003133ad68300f3fe7a4e9359" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.15.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a318393d22bd178cdd36f8b45e4d5d89" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.15.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6adc811a3327a20b25af95f6a113cbdf" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.15.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "bc84968f0ab5a21a82b80ae697913dad" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.16.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "824712c1c6bb19d150e84511c2e2d53a" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.14.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.14.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.14.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.15.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.15.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.15.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.15.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.15.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.15.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.15.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.16.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.16.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.16.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.16.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "af4808c0e8f5da2d991b74bf4304230b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.16.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "57491d5adcbf6a91983d0421dc1c1d36" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.16.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "00e1c665760e8f38cd17314b47e43137" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.17.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ff3a152d0e459e7629d27588e0cde9b9" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.17.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "81ebbb27207ef07b4b0f756cbd7fcabd" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.17.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "2d102050e8791eca1d1bab41b16e4a1a" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.18.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "f83994e9580ca397ed13d03331b73bbc" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.16.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.16.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.16.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.17.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.17.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.17.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.17.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.17.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.17.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.17.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.18.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.18.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.18.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.18.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "afd0ec443cec3c92e3a46481c21384b6" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.18.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6b24d892bfedde65df5884e4656acf08" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.18.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "706cfd506d0e95c272b3233e23c861f2" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.19.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "de4134eaf8218601df14041bc536588c" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.19.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f674b443d88b0d70f7e23d83896104fb" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.19.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e18e5ddc3111b615eb79678b804239d7" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.2.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "88e9221b1a673be52a66153940e94261" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.18.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.18.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.18.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.19.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.19.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.19.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.19.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.19.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.19.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.19.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.2.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.2.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.2.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.2.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "b659f97e184c45e3c376539c08cd5d5d" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.2.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "0aa95aa9e04199d30b24a88910670ebc" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.2.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ab86c0b1ba827b6da96b5b605af15947" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.20.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "995e576862a39e3bbf258cc488167381" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.20.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "0ea061ae65b0c11b3a743c6c8c2b63a6" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.20.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "31c6fb1210cfb3106406da2833a68727" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.21.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "3aee299dfdfef47756a71140cae47be0" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.2.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.2.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.2.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.20.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.20.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.20.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.20.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.20.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.20.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.20.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.21.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.21.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.21.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.21.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "1e808418fd138b4ab390a5b2da37a4fe" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.21.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "350583ac7cc80d6f669cf5689b5b6716" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.21.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "70e329b5ca5914090897fd800d762a29" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.22.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "5e7dc4cf644b2c42b01bbca70e3767ca" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.22.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "fb4f8b740031332b3ac7f72d90db7a73" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.22.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "36a3a5d282c032133d473bfea5d955de" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.23.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b48a6bfd7d2c80b0c2091b2212a80f10" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.21.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.21.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.21.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.22.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.22.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.22.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.22.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.22.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.22.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.22.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.23.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.23.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.23.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.23.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "feb77afec50b1cb66fe867d8e434901f" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.23.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "392142ef1248d4ff39989e35075af68e" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.23.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "3df27b8259c7aa05cf44ab1d004959ac" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.24.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b1569bdb6d5b6e63674ee1f42c15edb4" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.24.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ea1aafc3c9ad6134a16a4d997ccdc42e" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.24.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "eb68ff024350661b1c0788c7d08852d0" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.25.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "68dda454e4a5265d702068efbbfdb6de" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.23.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.23.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.23.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.24.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.24.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.24.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.24.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.24.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.24.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.24.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.25.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.25.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.25.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.25.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "95525045e136adfb1a0db31d65cb9881" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.25.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "db9abecb308be257bbf0f70a09a638e7" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.25.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "34a328e8c8967a7658e9a2833365f604" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.26.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b23cd8c7f775c5918ed1b4da826a0761" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.26.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "148e47c82df602179ec0844d238e2f42" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.26.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "3be4b6fe89a31fc195f450ad11c1d905" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.27.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "df402524bc4dba4ffaf4219260b4eb1c" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.25.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.25.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.25.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.26.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.26.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.26.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.26.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.26.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.26.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.26.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.27.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.27.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.27.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.27.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "db13c510c48e58a3aff2e22aae795c71" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.27.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "dd066911b2241800f752c49dde7bac70" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.27.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b148d9385e662f51faeb50dfed1d5a1d" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.28.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "1ccac697f9ea875287e4753982083165" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.28.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6841cf1fbc44d3f23524d81789bf0e0e" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.28.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "30054b1f0c7388ed43046f8566997650" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.29.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "94de7add2d1ab20228a0e4f1d77377c4" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.27.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.27.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.27.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.28.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.28.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.28.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.28.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.28.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.28.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.28.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.29.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.29.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.29.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.29.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "2c0062a6a6d52acdec6f11cb3aaaf6de" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.29.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "909574a4aa60cc828641c4207a561a9c" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.29.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7a1eaa3ed4ab7762ec93c3599369ecc3" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.3.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "7907a057c0031210977103b43c5dc9fe" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.3.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ad3e6aa9a9df0922c528e545015ba6db" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.3.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "fc4dc9bea6e56cdce260ad5719a20657" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.4.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "869fb1b2e48b1444237322c6012bf03c" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 26337280, "records": [ { "name": "transformer.h.29.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.29.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.29.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.3.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.3.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.3.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.3.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.3.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.3.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.3.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.30.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.30.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.4.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26306560 }, { "name": "transformer.h.4.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26311680 }, { "name": "transformer.h.4.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26316800 }, { "name": "transformer.h.4.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26332160 } ], "md5sum": "03f59c7d9b4ef5d5feeccb2cd41d08bc" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.4.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f5569ed6d398262e492beadeb1f7b9b7" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.4.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "73b3a979a958afd933d0257ecdbb5935" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.5.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b87e2b625fcf2ae94f95ef870e8deab7" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.5.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ff505633fd3f2e1d1c0784bab1476b5f" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.5.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "44686d72e9941c34f7b7e7b98e9a0096" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.6.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "5775e3923bede0dfa01081a5faf1f27e" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.4.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.4.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.4.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.5.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.5.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.5.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.5.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.5.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.5.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.5.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.6.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.6.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.6.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.6.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "364f57005b883b31668851a839a69780" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.6.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "429a757562a09daadf99baf1c1c433f6" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.6.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "9f0613b8605703260ce69ae291efd260" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.7.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "4897a4e1f64b21a4b3573ad6d8ffa080" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.7.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "5482001f3ba16cf3f736784d074d9f93" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.7.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "8fe6c6017e4b9a75159fbfb7ed1afd01" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.8.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "1d8bc3d9eca68f29aaaf1d511abf040e" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 26327040, "records": [ { "name": "transformer.h.6.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.6.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.6.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.7.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.7.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.7.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.7.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.7.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.7.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.7.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 }, { "name": "transformer.h.8.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26296320 }, { "name": "transformer.h.8.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26301440 }, { "name": "transformer.h.8.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26306560 }, { "name": "transformer.h.8.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26321920 } ], "md5sum": "22aff4897fca73d561b2e9f3995d1a3a" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.8.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "0bcb409a8ff6662bc4fa703c54af7dd1" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.8.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c4ea44fa99757549dd916c715c3d3d1c" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "transformer.h.9.mixer.Wqkv.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "76fdaae02abc63a48f01a4e3618b6d4b" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.9.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e2905c5ff18586e159ad2fba087eaae2" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.9.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ace0640841a3d6231a4bd28bb00d38ea" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 26296320, "records": [ { "name": "transformer.h.8.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "transformer.h.8.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13107200 }, { "name": "transformer.h.8.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "transformer.h.9.ln.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "transformer.h.9.ln.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "transformer.h.9.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13143040 }, { "name": "transformer.h.9.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13158400 }, { "name": "transformer.h.9.mixer.out_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13163520 }, { "name": "transformer.h.9.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26270720 }, { "name": "transformer.h.9.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26291200 } ], "md5sum": "be93d743931f4b3ca5e326c516935367" } ] }