diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4301 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 11118735360.0, + "BitsPerParam": 32.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "lm_head.linear.weight", + "shape": [ + 51200, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "72c3fae3b776fc40e15f87604b845ed5" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.30.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "9ff99441a514882fb2ee64e16beb76fd" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.30.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "bb2aec140fac21c669528366591e3a77" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.30.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "48717e462ab585b46fb895b627126bb8" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.31.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "eb3db13732d5c817dcafb8191d43df3f" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.31.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "f65ce63a5d7845311db1673d2fda37ba" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.31.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "b54fc4f41a60a0b845e31c4e64666b8c" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "transformer.embd.weight", + "shape": [ + 51200, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "fe07943feda29e33311211330c358d79" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.0.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1b700f8db3506f0cf91f415001848b2e" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 26460160, + "records": [ + { + "name": "lm_head.linear.bias", + "shape": [ + 51200 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 102400, + "byteOffset": 0 + }, + { + "name": "lm_head.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 102400 + }, + { + "name": "lm_head.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 107520 + }, + { + "name": "transformer.h.30.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 112640 + }, + { + "name": "transformer.h.30.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 128000 + }, + { + "name": "transformer.h.30.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 133120 + }, + { + "name": "transformer.h.30.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13240320 + }, + { + "name": "transformer.h.30.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13260800 + }, + { + "name": "transformer.h.31.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13265920 + }, + { + "name": "transformer.h.31.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13271040 + }, + { + "name": "transformer.h.31.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13276160 + }, + { + "name": "transformer.h.31.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13291520 + }, + { + "name": "transformer.h.31.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13296640 + }, + { + "name": "transformer.h.31.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26403840 + }, + { + "name": "transformer.h.31.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26424320 + }, + { + "name": "transformer.h.0.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26429440 + }, + { + "name": "transformer.h.0.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26434560 + }, + { + "name": "transformer.h.0.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26439680 + }, + { + "name": "transformer.h.0.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26455040 + } + ], + "md5sum": "8e8bafe6dbfb85948d1e68f7c3d331c9" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.0.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6a8666b57943ae5779046eabcedd4368" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.0.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ea4bd6378ac24fb06732f934f2784c6b" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.1.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "3962256eaa8bc206e895285d1a7f0357" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.1.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c47d34f9b6060eb7e784fc82805257d9" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.1.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "803a0d20c3434028bf50e092ae2cae55" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.10.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b919c749bebf96f18855c8bd5ad234a0" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.0.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.0.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.1.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.1.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.1.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.1.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.1.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.1.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.1.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.10.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.10.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.10.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.10.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "998a1ceb620ee309630d73822543a765" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.10.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "9bee5a1c846f66c76c3b5bdd291350cd" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.10.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ea8f51008ef282313341d459d6164340" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.11.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "814c34ea78a434728cf836b7e777ed06" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.11.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "233903939a554b11fb26250d67d8002f" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.11.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "65a81f6fccd0d05aae50650dab2c04de" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.12.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "035f036dc34d5b95f6503d581def7920" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.10.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.10.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.10.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.11.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.11.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.11.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.11.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.11.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.11.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.11.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.12.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.12.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.12.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.12.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "6314df114c8bc449accbdfb66c7f3dac" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.12.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "1822c71b27b43fdb3433c133e4ed28e2" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.12.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "eab022f5ced01b2c774aeed32b59e635" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.13.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ff5209be3b95203add8c4b8df46a92fb" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.13.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c3801b278ff0b1288f0a20a5d1810d6e" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.13.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "1b88e21871c66345d3e541eb3e5d05c3" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.14.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "52f7eca32be0c3468e3fe0114d7df31a" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.12.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.12.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.13.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.13.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.13.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.13.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.13.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.13.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.13.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.14.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.14.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.14.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.14.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "aebd2a0198dbcdd7d7c690c39c6417f5" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.14.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "f87815773490d1b0c222fe60f96b72ba" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.14.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d665e86003133ad68300f3fe7a4e9359" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.15.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "a318393d22bd178cdd36f8b45e4d5d89" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.15.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6adc811a3327a20b25af95f6a113cbdf" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.15.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "bc84968f0ab5a21a82b80ae697913dad" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.16.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "824712c1c6bb19d150e84511c2e2d53a" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.14.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.14.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.15.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.15.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.15.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.15.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.15.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.15.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.15.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.16.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.16.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.16.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.16.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "af4808c0e8f5da2d991b74bf4304230b" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.16.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "57491d5adcbf6a91983d0421dc1c1d36" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.16.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "00e1c665760e8f38cd17314b47e43137" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.17.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ff3a152d0e459e7629d27588e0cde9b9" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.17.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "81ebbb27207ef07b4b0f756cbd7fcabd" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.17.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "2d102050e8791eca1d1bab41b16e4a1a" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.18.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f83994e9580ca397ed13d03331b73bbc" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.16.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.16.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.16.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.17.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.17.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.17.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.17.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.17.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.17.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.17.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.18.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.18.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.18.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.18.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "afd0ec443cec3c92e3a46481c21384b6" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.18.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6b24d892bfedde65df5884e4656acf08" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.18.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "706cfd506d0e95c272b3233e23c861f2" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.19.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "de4134eaf8218601df14041bc536588c" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.19.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "f674b443d88b0d70f7e23d83896104fb" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.19.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e18e5ddc3111b615eb79678b804239d7" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.2.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "88e9221b1a673be52a66153940e94261" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.18.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.18.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.19.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.19.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.19.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.19.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.19.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.19.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.19.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.2.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.2.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.2.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.2.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "b659f97e184c45e3c376539c08cd5d5d" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.2.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "0aa95aa9e04199d30b24a88910670ebc" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.2.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ab86c0b1ba827b6da96b5b605af15947" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.20.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "995e576862a39e3bbf258cc488167381" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.20.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "0ea061ae65b0c11b3a743c6c8c2b63a6" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.20.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "31c6fb1210cfb3106406da2833a68727" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.21.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "3aee299dfdfef47756a71140cae47be0" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.2.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.2.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.20.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.20.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.20.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.20.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.20.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.20.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.20.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.21.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.21.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.21.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.21.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "1e808418fd138b4ab390a5b2da37a4fe" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.21.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "350583ac7cc80d6f669cf5689b5b6716" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.21.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "70e329b5ca5914090897fd800d762a29" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.22.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "5e7dc4cf644b2c42b01bbca70e3767ca" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.22.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "fb4f8b740031332b3ac7f72d90db7a73" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.22.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "36a3a5d282c032133d473bfea5d955de" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.23.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b48a6bfd7d2c80b0c2091b2212a80f10" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.21.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.21.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.22.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.22.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.22.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.22.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.22.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.22.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.22.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.23.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.23.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.23.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.23.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "feb77afec50b1cb66fe867d8e434901f" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.23.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "392142ef1248d4ff39989e35075af68e" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.23.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "3df27b8259c7aa05cf44ab1d004959ac" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.24.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b1569bdb6d5b6e63674ee1f42c15edb4" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.24.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ea1aafc3c9ad6134a16a4d997ccdc42e" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.24.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "eb68ff024350661b1c0788c7d08852d0" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.25.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "68dda454e4a5265d702068efbbfdb6de" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.23.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.23.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.24.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.24.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.24.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.24.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.24.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.24.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.24.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.25.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.25.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.25.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.25.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "95525045e136adfb1a0db31d65cb9881" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.25.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "db9abecb308be257bbf0f70a09a638e7" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.25.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "34a328e8c8967a7658e9a2833365f604" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.26.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b23cd8c7f775c5918ed1b4da826a0761" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.26.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "148e47c82df602179ec0844d238e2f42" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.26.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "3be4b6fe89a31fc195f450ad11c1d905" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.27.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "df402524bc4dba4ffaf4219260b4eb1c" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.25.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.25.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.25.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.26.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.26.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.26.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.26.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.26.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.26.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.26.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.27.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.27.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.27.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.27.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "db13c510c48e58a3aff2e22aae795c71" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.27.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "dd066911b2241800f752c49dde7bac70" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.27.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "b148d9385e662f51faeb50dfed1d5a1d" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.28.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1ccac697f9ea875287e4753982083165" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.28.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6841cf1fbc44d3f23524d81789bf0e0e" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.28.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "30054b1f0c7388ed43046f8566997650" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.29.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "94de7add2d1ab20228a0e4f1d77377c4" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.27.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.27.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.27.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.28.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.28.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.28.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.28.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.28.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.28.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.28.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.29.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.29.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.29.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.29.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "2c0062a6a6d52acdec6f11cb3aaaf6de" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.29.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "909574a4aa60cc828641c4207a561a9c" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.29.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "7a1eaa3ed4ab7762ec93c3599369ecc3" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.3.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "7907a057c0031210977103b43c5dc9fe" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.3.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ad3e6aa9a9df0922c528e545015ba6db" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.3.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "fc4dc9bea6e56cdce260ad5719a20657" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.4.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "869fb1b2e48b1444237322c6012bf03c" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 26337280, + "records": [ + { + "name": "transformer.h.29.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.29.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.29.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.3.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.3.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.3.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.3.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.3.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.3.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.3.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.30.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.30.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.4.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.4.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26311680 + }, + { + "name": "transformer.h.4.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26316800 + }, + { + "name": "transformer.h.4.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26332160 + } + ], + "md5sum": "03f59c7d9b4ef5d5feeccb2cd41d08bc" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.4.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "f5569ed6d398262e492beadeb1f7b9b7" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.4.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "73b3a979a958afd933d0257ecdbb5935" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.5.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b87e2b625fcf2ae94f95ef870e8deab7" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.5.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ff505633fd3f2e1d1c0784bab1476b5f" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.5.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "44686d72e9941c34f7b7e7b98e9a0096" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.6.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "5775e3923bede0dfa01081a5faf1f27e" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.4.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.4.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.4.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.5.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.5.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.5.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.5.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.5.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.5.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.5.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.6.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.6.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.6.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.6.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "364f57005b883b31668851a839a69780" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.6.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "429a757562a09daadf99baf1c1c433f6" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.6.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "9f0613b8605703260ce69ae291efd260" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.7.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4897a4e1f64b21a4b3573ad6d8ffa080" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.7.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "5482001f3ba16cf3f736784d074d9f93" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.7.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "8fe6c6017e4b9a75159fbfb7ed1afd01" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.8.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1d8bc3d9eca68f29aaaf1d511abf040e" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 26327040, + "records": [ + { + "name": "transformer.h.6.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.6.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.6.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.7.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.7.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.7.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.7.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.7.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.7.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.7.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + }, + { + "name": "transformer.h.8.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26296320 + }, + { + "name": "transformer.h.8.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26301440 + }, + { + "name": "transformer.h.8.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 26306560 + }, + { + "name": "transformer.h.8.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26321920 + } + ], + "md5sum": "22aff4897fca73d561b2e9f3995d1a3a" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.8.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "0bcb409a8ff6662bc4fa703c54af7dd1" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.8.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c4ea44fa99757549dd916c715c3d3d1c" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "transformer.h.9.mixer.Wqkv.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "76fdaae02abc63a48f01a4e3618b6d4b" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.9.mlp.fc1.weight", + "shape": [ + 10240, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e2905c5ff18586e159ad2fba087eaae2" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "transformer.h.9.mlp.fc2.weight", + "shape": [ + 2560, + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ace0640841a3d6231a4bd28bb00d38ea" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 26296320, + "records": [ + { + "name": "transformer.h.8.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "transformer.h.8.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 13107200 + }, + { + "name": "transformer.h.8.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "transformer.h.9.ln.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "transformer.h.9.ln.weight", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "transformer.h.9.mixer.Wqkv.bias", + "shape": [ + 7680 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 13143040 + }, + { + "name": "transformer.h.9.mixer.out_proj.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13158400 + }, + { + "name": "transformer.h.9.mixer.out_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13163520 + }, + { + "name": "transformer.h.9.mlp.fc1.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 26270720 + }, + { + "name": "transformer.h.9.mlp.fc2.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26291200 + } + ], + "md5sum": "be93d743931f4b3ca5e326c516935367" + } + ] +} \ No newline at end of file