Phi-3.5-mini-instruct-q0f16-MLC / ndarray-cache.json
mengshyu's picture
Upload folder using huggingface_hub
25e9bdb verified
{
"metadata": {
"ParamSize": 195,
"ParamBytes": 7642159104.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 197001216,
"records": [
{
"name": "lm_head.weight",
"shape": [
32064,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 197001216,
"byteOffset": 0
}
],
"md5sum": "355cbdc18645ac1e5a7abdb8e0b4f555"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.21.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "cf28a1cacc4df6daa4f2ec3d5411ec8b"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.21.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8fd160898eedfe8502008427b1789f67"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.21.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "7eb060da1eb7a76488c6ad88bb55be68"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.22.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e43ba8574ee0bde28797ac6e1f4290cd"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.22.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "978b0567eaf6723e96528dfb9f34ae2c"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.22.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "6a6a3e4847b7604add5ebee94398d615"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.23.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d8ae3a2ad6177babf2e4f3004c3c1501"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.23.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b9c61a69bea764d8665bd1625c9b003f"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.23.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "91ef5f839a886ffd0474465618b5379f"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.23.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "268530f14d16de93a92bec1c9f18284a"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.24.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "cdfdc76d4737aadfa851a741f46e7acd"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.24.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "bed3c3a48dfbc1a5abcb39f89bf7a023"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.24.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "17f2ace615c304ac4abac920e340c996"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.24.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "129bd9f4ed4d4fc7d24c8529aa95246f"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.25.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e280a5b59a72f35c8437aa893feb74f0"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.25.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8539094db79c9fd29dfc411a79dba026"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.25.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "faa2883b38c085ca646fc6ab81abd39d"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.25.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "5591a612e538ff92e1543a71a58eb4b6"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.26.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "418024ea7732acd521e1ab189f5c7f82"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.26.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ad28f5efd45052104574d7a743178b10"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.26.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "62d0ca66c268c7f91df5c4aed581e0d1"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.26.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "f1a779e7c2019bf5ae85d7706e2f084e"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.27.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "35352b375985a227aa6840b106026012"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.27.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "738d864fe39149129b85f69c0c0c635e"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.27.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "022bb7ede68c0f9ff23735a9fedb4f32"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.27.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "0ad69d77d578753787e2fed8df17de42"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.28.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "96422f4a2384d546df2c5f5221e4bcf2"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.28.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5ee3c5fba261ad1ee080be9050b30d8d"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.28.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "bfe56c0deb2c00c6036e42be6409c8fc"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.28.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "062da8b2bb613a6a5af09ef1c793b62a"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.29.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "19435e3da71e0f7d237a2e6401d92d78"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.29.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f4fa5b358c8a574c36e5ee466f71c3f7"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.29.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "fedd1781bcda6d8cb8a0f216b0491661"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.29.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "807c0c0e0dc33c078780c826c65412f5"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.30.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a8917bfd3a8295471ea8c3d080380700"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.30.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "623da9d2482c4a0aeb3a40a66480e3aa"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.30.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2b595136a8328888c183d11b401b7d41"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.30.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "43ac5afec0fd1d91f6898cdb1e7fa3cb"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.31.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "bc50d0f973ffaa22757de43d4d1fbbab"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.31.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "1c3934bd451a66c703cf1dce6deee213"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.31.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "10d95c7d01f1bb5ca1e4412a9ed2c2ef"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.31.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "ffad92466030bc43e685bc879737b37a"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 197001216,
"records": [
{
"name": "transformer.embd.weight",
"shape": [
32064,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 197001216,
"byteOffset": 0
}
],
"md5sum": "4ff3016fcb146b8e981591475c80fd55"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.0.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d2089117ea980d37a17b97c1c2480824"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.0.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "08fcdeed7a735b3e54e4d751270702f0"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.0.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "a0a39f19ff0ecef7594769e0dde21b14"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.0.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "ec47cf48c288af63a08f5e420b0382d0"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.1.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6b520b0fa670029862be7e33ccb1e20c"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.1.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d698683d9729d3dc1f7bd46db350f0e6"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.1.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "8dac2a9e07c546b2a03d51d7cd6a6cb3"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.1.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "c9b539e646a5ef9c0f33d6622281fc78"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.10.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "185306cd725f9fcdc1d7e219a235de3d"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.10.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5c275cf6a9f185a75990a21b8d636ee0"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.10.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "66a324483e76960da9f8483e09193479"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.10.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "ad55826479c758a41057a783bb5cf768"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.11.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "8fdcc5d0f096e522c2e997ccc2f9b66f"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.11.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "9ba4e5bf1f85b55de33ec8d0fdced1f4"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.11.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "816a461f025574b571ccecf0c502a957"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.11.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "e101d259ecf64620fe505791e4e212d9"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.12.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f9e4d509b0406e88f41dab0f1bea9da2"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.12.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e4ea2dd00700fa17ca8347c498ccb6ac"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.12.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0d9a457299a8fb48735c118fa0d17244"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.12.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "9c749f5f94b57db159c5e8f0f724db51"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.13.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f2449244ccfdfc157a163b3b203e92c0"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.13.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "db2df24cf56b551bcc2548080570f4d8"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.13.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "785e19ea294ce5b71cc2103be15677e8"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.13.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "bfb01523b72ef50058c7ca5419e94b8d"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.14.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "30698dcede7faab88cc026e74edd3516"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.14.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "34ef8f55c49dc254792afaf5aabfccf5"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.14.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "6b7707a306c0f3d206d260084c351359"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.14.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "012b59f3bfe838bd356dda02f13a2cfe"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.15.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f467eb2377c883998559bf4aaaea8770"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.15.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "56278aba505322e53a1a3cd2220ae0df"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.15.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "1693e38eb67f5348a21cc630262027b4"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.15.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "736fc8cedf7555a03a1d3d3a1773a2b4"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.16.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e625d66e9c304190540b7390fc994c44"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.16.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "c67da25e62946eb1baaa539d9688d0f9"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.16.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "98fc4443e34666093d1bb9c377aa5c8f"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.16.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "ab449f3f4a1b4d792fdbaa6127f1dc55"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.17.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "96c9762484d3c89e9f3367d63a20133b"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.17.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8cee417759b02ecae8d39c5d82ec1013"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.17.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "6a255ae12df2e8f93e706e45c5aa04cc"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.17.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "85e048566ad49eccf1776a90b44b9ea7"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.18.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "3e8175b8ccd51257022b868c9b90949c"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.18.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a4ba7d28548c799b45f3f1857f4eda9f"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.18.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "ef5b8db6abf1b78b17f13c4350901bd3"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.18.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "6fb3d130a34ebc35495cc88aebca0a26"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.19.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4891197d898f1cc23fa9a0eda31f6f86"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.19.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "adb238ba2608c9260149c5de1ade8d0c"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.19.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2db87fb56a7880cf8976bc859388767d"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.19.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "256ada9253db24a7ef0aaa9cff1b5245"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.2.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "44e503562401484d752aaccf40850fdd"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.2.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "92b15620c753a0e3cfe818071a79a1a8"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.2.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e72876345ed49f2200cf289e7ff670e3"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.2.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "11638690d4019df7dbcb5d4ce42ebbd9"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.20.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "1e18403502a66158427c885a030b3012"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.20.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "cef53d13f9f17f4daf3902459a2bf86e"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.20.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "aca52f56ff0263682722eec9af60ddc5"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.20.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "b79439e9730eb2d9c95240fc31a766f3"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.21.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4083534b0423d4cd92f64053a3ecf9a1"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.3.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e05bdedb234537840669f5d402ab2114"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.3.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "869f30f4fcee3f58bae2cb13672f1aa7"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.3.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "6d2dca57ddfb9420cf81794a8ebedc67"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.3.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "77f51083eb8e430c4f51d3ff2a2aa150"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.4.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0673d8cc2dcb30f7c9bf8fe7f72fedcf"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.4.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8d74d91f6f0dbce4361fb9bd4319e435"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.4.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "a7280ca3d850c9e2c70798ad1827b753"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.4.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "feada6159342059b34ee9ee9f2b5a696"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.5.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c11c8f2bdc0dac4c24d73630a7bc29ce"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.5.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "2f405cbb6f4bae8e3d0a6eb6c2247b8b"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.5.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "7235c4fff722c734b82860b8a0a9b7d9"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.5.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "78506574176d25f78c32f912055a152d"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.6.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "974f760704e76ebde4b5de88bf2c4907"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.6.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "bfe07b2d96e1d263e33a6967393b3ff2"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.6.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "7f354a9bed45479ca9a1e00d02fe1b65"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.6.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "baae7b45e7aca08cb608530a819276db"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.7.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f8fe08779e3f816bc0081b3ef7f77126"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.7.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "156dd0541b9bfb11d07e724a38d10cc6"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.7.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "d25c30dbcd729ca38d6ec3dd48274825"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.7.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "8fa83aeab434da353f36c7f4604958cc"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.8.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "cae6c26be7a1e1e6f91d1859e7a57211"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.8.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f1c9153abd2616db5ba5c825a3bebebf"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.8.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "3439e666077cd25f208166702d8bc0d6"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.8.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "7fc4f7e9e589c6e3cac14a2457d35da0"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "transformer.h.9.mlp.down_proj.weight",
"shape": [
3072,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "004c33d05d98ef6d3242cad950e68831"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "transformer.h.9.mlp.gate_up_proj.weight",
"shape": [
16384,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "60ef30f75f51b8148b779a4134ba86b3"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.9.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "6088d58197ec37d982808983e9b8c234"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "transformer.h.9.mixer.qkv_proj.weight",
"shape": [
9216,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "6f2ea673e396f982a532b9bd7612996e"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 19273728,
"records": [
{
"name": "transformer.h.21.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 0
},
{
"name": "transformer.h.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 6144
},
{
"name": "transformer.h.22.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 12288
},
{
"name": "transformer.h.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18432
},
{
"name": "transformer.h.22.mixer.out_proj.weight",
"shape": [
3072,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 24576
},
{
"name": "transformer.h.23.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18898944
},
{
"name": "transformer.h.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18905088
},
{
"name": "transformer.h.24.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18911232
},
{
"name": "transformer.h.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18917376
},
{
"name": "transformer.h.25.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18923520
},
{
"name": "transformer.h.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18929664
},
{
"name": "transformer.h.26.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18935808
},
{
"name": "transformer.h.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18941952
},
{
"name": "transformer.h.27.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18948096
},
{
"name": "transformer.h.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18954240
},
{
"name": "transformer.h.28.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18960384
},
{
"name": "transformer.h.28.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18966528
},
{
"name": "transformer.h.29.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18972672
},
{
"name": "transformer.h.29.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18978816
},
{
"name": "transformer.h.30.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18984960
},
{
"name": "transformer.h.30.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18991104
},
{
"name": "transformer.h.31.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18997248
},
{
"name": "transformer.h.31.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19003392
},
{
"name": "transformer.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19009536
},
{
"name": "transformer.h.0.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19015680
},
{
"name": "transformer.h.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19021824
},
{
"name": "transformer.h.1.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19027968
},
{
"name": "transformer.h.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19034112
},
{
"name": "transformer.h.10.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19040256
},
{
"name": "transformer.h.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19046400
},
{
"name": "transformer.h.11.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19052544
},
{
"name": "transformer.h.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19058688
},
{
"name": "transformer.h.12.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19064832
},
{
"name": "transformer.h.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19070976
},
{
"name": "transformer.h.13.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19077120
},
{
"name": "transformer.h.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19083264
},
{
"name": "transformer.h.14.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19089408
},
{
"name": "transformer.h.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19095552
},
{
"name": "transformer.h.15.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19101696
},
{
"name": "transformer.h.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19107840
},
{
"name": "transformer.h.16.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19113984
},
{
"name": "transformer.h.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19120128
},
{
"name": "transformer.h.17.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19126272
},
{
"name": "transformer.h.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19132416
},
{
"name": "transformer.h.18.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19138560
},
{
"name": "transformer.h.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19144704
},
{
"name": "transformer.h.19.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19150848
},
{
"name": "transformer.h.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19156992
},
{
"name": "transformer.h.2.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19163136
},
{
"name": "transformer.h.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19169280
},
{
"name": "transformer.h.20.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19175424
},
{
"name": "transformer.h.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19181568
},
{
"name": "transformer.h.3.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19187712
},
{
"name": "transformer.h.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19193856
},
{
"name": "transformer.h.4.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19200000
},
{
"name": "transformer.h.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19206144
},
{
"name": "transformer.h.5.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19212288
},
{
"name": "transformer.h.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19218432
},
{
"name": "transformer.h.6.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19224576
},
{
"name": "transformer.h.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19230720
},
{
"name": "transformer.h.7.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19236864
},
{
"name": "transformer.h.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19243008
},
{
"name": "transformer.h.8.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19249152
},
{
"name": "transformer.h.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19255296
},
{
"name": "transformer.h.9.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19261440
},
{
"name": "transformer.h.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19267584
}
],
"md5sum": "92b95946386cfea46d3fa66d3beb05de"
}
]
}