|
{ |
|
"metadata": { |
|
"ParamSize": 245, |
|
"ParamBytes": 2836541440.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 209715200, |
|
"records": [ |
|
{ |
|
"name": "transformer.embd.weight", |
|
"shape": [ |
|
51200, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 209715200, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d7aed2891b8ee321c7ff8ce7ea950d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25186304, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 4096 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 8192 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "9b0e59013f2973f1181671c1b0b4f72e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f9c4bb76adcd940d3713a1956a5e4f25" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e5bfbacb7f7f915819368fc6d626f8a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e5203f9a5b245695c70f17136d2f3ad9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dfebe34190b742cd69b03ba55f9072dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5417d2d71e6d7e7171cdecd8d0827257" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9be1a56073bbbdb9b54292a2765604db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9c7895d91d69e27981657d52609cf6e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "999c47dec545f706dc92693576f91393" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f459a50c5268f920c7ebeb6e7158c81" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25300992, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8409088 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8413184 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8417280 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 8421376 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8433664 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16822272 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16826368 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16842752 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16846848 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16850944 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 16867328 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25255936 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 25260032 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25276416 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25280512 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25284608 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 25288704 |
|
} |
|
], |
|
"md5sum": "04328247921bd299acea3a62eb7096ce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6c1ff231187be8a6776ed3c4ba187fd4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f17013d0c36edfa6c2476a02d65a841" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "925950638f6b12c08814dcf1f43e63fc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b79b634dfa829126b2f4597aac52c4db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "477cdef30aad329d995dab8121cdd088" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fb9936d65c721d38d905eed5327675a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00ce4459c8de58641a53fb7bf8125485" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6fbb27a694f2a1e17b20edd363d2e5b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "79feccbaabda723271f032cd885562ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25300992, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8409088 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8413184 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8417280 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 8421376 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8433664 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16822272 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16826368 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16842752 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16846848 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16850944 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 16867328 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25255936 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 25260032 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25276416 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25280512 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25284608 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 25288704 |
|
} |
|
], |
|
"md5sum": "b90af6ecd4b88cc55f2b19a3875b3176" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5a13f78954f325e48348546183caa96a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f5b2dadead59b012b4b524eca0657111" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4bdee0dd3cd28c8e6adefa12d1e6886c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7406ce680dc11d99e24a9c322756effd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a6244592375275872ddb8ee0a7778482" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b7011357e4395ccec18edb3212c71c56" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9d37ddc1eecc99ed396a1ab6242c5e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eeb2924f4e7a29b61d1a5492e2b2bb69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7121eba6ee9c1a490aaf96c897fba701" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25300992, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8409088 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8413184 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8417280 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 8421376 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8433664 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16822272 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16826368 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16842752 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16846848 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16850944 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 16867328 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25255936 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 25260032 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25276416 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25280512 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25284608 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 25288704 |
|
} |
|
], |
|
"md5sum": "32fa912c4ba6df416cefb7332920742f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "30fe110e3a8b569ecb1fa572a990950f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "986ff794743c7d51991afb3cf74a2e4c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b8313730e60e034ef3231eadd3118d8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cb41ee9d86441d60b4532a7908b85a46" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d3ad8ca347a45a0eb5ef36466de04573" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "507f78f0724410d2fe737f590e7edd3f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ccd3c42d4f4ee85b1d37979281bc9bc6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b12b4c8219402d752c2e2ae48b5ec693" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a238caaeeb6d717209ba522038076b54" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25300992, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8409088 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8413184 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8417280 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 8421376 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8433664 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16822272 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16826368 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16842752 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16846848 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16850944 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 16867328 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25255936 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 25260032 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25276416 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25280512 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25284608 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 25288704 |
|
} |
|
], |
|
"md5sum": "1d2035221ed77e248ef4552cc0e4f600" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad12bb0b6334f4dc77d22c5b42dc75fd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c29981e6e7263a875355d7295e1bd11" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2c38f4d4c46ff318cd8cfaa91b8b224a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ddac460f9ccb6818cefb6cced792dccc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "22fbbefe8b4453ccf9efede6b45904af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0dce83d74d8b6f56f35e714b115e4d62" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f04756ce304cf04c3da5c106946fb158" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "936b75278db21de5aea0a23ddd1b1f7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fcf038293896b04e8a2504de9aa16c12" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25300992, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8409088 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8413184 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8417280 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 8421376 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8433664 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16822272 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16826368 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16842752 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16846848 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16850944 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 16867328 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25255936 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 25260032 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25276416 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25280512 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25284608 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 25288704 |
|
} |
|
], |
|
"md5sum": "9d27d23390e7dfc239d8c0ae4f1fd328" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "484da910f3409edef5b6559f8d924341" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "25ed0e3cee624f4302a0dce5265f54f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f6985ad19ab6fbf0f3039b69f5ca1072" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f1e1e7c17d6ae90624d480ce2dd0eb1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7754839e7869b7d513af4eea4ad5ff0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a97b65d826b588befb544faf399e92de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c052b2e2de95e7d068d3a81b7ae3bb49" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "876834de34626ac8aed5c9543fdbe29c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6c0987e482996f5148df7fc9e47a1e5f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25300992, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8409088 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8413184 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8417280 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 8421376 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8433664 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16822272 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16826368 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16842752 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16846848 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16850944 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 16867328 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25255936 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 25260032 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25276416 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25280512 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25284608 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 25288704 |
|
} |
|
], |
|
"md5sum": "f724f01dc27b796ea2ceb3bd0eec0c99" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d5876c1e358e6e8ae01783868e9b53e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "293ad3414d6ff8b0b135795abe79cd14" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4d2c42740ed083053146e3c2dfcc451c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bceda52294643b8550c610d07c042377" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f33e2ebe2dfad47cc386aa0946740b3b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "617f22d33235e08d67d0149d0bddaf23" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c9a38760670964161ee3dfecb979e8cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "68595d6d47d1d8871cb4a037790f27f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "54e9138e003502e85476c7928585921e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25300992, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8409088 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8413184 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8417280 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 8421376 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8433664 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16822272 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16826368 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16842752 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16846848 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16850944 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 16867328 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25255936 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 25260032 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25276416 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25280512 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25284608 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 25288704 |
|
} |
|
], |
|
"md5sum": "a98cbd2a429d7e73d025a41466f9e4ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cf0cb235048f9b2ab5bda0b238fc87ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fa6ec4d5eea5f4fbcdbd9b3643ac74da" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8fc0e39a42f9dd863d0b0d84467d6db3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "059fd459b8f03c1c468db3aa0a430dc0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f5e0ea9765cf9e12bda212f461c7860" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mixer.Wqkv.weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "17af60504a0a6063a01d3d0bc577cf05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.fc1.weight", |
|
"shape": [ |
|
8192, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "13afb0682e06ccf4c66b1a76bc21f5db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.fc2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "46dd3800979c3bbaad1953df7e42944b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 209715200, |
|
"records": [ |
|
{ |
|
"name": "lm_head.linear.weight", |
|
"shape": [ |
|
51200, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 209715200, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "09e75e73aa316bf5d8a84a10449de4d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25391104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8409088 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8413184 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8417280 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 8421376 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 8433664 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16822272 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16826368 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16842752 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16846848 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 16850944 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mixer.Wqkv.bias", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 16855040 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mixer.out_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 16867328 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mixer.out_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25255936 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.fc1.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 25260032 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.fc2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25276416 |
|
}, |
|
{ |
|
"name": "lm_head.ln.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25280512 |
|
}, |
|
{ |
|
"name": "lm_head.ln.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25284608 |
|
}, |
|
{ |
|
"name": "lm_head.linear.bias", |
|
"shape": [ |
|
51200 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 25288704 |
|
} |
|
], |
|
"md5sum": "0b341804848132f835b7e117a99a2c0a" |
|
} |
|
] |
|
} |