{ "metadata": { "ParamSize": 245, "ParamBytes": 2836541440.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 209715200, "records": [ { "name": "transformer.embd.weight", "shape": [ 51200, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 209715200, "byteOffset": 0 } ], "md5sum": "3d7aed2891b8ee321c7ff8ce7ea950d8" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 25186304, "records": [ { "name": "transformer.h.0.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 0 }, { "name": "transformer.h.0.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4096 }, { "name": "transformer.h.0.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 8192 }, { "name": "transformer.h.0.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25174016 } ], "md5sum": "9b0e59013f2973f1181671c1b0b4f72e" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.0.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f9c4bb76adcd940d3713a1956a5e4f25" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.0.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8e5bfbacb7f7f915819368fc6d626f8a" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.1.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e5203f9a5b245695c70f17136d2f3ad9" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.1.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dfebe34190b742cd69b03ba55f9072dd" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.1.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5417d2d71e6d7e7171cdecd8d0827257" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.2.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9be1a56073bbbdb9b54292a2765604db" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.2.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9c7895d91d69e27981657d52609cf6e0" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.2.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "999c47dec545f706dc92693576f91393" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.3.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0f459a50c5268f920c7ebeb6e7158c81" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.0.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.0.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.0.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.0.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.1.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.1.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.1.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.1.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.1.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.1.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.1.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.2.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.2.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.2.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.2.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.2.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.2.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.2.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.3.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.3.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.3.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "04328247921bd299acea3a62eb7096ce" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.3.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6c1ff231187be8a6776ed3c4ba187fd4" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.3.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6f17013d0c36edfa6c2476a02d65a841" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.4.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "925950638f6b12c08814dcf1f43e63fc" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.4.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b79b634dfa829126b2f4597aac52c4db" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.4.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "477cdef30aad329d995dab8121cdd088" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.5.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fb9936d65c721d38d905eed5327675a5" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.5.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "00ce4459c8de58641a53fb7bf8125485" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.5.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6fbb27a694f2a1e17b20edd363d2e5b0" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.6.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "79feccbaabda723271f032cd885562ef" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.3.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.3.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.3.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.3.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.4.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.4.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.4.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.4.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.4.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.4.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.4.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.5.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.5.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.5.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.5.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.5.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.5.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.5.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.6.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.6.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.6.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "b90af6ecd4b88cc55f2b19a3875b3176" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.6.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5a13f78954f325e48348546183caa96a" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.6.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f5b2dadead59b012b4b524eca0657111" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.7.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4bdee0dd3cd28c8e6adefa12d1e6886c" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.7.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7406ce680dc11d99e24a9c322756effd" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.7.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a6244592375275872ddb8ee0a7778482" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.8.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b7011357e4395ccec18edb3212c71c56" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.8.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b9d37ddc1eecc99ed396a1ab6242c5e3" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.8.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "eeb2924f4e7a29b61d1a5492e2b2bb69" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.9.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7121eba6ee9c1a490aaf96c897fba701" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.6.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.6.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.6.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.6.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.7.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.7.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.7.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.7.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.7.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.7.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.7.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.8.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.8.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.8.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.8.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.8.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.8.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.8.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.9.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.9.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.9.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "32fa912c4ba6df416cefb7332920742f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.9.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "30fe110e3a8b569ecb1fa572a990950f" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.9.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "986ff794743c7d51991afb3cf74a2e4c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.10.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b8313730e60e034ef3231eadd3118d8c" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.10.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cb41ee9d86441d60b4532a7908b85a46" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.10.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d3ad8ca347a45a0eb5ef36466de04573" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.11.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "507f78f0724410d2fe737f590e7edd3f" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.11.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ccd3c42d4f4ee85b1d37979281bc9bc6" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.11.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b12b4c8219402d752c2e2ae48b5ec693" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.12.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a238caaeeb6d717209ba522038076b54" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.9.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.9.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.9.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.9.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.10.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.10.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.10.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.10.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.10.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.10.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.10.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.11.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.11.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.11.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.11.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.11.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.11.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.11.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.12.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.12.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.12.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "1d2035221ed77e248ef4552cc0e4f600" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.12.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ad12bb0b6334f4dc77d22c5b42dc75fd" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.12.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4c29981e6e7263a875355d7295e1bd11" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.13.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2c38f4d4c46ff318cd8cfaa91b8b224a" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.13.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ddac460f9ccb6818cefb6cced792dccc" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.13.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "22fbbefe8b4453ccf9efede6b45904af" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.14.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0dce83d74d8b6f56f35e714b115e4d62" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.14.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f04756ce304cf04c3da5c106946fb158" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.14.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "936b75278db21de5aea0a23ddd1b1f7b" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.15.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fcf038293896b04e8a2504de9aa16c12" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.12.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.12.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.12.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.12.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.13.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.13.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.13.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.13.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.13.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.13.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.13.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.14.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.14.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.14.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.14.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.14.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.14.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.14.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.15.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.15.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.15.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "9d27d23390e7dfc239d8c0ae4f1fd328" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.15.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "484da910f3409edef5b6559f8d924341" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.15.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "25ed0e3cee624f4302a0dce5265f54f3" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.16.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f6985ad19ab6fbf0f3039b69f5ca1072" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.16.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2f1e1e7c17d6ae90624d480ce2dd0eb1" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.16.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f7754839e7869b7d513af4eea4ad5ff0" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.17.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a97b65d826b588befb544faf399e92de" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.17.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c052b2e2de95e7d068d3a81b7ae3bb49" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.17.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "876834de34626ac8aed5c9543fdbe29c" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.18.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6c0987e482996f5148df7fc9e47a1e5f" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.15.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.15.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.15.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.15.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.16.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.16.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.16.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.16.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.16.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.16.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.16.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.17.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.17.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.17.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.17.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.17.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.17.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.17.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.18.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.18.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.18.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "f724f01dc27b796ea2ceb3bd0eec0c99" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.18.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6d5876c1e358e6e8ae01783868e9b53e" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.18.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "293ad3414d6ff8b0b135795abe79cd14" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.19.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4d2c42740ed083053146e3c2dfcc451c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.19.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bceda52294643b8550c610d07c042377" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.19.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f33e2ebe2dfad47cc386aa0946740b3b" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.20.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "617f22d33235e08d67d0149d0bddaf23" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.20.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c9a38760670964161ee3dfecb979e8cc" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.20.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "68595d6d47d1d8871cb4a037790f27f3" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.21.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "54e9138e003502e85476c7928585921e" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.18.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.18.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.18.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.18.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.19.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.19.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.19.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.19.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.19.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.19.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.19.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.20.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.20.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.20.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.20.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.20.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.20.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.20.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.21.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.21.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.21.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "a98cbd2a429d7e73d025a41466f9e4ef" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.21.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cf0cb235048f9b2ab5bda0b238fc87ef" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.21.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fa6ec4d5eea5f4fbcdbd9b3643ac74da" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.22.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8fc0e39a42f9dd863d0b0d84467d6db3" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.22.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "059fd459b8f03c1c468db3aa0a430dc0" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.22.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0f5e0ea9765cf9e12bda212f461c7860" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.23.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "17af60504a0a6063a01d3d0bc577cf05" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.23.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "13afb0682e06ccf4c66b1a76bc21f5db" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.23.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "46dd3800979c3bbaad1953df7e42944b" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 209715200, "records": [ { "name": "lm_head.linear.weight", "shape": [ 51200, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 209715200, "byteOffset": 0 } ], "md5sum": "09e75e73aa316bf5d8a84a10449de4d9" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25391104, "records": [ { "name": "transformer.h.21.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.21.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.21.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.21.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.22.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.22.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.22.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.22.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.22.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.22.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.22.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.23.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.23.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.23.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.23.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.23.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.23.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.23.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "lm_head.ln.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "lm_head.ln.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "lm_head.linear.bias", "shape": [ 51200 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 25288704 } ], "md5sum": "0b341804848132f835b7e117a99a2c0a" } ] }