{ "metadata": { "ParamSize": 968, "ParamBytes": 4232609792.0, "BitsPerParam": 4.503947784206684 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.embeddings.q_weight", "shape": [ 65536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "046e30a4a3cfd1f0f164a97940f9e540" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 26312704, "records": [ { "name": "model.embeddings.q_scale", "shape": [ 65536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 }, { "name": "model.blocks.0.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16777216 }, { "name": "model.blocks.0.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16785408 }, { "name": "model.blocks.0.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "model.blocks.0.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16801792 }, { "name": "model.blocks.0.pre_ln.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16809984 }, { "name": "model.blocks.0.pre_ln.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16818176 }, { "name": "model.blocks.0.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16826368 }, { "name": "model.blocks.0.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16834560 }, { "name": "model.blocks.0.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16842752 }, { "name": "model.blocks.0.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16850944 }, { "name": "model.blocks.0.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16859136 }, { "name": "model.blocks.0.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16867328 }, { "name": "model.blocks.0.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16875520 }, { "name": "model.blocks.0.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25264128 } ], "md5sum": "0295524fe14816683e43aeed0b74e6f5" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.blocks.0.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.0.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.0.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.0.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.0.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18874368 }, { "name": "model.blocks.0.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 27262976 } ], "md5sum": "24520cbea6e3b902b4b9b1c4544a1cde" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.0.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "866cec5e28a0a4c633ed20d40a50965f" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.0.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3bea6f61a7605f236f0c1dcef0c6e62c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 26329088, "records": [ { "name": "model.blocks.0.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.0.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.0.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.blocks.0.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9445376 }, { "name": "model.blocks.0.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9453568 }, { "name": "model.blocks.0.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9461760 }, { "name": "model.blocks.0.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9469952 }, { "name": "model.blocks.0.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13139968 }, { "name": "model.blocks.0.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21528576 }, { "name": "model.blocks.0.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 22577152 }, { "name": "model.blocks.1.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26247168 }, { "name": "model.blocks.1.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26255360 }, { "name": "model.blocks.1.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26263552 }, { "name": "model.blocks.1.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26271744 }, { "name": "model.blocks.1.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26279936 }, { "name": "model.blocks.1.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26288128 }, { "name": "model.blocks.1.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26296320 }, { "name": "model.blocks.1.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26304512 }, { "name": "model.blocks.1.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26312704 }, { "name": "model.blocks.1.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26320896 } ], "md5sum": "661dc4de7e50384e69e9306211766bef" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.blocks.1.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.1.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.1.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.1.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.1.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18874368 }, { "name": "model.blocks.1.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 27262976 } ], "md5sum": "efc86f517ab68b932cfb79d91b512611" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.1.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b980efdc563407d6c83a75219d76814f" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.1.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6c7bcfc161bc84938e904409aecfb72d" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.1.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.1.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.1.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.1.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.1.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.1.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.1.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.1.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.1.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.1.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.1.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "34538b55ee6d6de02ae33ac034ca600f" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.1.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.2.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.2.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.2.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.2.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.2.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.2.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.2.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.2.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.2.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.2.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.2.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.2.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.2.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.2.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.2.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.2.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "2315fa1cbbed365f00d6f4b1796cdf0b" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.2.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "10ffe073eaaf5c92c632081d5398516d" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.2.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "067aea6c57cb395d5a7de73df9584408" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.2.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.2.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.2.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.2.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.2.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.2.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.2.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.2.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.2.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.2.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.2.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "dae027157816803616b9a2661a4cdad0" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.2.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.3.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.3.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.3.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.3.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.3.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.3.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.3.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.3.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.3.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.3.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.3.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.3.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.3.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.3.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.3.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.3.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "dacfc1937e8cfe2853f69d93bbdf79e0" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.3.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "198d005315cea9d6c48808ff1de7530e" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.3.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4b92713f7e18be75736692e73f952c3c" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.3.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.3.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.3.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.3.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.3.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.3.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.3.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.3.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.3.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.3.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.3.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "7351fc4707e68baeefa0df7c6c623568" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.3.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.4.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.4.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.4.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.4.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.4.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.4.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.4.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.4.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.4.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.4.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.4.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.4.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.4.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.4.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.4.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.4.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "f54e7159751b86752a84b5f1b17a15a2" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.4.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cb6619f759c798c48d8f305e771e86ad" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.4.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7f54497db79f632a61698b80ad4bffb9" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.4.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.4.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.4.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.4.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.4.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.4.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.4.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.4.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.4.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.4.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.4.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "9b52086970bb041c8918001bfcbf0d49" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.4.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.5.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.5.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.5.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.5.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.5.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.5.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.5.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.5.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.5.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.5.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.5.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.5.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.5.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.5.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.5.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.5.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "9c644d4c89f00b95c4b72a3f3aa83664" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.5.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3d0b583ab74e64d1844babd9939d9b8f" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.5.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "88ff61b9e6999528d91bdfeec6a4229a" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.5.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.5.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.5.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.5.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.5.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.5.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.5.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.5.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.5.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.5.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.5.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "3ac3b218f29991dbed1b4c9b51f22f8f" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.5.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.6.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.6.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.6.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.6.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.6.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.6.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.6.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.6.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.6.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.6.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.6.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.6.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.6.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.6.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.6.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.6.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "dea5a129bd5019f00c655333515108a8" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.6.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1d40f97be2590ed11ec51e9fb630b7ed" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.6.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9314627148ca4905167f1ce61cf8358d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.6.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.6.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.6.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.6.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.6.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.6.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.6.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.6.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.6.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.6.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.6.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "90679f2e25dc8d33900f586a43f7b9e8" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.6.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.7.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.7.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.7.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.7.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.7.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.7.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.7.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.7.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.7.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.7.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.7.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.7.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.7.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.7.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.7.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.7.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "07ed7b601822a7a71fcda4460e3ccc71" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.7.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "edf9527ad19ab0a6cab605607a247c04" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.7.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "87e357c8978cceb7d327a21d9904c4b8" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.7.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.7.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.7.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.7.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.7.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.7.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.7.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.7.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.7.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.7.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.7.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "f91dc74ad448f1ccb94d5881f4ddd391" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.7.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.8.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.8.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.8.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.8.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.8.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.8.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.8.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.8.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.8.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.8.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.8.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.8.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.8.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.8.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.8.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.8.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "d6b46c0d4a6010838f7f47156045f60d" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.8.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "74c066b92485eb4bc3575a6b55ce177d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.8.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b2efef932ff3673f5f1bf9a82703ae53" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.8.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.8.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.8.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.8.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.8.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.8.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.8.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.8.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.8.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.8.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.8.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "f2e4fbaafc8b75252519feb17ff0f594" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.8.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.9.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.9.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.9.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.9.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.9.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.9.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.9.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.9.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.9.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.9.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.9.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.9.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.9.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.9.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.9.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.9.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "8416d090ef375f0ecf2e2a946ef3ce7d" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.9.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "650c045cdc4b9121eba74dad0b74b928" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.9.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "45bb2237c29ae1c6d3f6ffe2a0f58f1c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.9.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.9.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.9.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.9.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.9.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.9.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.9.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.9.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.9.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.9.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.9.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "7a97d793ca53279bcc4b0b9a5eb42d5c" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.9.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.10.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.10.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.10.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.10.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.10.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.10.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.10.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.10.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.10.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.10.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.10.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.10.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.10.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.10.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.10.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.10.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "9f5528dd7425508274f14c30d1baae2f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.10.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0753a6c451a2bba808743a3fad39ac9c" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.10.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "289cf390d5b78283d60cfbe0408e931c" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.10.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.10.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.10.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.10.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.10.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.10.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.10.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.10.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.10.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.10.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.10.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "55314819fca430d3be78d7189015bca3" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.10.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.11.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.11.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.11.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.11.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.11.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.11.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.11.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.11.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.11.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.11.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.11.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.11.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.11.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.11.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.11.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.11.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "b09d4abf7e0dd0a18f2c676f555e3109" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.11.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "aec2c76707cd09171d2c84187541813e" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.11.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0ad9ca4a5f1fbb76c348d3f82b8b0d3e" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.11.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.11.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.11.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.11.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.11.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.11.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.11.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.11.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.11.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.11.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.11.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "d8b1c3db2a9d81e4e8b601f7389259c6" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.11.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.12.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.12.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.12.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.12.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.12.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.12.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.12.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.12.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.12.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.12.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.12.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.12.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.12.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.12.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.12.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.12.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "7af819acbaac0aa1ff9ea12be38b346d" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.12.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ca9cc3c2806682cb581059d03bad831d" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.12.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8be59f7a46352917352ff352156fb034" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.12.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.12.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.12.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.12.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.12.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.12.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.12.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.12.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.12.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.12.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.12.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "c16d4141de11f9e2bf7e5d07606b2266" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.12.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.13.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.13.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.13.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.13.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.13.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.13.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.13.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.13.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.13.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.13.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.13.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.13.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.13.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.13.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.13.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.13.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "cbe7fa3850baf4b3cae63cbf4ec2ee64" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.13.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b700200a9a04b1d97623f8a41645636b" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.13.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "34bc7a88722a31bc4d035d86198618da" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.13.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.13.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.13.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.13.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.13.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.13.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.13.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.13.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.13.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.13.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.13.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "c8c6f5d7e13b7e50a91ea8cd173f49a6" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.13.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.14.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.14.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.14.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.14.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.14.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.14.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.14.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.14.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.14.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.14.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.14.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.14.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.14.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.14.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.14.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.14.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "4e47d2fe8528a5dbdc973d41f55c3e89" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.14.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "184996fd57d6f79bb6416b71033c8d47" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.14.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a9a602610807c6aa19fd083a4e2be034" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.14.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.14.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.14.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.14.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.14.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.14.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.14.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.14.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.14.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.14.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.14.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "9a719bab5ee0b6b46838ab89bed3e5e0" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.14.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.15.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.15.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.15.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.15.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.15.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.15.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.15.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.15.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.15.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.15.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.15.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.15.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.15.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.15.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.15.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.15.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "ae9c852e935eeade224ceb1dc47d6525" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.15.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9d728fcdba1fd7ca1d3969fced4d481f" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.15.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "61e8c6ec3bdf91dcfe5be32564a09961" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.15.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.15.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.15.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.15.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.15.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.15.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.15.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.15.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.15.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.15.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.15.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "46c24894a22cbf694ef973c5161e17b2" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.15.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.16.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.16.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.16.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.16.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.16.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.16.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.16.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.16.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.16.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.16.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.16.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.16.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.16.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.16.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.16.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.16.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "8010343132404299d2034cc787776626" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.16.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8ae651b6522b9e37efd31776ec1470e9" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.16.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4c2b2c76c2ae918452c459ba0dad9166" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.16.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.16.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.16.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.16.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.16.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.16.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.16.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.16.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.16.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.16.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.16.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "902a6ff6acbee8cb0d608f2b0991ce30" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.16.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.17.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.17.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.17.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.17.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.17.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.17.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.17.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.17.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.17.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.17.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.17.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.17.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.17.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.17.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.17.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.17.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "b0abcb70ab84011b0396efaa799e7729" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.17.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "eed9242afa924e4f855b6c8558fa44fe" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.17.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fa53fd6930fe478d1c7303093d5ac14f" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.17.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.17.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.17.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.17.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.17.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.17.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.17.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.17.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.17.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.17.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.17.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "afdb0d5a92c2d71542396ce28e65deb2" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.17.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.18.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.18.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.18.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.18.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.18.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.18.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.18.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.18.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.18.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.18.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.18.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.18.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.18.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.18.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.18.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.18.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "5f84aafe1b29dbaf558c2430977c6180" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.18.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d0004cb8cf869f5d915956582a594fdf" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.18.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e4028f4ef402a15ec87c36a44ed2c9b5" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.18.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.18.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.18.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.18.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.18.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.18.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.18.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.18.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.18.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.18.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.18.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "9bd2595dab92f2557e60cc4229a13128" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.18.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.19.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.19.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.19.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.19.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.19.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.19.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.19.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.19.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.19.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.19.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.19.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.19.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.19.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.19.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.19.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.19.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "02f35718e3cda850a04740502a493e32" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.19.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e70a6728478f0b012b492fedeeaa1ade" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.19.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b339eaa20864cc05c6fa4bbeaf9debab" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.19.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.19.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.19.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.19.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.19.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.19.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.19.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.19.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.19.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.19.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.19.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "f9ddc201398dfc171f88d8203a68c60e" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.19.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.20.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.20.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.20.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.20.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.20.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.20.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.20.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.20.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.20.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.20.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.20.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.20.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.20.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.20.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.20.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.20.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "92aa8c5dbada8f75c7e6788af36a69bf" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.20.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "22f898049d0d8cf37658854fcde5e8ac" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.20.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "50e1080b1c866efd927ac80cacb7561b" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.20.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.20.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.20.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.20.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.20.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.20.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.20.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.20.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.20.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.20.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.20.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "4b2efd10838f1df096b7c0c24db5ab57" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.20.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.21.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.21.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.21.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.21.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.21.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.21.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.21.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.21.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.21.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.21.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.21.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.21.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.21.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.21.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.21.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.21.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "b6aa7d975cb9d3353fc18d498ec61d47" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.21.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ccb2fa28ddc235310835d0318f42857d" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.21.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3732364bf6482d98280a446c4dddae6b" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.21.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.21.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.21.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.21.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.21.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.21.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.21.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.21.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.21.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.21.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.21.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "72e41c09550151679abeb5a0f9a1922c" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.21.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.22.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.22.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.22.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.22.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.22.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.22.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.22.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.22.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.22.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.22.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.22.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.22.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.22.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.22.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.22.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.22.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "74f7c817c52838821ee4de8ebe237197" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.22.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "61966489f2e053a582553f15c6b3047e" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.22.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3b75f4a022339becb0c62b49b459726b" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.22.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.22.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.22.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.22.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.22.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.22.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.22.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.22.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.22.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.22.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.22.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "72dae72473fbf42e1254c6f1cd83b4da" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.22.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.23.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.23.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.23.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.23.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.23.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.23.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.23.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.23.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.23.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.23.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.23.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.23.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.23.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.23.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.23.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.23.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "5a391c3304b3e770df9001a6920f39bc" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.23.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "eae307f40052be1bae2aac1652ac5afd" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.23.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c7cac6e2b8b9d673ec755831c8886f02" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.23.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.23.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.23.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.23.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.23.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.23.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.23.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.23.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.23.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.23.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.23.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "e71060a89c4b7ad8652f8d563f5ed3ef" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.23.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.24.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.24.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.24.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.24.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.24.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.24.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.24.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.24.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.24.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.24.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.24.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.24.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.24.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.24.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.24.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.24.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "293156db377fa0898e5edb08378aafda" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.24.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e12372bb194045d945f1878e0afaffa0" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.24.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5bf2793268a541a647dad57404cc5b17" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.24.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.24.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.24.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.24.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.24.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.24.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.24.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.24.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.24.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.24.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.24.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "18e9aa14be867cc0742e65962af884af" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.24.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.25.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.25.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.25.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.25.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.25.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.25.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.25.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.25.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.25.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.25.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.25.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.25.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.25.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.25.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.25.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.25.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "33c0bb0001b3b7bd5c21eb6c1dcd5026" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.25.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "119bc976a5301be7489b85bceb37c0cc" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.25.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "92f2a6f658a9c78f879485b0332d2e18" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.25.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.25.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.25.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.25.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.25.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.25.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.25.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.25.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.25.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.25.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.25.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "d34e73086bbb49771b5b141e0ea1a79e" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.25.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.26.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.26.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.26.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.26.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.26.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.26.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.26.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.26.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.26.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.26.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.26.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.26.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.26.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.26.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.26.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.26.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "64e9aa449c084f487de04108e7ab8743" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.26.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ed09d3de92814b90ade5b8c19e4fb4dc" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.26.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c9e9aa84cf6b2ae962453b3a43934cd8" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.26.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.26.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.26.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.26.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.26.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.26.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.26.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.26.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.26.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.26.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.26.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "4297288819d8924de502621d698a744b" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.26.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.27.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.27.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.27.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.27.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.27.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.27.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.27.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.27.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.27.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.27.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.27.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.27.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.27.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.27.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.27.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.27.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "46b7eb70e9f8b7c51eccc60c5e892987" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.27.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2621bca49d9d8b258a8187abc37db1fd" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.27.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "87a4734fc6990803571b6477b4a0e387" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.27.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.27.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.27.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.27.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.27.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.27.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.27.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.27.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.27.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.27.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.27.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "0652d3630b2d7f27de53286f92ad53de" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.27.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.28.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.28.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.28.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.28.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.28.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.28.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.28.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.28.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.28.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.28.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.28.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.28.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.28.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.28.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.28.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.28.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "f1156b137dec77d4e6523e84010b7494" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.28.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a0b22b5d16b013c1f8132fa72962a0b6" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.28.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d536e24d1b00996a947dd16e8a94d6fc" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.28.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.28.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.28.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.28.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.28.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.28.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.28.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.28.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.28.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.28.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.28.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "7bbfc5dbe53b79a1b784cf99969e683c" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.28.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.29.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.29.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.29.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.29.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.29.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.29.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.29.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.29.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.29.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.29.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.29.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.29.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.29.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.29.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.29.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.29.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "9c5e4f54200a33e3ab10f15f669e4a58" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.29.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "90df27561a1ff4fb2450337feef63e80" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.29.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f954b8ecfe4d19d697db90e1d4a8817d" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.29.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.29.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.29.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.29.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.29.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.29.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.29.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.29.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.29.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.29.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.29.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "01ae69a9f9c61843f9901e4de0b0ef7b" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.29.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.30.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.30.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.30.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.30.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.30.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.30.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.30.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.30.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.30.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.30.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.30.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.30.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.30.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.30.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.30.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.30.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "c275ba41fb708efeed2a73a50609aafe" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.30.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5546cc902315c367aa04a8fd264fe273" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.30.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "32cb3e258005840a51652ee2b762b420" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.30.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.30.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.30.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.30.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.30.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.30.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.30.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.30.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.30.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.30.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.30.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "2cc258f7fff62ef3e0c25532d24b3cf9" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.blocks.30.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.blocks.31.ln1.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.blocks.31.ln1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.blocks.31.ln2.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.blocks.31.ln2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3694592 }, { "name": "model.blocks.31.attention.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3702784 }, { "name": "model.blocks.31.attention.time_mix_value", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3710976 }, { "name": "model.blocks.31.attention.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3719168 }, { "name": "model.blocks.31.attention.time_mix_gate", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3727360 }, { "name": "model.blocks.31.attention.time_decay", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3735552 }, { "name": "model.blocks.31.attention.time_faaaa", "shape": [ 64, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3743744 }, { "name": "model.blocks.31.attention.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3751936 }, { "name": "model.blocks.31.attention.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12140544 }, { "name": "model.blocks.31.attention.key.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 13189120 }, { "name": "model.blocks.31.attention.key.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21577728 }, { "name": "model.blocks.31.attention.value.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22626304 }, { "name": "model.blocks.31.attention.value.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31014912 } ], "md5sum": "16e257d9ef167d48dd55a000506d5e4f" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.31.feed_forward.key.q_weight", "shape": [ 14336, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "674d28fdeb269d40d9606467e6ba6299" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.blocks.31.feed_forward.value.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ee8460b09579b11467c92510fe5ab04f" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 32014336, "records": [ { "name": "model.blocks.31.attention.output.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.blocks.31.attention.output.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.blocks.31.attention.gate.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 9437184 }, { "name": "model.blocks.31.attention.gate.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17825792 }, { "name": "model.blocks.31.attention.ln_x.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18874368 }, { "name": "model.blocks.31.attention.ln_x.bias", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18882560 }, { "name": "model.blocks.31.feed_forward.time_mix_key", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "model.blocks.31.feed_forward.time_mix_receptance", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18898944 }, { "name": "model.blocks.31.feed_forward.key.q_scale", "shape": [ 14336, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 18907136 }, { "name": "model.blocks.31.feed_forward.receptance.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22577152 }, { "name": "model.blocks.31.feed_forward.receptance.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30965760 } ], "md5sum": "dc190cc79413353873caa9190a097b22" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "head.q_weight", "shape": [ 65536, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2ae12c299d81bd2ccebca8a2ec03e035" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 20463616, "records": [ { "name": "model.blocks.31.feed_forward.value.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.ln_out.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.ln_out.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "head.q_scale", "shape": [ 65536, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 3686400 } ], "md5sum": "0cb368a38d525edab3f07917edb80aff" } ] }