diff --git a/mlc-chat-config.json b/mlc-chat-config.json new file mode 100644 index 0000000000000000000000000000000000000000..16da972b9da561819eb57e0235fc51fa8b0a32cc --- /dev/null +++ b/mlc-chat-config.json @@ -0,0 +1,84 @@ +{ + "version": "0.1.0", + "model_type": "gemma2", + "quantization": "q4f16_1", + "model_config": { + "hidden_size": 3584, + "intermediate_size": 14336, + "attention_bias": false, + "num_attention_heads": 16, + "num_key_value_heads": 8, + "head_dim": 256, + "num_hidden_layers": 42, + "rms_norm_eps": 1e-06, + "vocab_size": 256000, + "hidden_activation": "gelu_pytorch_tanh", + "position_embedding_base": 10000.0, + "context_window_size": 4096, + "prefill_chunk_size": 4096, + "tensor_parallel_shards": 1, + "max_batch_size": 128, + "attn_logit_softcapping": 50.0, + "final_logit_softcapping": 30.0, + "query_pre_attn_scalar": 256, + "sliding_window": 4096 + }, + "vocab_size": 256000, + "context_window_size": 4096, + "sliding_window_size": -1, + "prefill_chunk_size": 4096, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "pipeline_parallel_stages": 1, + "temperature": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 1.0, + "tokenizer_files": [ + "tokenizer.model", + "tokenizer.json", + "tokenizer_config.json" + ], + "tokenizer_info": { + "token_postproc_method": "byte_fallback", + "prepend_space_in_encode": false, + "strip_space_in_decode": false + }, + "conv_template": { + "name": "gemma_instruction", + "system_template": "{system_message}", + "system_message": "", + "system_prefix_token_ids": [ + 2 + ], + "add_role_after_system_message": true, + "roles": { + "user": "user", + "assistant": "model" + }, + "role_templates": { + "user": "{user_message}", + "assistant": "{assistant_message}", + "tool": "{tool_message}" + }, + "messages": [], + "seps": [ + "\n" + ], + "role_content_sep": "\n", + "role_empty_sep": "\n", + "stop_str": [ + "" + ], + "stop_token_ids": [ + 1, + 107 + ], + "function_string": "", + "use_function_calling": false + }, + "pad_token_id": 0, + "bos_token_id": 2, + "eos_token_id": 8 +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..b20a39a2f0a6d131d432ec94cd3bffc2b29feec7 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,6441 @@ +{ + "metadata": { + "ParamSize": 507, + "ParamBytes": 5199330304.0, + "BitsPerParam": 4.50075370326778 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 458752000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 256000, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 458752000, + "byteOffset": 0 + } + ], + "md5sum": "208b8da2a7b271bafdf05f6608c5b343" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 57344000, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 256000, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 57344000, + "byteOffset": 0 + } + ], + "md5sum": "4b1ecc43aa73a8194d6c5662faf07c08" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "7e65a7f12c9aebee845454df40f16f5c" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 28908544, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 7168 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 25697280 + } + ], + "md5sum": "6232bc9ae9fcbb38f093e018eab52ebb" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "a8dd21d37e6756206e8857265c4c0541" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "20729fa45d8ad63cb6d76d0c9bf7ed2d" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "bdb67a4c657d9eac9bcfd6b46cc8e76d" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "02c3e39a5d5347fcdc5026ec091ab4a4" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "fa0287a382a02c739d00c16b12f85319" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "6f395879ca0a123f0021053a050567fd" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "1461ff02c0c7e5f0ae0e5db404fdbf1f" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "da9cbff49e8d852562ef00995c856f78" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "382ef02c69121260bbdb24bce8c1d820" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "a7f3125ecc200f0ab43f7aa1551bf1ab" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "fa2f8666030683e8ea11e507e0b2ef14" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "c0ef81c2053d1b6452a123fe81907fe1" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "c48ba06e85ed0211ddf8bcc3a4224de0" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "7153006acd003f8b98468b64dae3a95e" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "9d4707f7fc018a2c27de7aa11e11f5c5" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "f10f9b8927a460e3700e8c96bc9016fb" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "11517f3b4faa7a81318f9a5ea60f891e" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "9af4a276ca3a965b7d6f880753e58efe" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "234591b84e62dffe4a3d102764fd1cbc" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 31216640, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + } + ], + "md5sum": "12e8be3382c7d39b274fc56bd34b7885" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "5e01c40e3e835395aa5a25300ed8b0f5" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 31202304, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6422528 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21102592 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22937600 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30277632 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31195136 + } + ], + "md5sum": "943d64c7efc90a997f8eb9899c67030a" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "adce3e4992ee0ea1fe8b395f816879e4" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "20c2a3195ce9907c76041340f3bf426a" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "8635fa6388b578cd626d7f26bbf98d4d" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "dfa2cb2fb3a45e86223d877c5df99224" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "c8a6a5b9b87e92e869bc0a108654dd48" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "ae16960b5a58f769bc9b4d406df89433" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "6bc9ccb8d3a4d1ee9fccc8a91245c897" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "a7ba30ca52b5b30e8865df212384951e" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "67a0597c374c49b2cd8a15a90054e947" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "6becc43c9c266e9076ec2cc1869bf107" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "1edd55a9a0e3c18817c66abba28fcdd8" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "bfe19f0b47674d940f7154a851565858" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "c137cd3e6e3812d404858777a238bb83" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "0e579c3f8c95d07708f584b3c1e2847b" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "abbb71f6a58fafc6f7fcf0ef7c358181" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "1b822e0cd53d8e26e54a15c7a76d1b11" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "4c2054370bf2a4b689f784185aeedb79" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "546fc2631ddda941ac302a8d35af04be" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "2b34524f37221e9c762572256b387923" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "a777b74828fb533e005355fdae2274f7" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "f23cda95d6378ea6244a10c3b754793b" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "c07ec5200e9cf2c0834abaf792171a0e" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "e907015fe90307523789afa14c2509b3" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "823c9768612fc0f8344c86895720961a" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "0702a6c1dac786b738daa223d14613fb" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "6d40de89673913109df15d2aa96384f8" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "076059d53a53cf021f01719ae80e0bec" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "62f10e4942e98c77fc507947c5e40ed7" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 31216640, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + } + ], + "md5sum": "29bad1d2ca257d4f50d0aa4a761df717" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d6f6c3bb146f469303793a30700dab79" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "b0e37ac5a11492c878cd56eeaa44b801" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "f212fa7bd4b0a80c87fd975acaf51c00" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 31230976, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 14680064 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 16515072 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 23855104 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 24772608 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 24779776 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27991040 + }, + { + "name": "model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27998208 + }, + { + "name": "model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 28005376 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 28012544 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 28019712 + } + ], + "md5sum": "8dcf25f02e45c258fbd464e9bb45ec2a" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "7bccbd65d1a8bdc878292f2bc4dbfb69" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "09863c6a94fbd26e2aa814ea10525c7a" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "db0092283aefecaa0da02809b03d0be0" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "71a6b174a9d234930c59d7c31b7bbc39" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "c2fa7cf1267af44844661b6aa5fca55a" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "0359886989ae69899a8980e3d981733f" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 32141312, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6444032 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 6451200 + } + ], + "md5sum": "f0177aeda7ff779239a6b2342c487f80" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "b546dee7e99ffea6014dfa4d00ceb6d9" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "43c4b4b3a70166ff96ba22e571fe19a1" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "edf1c8eb022a01af3e12b7de0506e00c" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "2cdc319bcf9056eceb7c0829e8f14ec7" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "4b4ebde6e020efa7f65a9ae29eaaf4b9" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "c54cc7f3ffc3df904bd493d70f0057eb" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "8aa1c3109b1103b1d40f35842fac01a9" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "56f20978dfe5a08a29cf6884b29f42b3" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "6f689a0b2f55f8a16d299134894faad1" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "de2ee10872b09973923e096e93290bf7" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "1d7ae0896be4401ff6334e2099fe8440" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "51d1c8858e4a5784698f941a44dd02d9" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "7cc3a1d8e5d5215479335021ff9ba611" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "bab82493643276dc66f12ed08db1524d" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "d3bc9fa0dbe837d248e2f5df5ff26258" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "61de34082f8b0c483cdb621e325900ff" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d85619cd82592661687353fda029bdb7" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.26.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.26.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "102b7377cf2d4f21f5311704e2c05cba" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "808b82080ac487e604ebcef28cef92d6" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.27.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.27.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "5e377fae3dce0606b9b6fd02f6080350" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "60937200dec856a0c8373d869fb30c24" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "7f4e5304e5b7d9eb5aec5d3d9f4cf49a" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "71e00c2b39c03965aaf1d267d9618ab7" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.28.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.28.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "e1c3cbdc9c4812b8d72ae13326f2be39" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "c28735682ef417f33dcc9144febd9bc7" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.29.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.29.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "7b41d3d0b84962780ccfe2b32fe3c92d" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "457b2d360a3110817dea88a533d043ba" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "fbc517a4ae375571011f42fb4c68460d" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "c78c2b51ea6a834873a562d69dcd942b" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.30.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.30.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "c6a9ae18ce27b6da1fe50db9d0e4c96e" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "6574e0a4b66880cd9f0ab6b53ee0c1da" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.31.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.31.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "0106fc5548befcf144444b76ed4629c4" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "d52f527ecd7ff57ea435da36a0137284" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "3a7866842aadc45f537ffd71727dd8ad" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 32119808, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 917504 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 7340032 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22020096 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 23855104 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 31195136 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 32112640 + } + ], + "md5sum": "07917efad7aae8f247a072fa26d1d5c8" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "3609f910ba861fd36160e7ceae478f6c" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 32141312, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 3211264 + }, + { + "name": "model.layers.32.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 3218432 + }, + { + "name": "model.layers.32.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 3225600 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 3232768 + }, + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 3239936 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 28930048 + } + ], + "md5sum": "812251ab91064059c1fda630b7358ea1" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "f289817e554ad98ed26a1ce86fa1cce0" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.33.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.33.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "a129ed68a579368b75985d6850204b1f" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "ee1091a5658370bef5040493cec1412e" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.34.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.34.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "cea47b883964f9cb623834b33941afe5" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "6765946f1eec5c2d96fff6b7cff8469a" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "43369bbc74908fe30077f839205e0589" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "3209af471a3c4f75e5d3a97324d27537" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.35.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.35.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "59d9e527fcbf2b53ce75864c9d32e60d" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "045ed94b0275e2ebb2e0abc7eea252e2" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.36.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.36.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "83e4e39bc4d0f46782875f225b6cfbe2" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "302f359e58b215c039f411d4b38e821d" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "6ba34fbfc0294ead668b5cbe7f0bbdb5" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "f86610d02c8a87c4632018faa146aedc" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.37.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.37.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "174241c78c8320aba9c04fde25f10833" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "09086c90699eda379cac295837522b44" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.38.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.38.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "9cbfe7ed6cc423e3a96987b17a2a5e56" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "285612c1ce1e956c21ae1211162f8ca6" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "e99ca60217524174c904a767ddb89990" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "be3b080ecec9396a839dc2464e10b390" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.39.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.39.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "4eb3ffb78a5e5d0bfdc4e038f7853605" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "e8cc8360e3358e4341071ec5efd1ee7f" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 33510400, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 0 + }, + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 3211264 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9633792 + }, + { + "name": "model.layers.40.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9640960 + }, + { + "name": "model.layers.40.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9648128 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9655296 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24335360 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 26170368 + } + ], + "md5sum": "8a2f24e40658bab208b538439e72c6db" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "52e0073300fcef78538ff3c7675331e7" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 29826048, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 917504 + }, + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 924672 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 3584, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3211264, + "byteOffset": 26614784 + } + ], + "md5sum": "524ebb4156c6829cde2f3d4aabf470e0" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 31223808, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6422528 + }, + { + "name": "model.layers.41.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6429696 + }, + { + "name": "model.layers.41.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6436864 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 6444032 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21124096 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22959104 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 30299136 + }, + { + "name": "model.norm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 31216640 + } + ], + "md5sum": "645175301cec64b702b4bdb8cb48738e" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea207f4182f6e1ea8d4f3e0d16bee9b3d27c461e --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0e6211a1f3cec7694c3f67b288aeac54902949207a1ca41ff8a0ddb37542976 +size 458752000 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a3df763bfabbad353bbdbdfa19261ee4845022f --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbbcce0e575587bdfbc3886a28f59e4250b17a3cb10a1308090bc059456f7b8c +size 57344000 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..6ceb9e45c004e21ae882b97b28b6b04865bcd6f7 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5444844f3ac989e6eee210e416e98e03fb029a60d5fdaf6b819759408bbaf689 +size 25690112 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..e60820394d1c8f87149ab211bcf51e884a3decae --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42284b63719264f877519b186d02e3c4cb91cd8b3f2ad21df2b41f65b924ce9b +size 31223808 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..a3644a55f996656504858bea8c862bc22c975884 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b4600c79b0f8c8df5d24ce0df697fd8f9768d04ced5775119750ebfb2aaf856 +size 51380224 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..d455dcbc8d643dac6c5c252ec3387029fd63d236 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c544ec96600d3e71d5cf6d1da8f6e3749ef896898ba9d80406fc01ce55d2bb52 +size 33510400 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..63123e4e06f6de49f506533597e07089b346bf88 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e25d82e23760619ab5d3347e924b4658602ba10f3881dce42ac4fe9ee2159e5a +size 51380224 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..33aed3a6c29a46a583ae73bc47a30e411f999599 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:972f6271260c9abdb619bcad1092a919af3b0ccfbe268eb4891647051e0fc90f +size 29826048 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..29fdf062910891f590edfdcfe478d81b4429d188 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20808f9fc18faa1fdc8da590071646065dc7b1772b70022d4541c772f65a13e6 +size 25690112 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..6b3f1dd0e042a51e6a792faa678c147cc34c6ee2 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d33d1dec81b4460da6e3204663b820169de67b1b390e0d4394a86b5c5e6d9c6 +size 31223808 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddf33655121efdc69d5f6c02bbf8635a3e3ca824 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1221955078fa09a4c6d9bf1e9b0cc738279a1b312ee575d4d48512e737ea6453 +size 51380224 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a3358eef3d4b2ae1ec0f978b0318d38a9d4f78e --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a9ba84e92e416bfb7283651ce68def7827bada369908cd6b19ded0c02efa288 +size 33510400 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..9105928ed6df8a35e8e88187183dee563bd28ae3 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b4e85bec5375fb7ccc9c3fe462016a91d262b29b9b3d8be8aa02fd6a232035 +size 51380224 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..273d7ddbf4e0d313c859efa357d312a05bc8fbdb --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfcaeb9be09e15d5ea449e18882018f71c7370cac6c4decc4ff233b81cfc40d1 +size 51380224 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..88a85f4500d84e4faed2b66152b47e864485e656 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a48695df82177d7c0f8a77ba6d70aecd7596dd7cb2c034731df3fe1e1c53543 +size 29826048 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e8cf1285569aa7662fdfc052bc9505e3edf7cea --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe2b354f781f477ef6e37a8bfe77e827f64b0e8527918895138d78c8c9eb4dad +size 51380224 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..a4874e4e3f05e1d26322e52b6273d376b561c42d --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3729c43cc0c224c6a4d43d6df8c8a88e79c66a1a4aa1c8ed2b97b8addf42a83 +size 31216640 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..79b61fbb48071f9f2b245c1f724ea6675f273273 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:187498a0d7777d45d22d74d9d4e7fbfe483f4fa729e84143a9b61149de5a9766 +size 25690112 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..d84bc1545cc686db87fd03ef4c173b4656f60f97 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcaa5ba814d285e6982580e111056a981f03bb4f2fea6e2b4de5983df28a0aaf +size 31202304 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d99d907a41806d707a70c60af10009ff04e8871 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b78341cd4084fe4dcd3d4fcf87a6d417f4119335e16625337e57dde53cf6cbe4 +size 51380224 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..df522e425eaaa3524667abdfcbd6b2534a8373b0 --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8313eeea86bef7286edbf3b0e92f0f08c41abd8bfd45ac97dc72cdfcc63f2e80 +size 33510400 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..c0f41bd67737d1617b49a0208d1f074644e2c380 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b410af5a4ff028ac456e4cf9d91da0bfaf18ca16817f767d0af342c93f23a3b7 +size 51380224 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2b279deceb9d3399e1d2ad5aa47a2d30e586f96 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:602a1d1307a9ae895937fda304632f9419cb3c5d2bf94fb06053a4eaa787814c +size 29826048 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..db55ffb1e36490a760262d2437e949c8fdd30d65 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcee150065f57b0cf4d0c34fbafe2fcbc27b9084aee62b7d585ae78261e5baa1 +size 28908544 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..bbdc032e76bc702350c53371df20d139ab69bd59 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4441c72241f6fad4c2044926cdbff608fb524e2584bbf4d66178b51085435388 +size 25690112 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..3478e058e8185d602e7720d6ba5cf95244cd9d1b --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76fd68a4579b31f19af939808b6dc1fdf0ec92d3c1ef59080176022305490db0 +size 31223808 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ea3fe72f68a65d9ec1001bb34cbdc7e88edfa61 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a73b0eccfc89cc4652cc18385e224ef04ac08c7f8af76c74db606ea13333b7 +size 51380224 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9048cc4a8e3c51969a6a318f135daee3ec280d1 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7959050222dd1a64f37c8b44d74710272eaab2087ede733aec051799f06fc384 +size 33510400 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..633011bb991e4851766ccae9619c7ab7574c1d83 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81fb7b1cd8d9fe7c71cfea0e4c3318d4a66d2b2393b19569b9f4e1ad9aa96a7f +size 51380224 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..19f51f4d8c474692fefecaee5f59724985365c33 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da0d59630afe6c020b348b7f9e6af0fb8d87a8b812881aa8f81e9afafe4f5101 +size 29826048 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..65fc715204a63b097648c00872ffb1521cce6295 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c270a4af0c2cc353e6d499909b199577aa5e789194d659fdc1b92e7570a5b388 +size 25690112 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..0b077c82f30f8be50440bbb59666ae63a946d39f --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4cb74198759a34cbebff3c64ebbffbe9ed8a2a3b441731a3c44c695dc9b7939 +size 31223808 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..51f95e389ae242c382a593f9e4d4dfd52b97eea8 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a8dac87a997f5b55b30cc873df2ac4a92d31411ab89f9dbd55f5e7e53157a3a +size 51380224 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcf5d99815d6667deb1199a070cf29af03e7d8ae --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77fbb4753a7d47d0b5696261a662397554c553f88bc891741a175e0685976333 +size 33510400 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..78738c554ed4889cc22fd87e2bef4c79c7bf5465 --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bc70485985b972cf6bc8f55cd908d9a19bbf0dc9e6346ea2370ee9b48db8722 +size 25690112 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..1da4960a0627c6971c482337a0ccb32cc490102e --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e255a4f3a0335ed1751069c35600fc476f7bca029c126aa2350e591cb50d0aa7 +size 51380224 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..9755389e2a4ccc4d6dca62888ea0952fa7b15271 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e08650c39e6cd8bb7aa7f6dc229554c176cf5dfe7ce68d2d377300f92b9e2bf8 +size 29826048 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..194ac43e3f27f5b52abb438ee5b16b2327573a66 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c70967d54d7af00380d808bd95fc188e461e3aeb8fa52a911dd1671e64adf0e2 +size 25690112 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..2049606b6858aa019f2260ed7d9b029746ecb377 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd6f9dd6796c53554ac16c7023ed2d82c6d3942c05e907cbc8f554f2ff914b96 +size 31223808 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..b44bb8503ee93e47ab15b73bc6793c36ceec5795 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91ab6f6c814e025534dd6d21f3c803a000f30b0ec364c346b28c42191fff7090 +size 51380224 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..1372cb613a103d11a558cb8b0717bbe239191da7 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8712b48856c5a0879422221774a7a880a8285e36d758aed0d888b6e91232c8dc +size 33510400 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..8834af69fd6eff65f4f833d522057cdf50cd3e86 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e13732b4d7454f0f50f59c7041ba03cf2432c450d505584df880cb23b8133d5 +size 51380224 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..0bbb16433338e84c7ecce8884a41ecdf973d0b8b --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e75bd30a6f8cbcf572a16324d1d6c85c86ad97a96806672747cb60ab755eedb +size 29826048 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..4457e3498af55db1a46e9871e98a4fb00eae9c6d --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb1a433e369a05ab593be1da82283c592122beb4386c3190276bda8118afdde +size 25690112 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..dbc1033026af4bc314feddc71ebf3e966f78ff25 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2a6e1a9946549712420127f86da7d7f9fa7c7527c32c1054a2be5e67e157c4 +size 31223808 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..d60f577f5b87ded74e50d61c08828b60973797ae --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a1dfd6775b73cc38ebde30cf7eaa53d1d2df07f8f1618fe5841196360124211 +size 31223808 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..06a99e1f65b5e541088edeabce9b38550b8249e7 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4bc21841a2905989bf539baca60599abc5fec825626b863330c645304e6eded +size 51380224 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..7566de38088af624160891ee2995b94c4bbd1a86 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e47731f5787cd41ea7fc4b4c2b4b75d5fc68512a73b907c5f095a832c0ec897 +size 51380224 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..f26fe7867fba2e4d98279e493d212e17ca6f8ead --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf030f7201426ffd353c31b65fb467e6ba09e29c5fc1d96b017591b70f866ef8 +size 33510400 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..aff3aa27071e6164ab6a6981d22701929b4d9c55 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c042976fe6ce3ae462edb3b3283647f2c92b094549c762a6324b50eb65b966 +size 51380224 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..5dba445c906b3314277b905bab0c9badf8c1b7e2 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:691ebc8aa90c6716ada83070bc2c1311dfa9d161cd4fb666a9c7d16d2a3a1c68 +size 29826048