diff --git a/mlc-chat-config.json b/mlc-chat-config.json new file mode 100644 index 0000000000000000000000000000000000000000..3569b3566d054f0c140d01b1ff0bed3c8d5a9179 --- /dev/null +++ b/mlc-chat-config.json @@ -0,0 +1,74 @@ +{ + "model_type": "mistral", + "quantization": "q3f16_1", + "model_config": { + "hidden_size": 4096, + "intermediate_size": 14336, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "rms_norm_eps": 1e-05, + "vocab_size": 32002, + "position_embedding_base": 10000.0, + "num_key_value_heads": 8, + "head_dim": 128, + "sliding_window_size": 4096, + "prefill_chunk_size": 4096, + "attention_sink_size": 4, + "tensor_parallel_shards": 1, + "max_batch_size": 80 + }, + "vocab_size": 32002, + "context_window_size": -1, + "sliding_window_size": 4096, + "prefill_chunk_size": 4096, + "attention_sink_size": 4, + "tensor_parallel_shards": 1, + "mean_gen_len": 128, + "max_gen_len": 512, + "shift_fill_factor": 0.3, + "temperature": 0.7, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 0.95, + "conv_template": { + "name": "neural_hermes_mistral", + "system_template": "<|im_start|>system\n{system_message}", + "system_message": "You are a helpful assistant chatbot.", + "add_role_after_system_message": true, + "roles": { + "user": "<|im_start|>user", + "assistant": "<|im_start|>assistant" + }, + "role_templates": { + "user": "{user_message}", + "assistant": "{assistant_message}", + "tool": "{tool_message}" + }, + "messages": [], + "seps": [ + "<|im_end|>\n" + ], + "role_content_sep": "\n", + "role_empty_sep": "\n", + "stop_str": [ + "<|im_end|>" + ], + "stop_token_ids": [ + 2, + 32000 + ], + "function_string": "", + "use_function_calling": false + }, + "pad_token_id": 0, + "bos_token_id": 1, + "eos_token_id": 32000, + "tokenizer_files": [ + "tokenizer.model", + "tokenizer.json", + "added_tokens.json", + "tokenizer_config.json" + ], + "version": "0.1.0" +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..3b6046e0f730e6ec08d9199634f65747b9706c94 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,4367 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 4073866240.0, + "BitsPerParam": 4.500422792921966 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 65540096, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32002, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65540096, + "byteOffset": 0 + } + ], + "md5sum": "d9db01f3dd3b3513e51363bb176ca9d2" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fd50974ce74d97df9ec24c754cb8048e" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "27dd430f5442fde12802eb9c621d45cf" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2575c0c22d391026519583bb0ce84a6c" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cf64f88b5c8d326da76c9ce549dd8093" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 30245376, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32002, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192512, + "byteOffset": 0 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8192512 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 8200704 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 11870720 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 19210752 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 19218944 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 19227136 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 22897152 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30237184 + } + ], + "md5sum": "9e45c3a2e7bbd1d4754e3a532e692bcc" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b5a6af26ec852a180af846d6072597b9" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "772f38aba40c0991e15cdc72584324ae" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "d1bdcde31a2a4b4eb62b149a92106aea" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "acd0a3f1f92618d53be6bbb584dedfe4" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "b41c9d6dc2361d8a643d8b5d1ae90109" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "6b30f04b974d6d62612a7e7323b65fe0" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "5e8c189b69e965c1c78034f033c92975" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b941c3bea1afe94b3851419110f1bc09" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cb9d9e1e999420d301785f1c6687db58" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "b3cdf29c491b6e6a0434e7634818bf2c" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "eff6d1d56294e02aaff595daa450f67d" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "4bd7dbd5ef233e4bac7fad8838c3fc64" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "33e2477b1ef699d6f42f6df31e29938c" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "37c62fbcf3ecea20e51b66a90b528908" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "f97524d454979d1dcc1876a734dbbad4" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "d2c77970c2c72da8423da1759e641b5c" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "16ca8f5150368c11aa4885c1fc55597b" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "a8da4b6cda37f57de2de1c42d774a551" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "791d53d02e274838478b9f3fa94c35ee" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "458de1d5ca459d5c2e328f1c85b41937" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1ab37ad5606e16fd200fe49c199e4920" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "43185997f94866dfe9c4b80ec8b7bf5f" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "8a2674d5c94b949315edc1c8aa23c66b" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "551ddf2939b67dc5a9b197a14b5d0af7" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1ec6765557e56fa1e6b0cb5d2bbf187f" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "a448d556b91d5a03fc037aa777fb3c35" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 65540096, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32002, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65540096, + "byteOffset": 0 + } + ], + "md5sum": "734ef2fda948be0a99fa2c1f3af0faad" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e7f8854317ea35758e6f9cffe9a8cc7c" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 31801856, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32002, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192512, + "byteOffset": 23601152 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31793664 + } + ], + "md5sum": "586255e7bae1223f8eb2dbdf1fe74e0d" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "715828c8bc64b2c7fdf61f49a4226703" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "978bf9d2a4c2ec5ebad0a8aa6b4360ce" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "839769f6a4d3b167343dfdb40ed4b23a" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5872a57ceb9b35da1b91dd48f7d16230" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "ce97dedf1ba5b2ae413b7e5aaa5308cc" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "0658185ea7417abf16dfd6ad223804eb" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 32505856, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 11010048 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 18350080 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 30932992 + } + ], + "md5sum": "a9f08a9e2cd95658606a0320e017213f" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "812bf3fd020e9363a6b932fc63cab914" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e33ae5d6073475910179e2344e57cd44" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "2546d82915f9aa4bfc14602ec31d899c" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a3bd9c0daa7e62d0510b0fedef460fcc" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "251ac21f5fcc5c076360b53740a458fc" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "1994702a39722fbdcf33bc5270d982f6" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b116333021862b14e611335ee3b7c2a8" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "2d330e5cd87cc594f27ce5afd0922745" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "d2a5327b640a125b3deaed251033a770" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "49176731e8454d47150e01901cd8e7a0" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "6753032fe347f14de51c647f9ba85312" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e6e60fd3f8246b73ee6ad4236c513d06" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "5a27fc2166c8330b06a5add3e5e874ce" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6034a0a8f77ed820047c3b00e8ccbc70" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "f2ea20aa56bad22fd6ec9e6482372cb9" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "c7775ad424ee995f1db51856d9ddb23b" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a1595b6e7056436015ef114a0496b5db" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e79eaf4c371b8168bf70ef9bcd2953ea" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "336cfcdee1eab9c5f493709e0277982e" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "57e870d7f0548ca2b1c63d40eac7abce" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "a2e7d3f849991867c53ddac56f8e50e7" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "49ac806d109f50229999612b8f5c801b" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "158aaf0b1600f64afb25f59bbc64907b" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "5f3d657e52395854be957487a2976214" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "6cadccd7e57987772a604df2aa884708" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "8343a314b66ec8fd0c2b27aaef0958ca" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "88ac3fb8ed95ededc9d6b80275d0e72b" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ff8e42a8efe053bfe574786c2ad9ecfc" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "eeb5a5a8f79a438b45b59960528efa14" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 13115392 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 13123584 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 13131776 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 16801792 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24141824 + } + ], + "md5sum": "a08d04acf0bef8e8c88032953f32d91d" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fa5a345d29e4081bce4a86562163f0eb" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9c2ad4d8eee95b31320f0d0d53cf0ecd" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "65c4f1e8a2cfc2b038a9818edccd1664" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f42618c6cdffd9d4395d66c72c34c95c" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "2bdaada4ca73b025ee8d404e86e23a98" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "fa140dfcad76e71f13e02e81f20d54eb" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "fe3ef4873c6584144f51b959da9173ed" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "bbff3f2b3cacc148fa925ab8de150ca2" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "a2587ac0ffcb5d1fadb526d1fafa638b" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "fccd9513a8458a2f9d0be18c93b55fed" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1a6aad1a41aa839fbc2921a616728d43" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "0e4dd7f7109675c3b21a2de8dac8bbce" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "6965687468b5ed6a2f21ca4d074ecb34" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3a0590f9112da5d5337338bb2892c62e" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "0b4a18c5dbf0f8638a610558c15cf7ae" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "420e5ab6429ab2948d0fe9759da42cc4" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7d56f91975a7a7ec7bee2f21884afd75" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "3b303e43c3ed7028ef5a7ef40446fa24" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "77d31194173d25196240211d92072799" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "cbf3705615c4ecd54ae6f7856f4b7845" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7995a6f378be0c9f83d4774ca88b11ea" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9c64ebe5d6fce98e4abbb4cfbd0a2d56" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 33046528, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20455424 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20463616 + } + ], + "md5sum": "84f37b849241a526e6040147056f7094" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9301de66600aaa9981de1e94da1b5e49" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "4dcd54ef0eb1cc8307553cdb257c51d7" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 22036480, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 1572864 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9961472 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11018240 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14688256 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22028288 + } + ], + "md5sum": "f8543f05e41a4c8610474af4a6698b63" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cea9a6608388edfd10064210586e2084" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d8c00e752c14bc45adc8f45ea5ec6c2d" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "3d23cedde2a9f7df77ec00a1f2bc2d43" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1763e93ecbff873f308a81f28b26f995" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "74b04e40aab7e846cd15e0d5bf3b98e9" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "8794e37c2b98a1a774cd3905377ed90a" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 3670016 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11010048 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11018240 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 23601152 + } + ], + "md5sum": "2ccc3a2873a4110d16a5a8f2cdfa5fed" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 22020096 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 23592960 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31981568 + } + ], + "md5sum": "c3682f954e7a89e2b343b44c52a00173" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..237a2c88a85b04b3178c63273fe266c51680fb07 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2db61b12dfbbbfef1ff4682261274fa739c150571574e296bad856d9dedf1ad6 +size 65540096 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..d94443314d0f590f57aa079f97de343d469e78c1 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c3caf34d3988700c0736062d202d764fcfc93e32419f7445a67d92b5eeeecc8 +size 29360128 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5232cee5a463584874bbc898782a8f7546e8232 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:430704e12a754d8f25aa9f910b3de289cce3beb0fbdb36406e5c6aa96c25b5dd +size 30949376 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..72ff6c803ac7f0de5e9dc8438626130e8fb676af --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74089c1e5000e25c50be6193d9c1a2daa254826623818bb601a46615e1cb86c6 +size 27271168 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..ac3abd926bbde544233df0bb5722fd1b662ec4da --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe2a334ba241207fa9d95e5d370694848070419d5037c4a43e8056a44197066a +size 29360128 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..4626a0c9e439f6a90dc7fce6711e8cd3f51313c3 --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dbdf6e3598a8d1632ab0a3e140356c01fd3d1beb95b86523d8b22460b80e4e3 +size 30949376 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..16296240c259b2ea5abe11be75916d73655ed17c --- /dev/null +++ b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f43ed71e3dbd2cbda77ca19a014d16517994f18cbec0f0db83ee5bc3bede66ab +size 58720256 diff --git a/params_shard_104.bin b/params_shard_104.bin new file mode 100644 index 0000000000000000000000000000000000000000..e3834ee6d636998149d85f5db82359256d6aa8ec --- /dev/null +++ b/params_shard_104.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8885a896fcd054a7ea1168cbc3897ef88e06ad5ad78b1787f472b4c9e5a8f02b +size 25174016 diff --git a/params_shard_105.bin b/params_shard_105.bin new file mode 100644 index 0000000000000000000000000000000000000000..70d770f3a23276bf363fb3670ecc1e4867d2cc44 --- /dev/null +++ b/params_shard_105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94ca2f58cf9319ca0aabb8e5c84b73856efecac862863c2df20e5c50cf261f7e +size 33030144 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9595b25aa36b09f487ed210b5763a09191002c0 --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46401c65ed881e6c9e5357957294d74b9c46b5afd9b4acd0d444f7c089e8c08a +size 58720256 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fa587184e19e12db95398dbbcbd38901cdbf7ce --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b49d9436869e5637834ccfa1a2d92fabe3a0b19db0e2ff102bc3c30a34fced3 +size 25174016 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e9b5cf38e1973d7d3af4b600e686c4f37386d9d --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d7c3afacbe6dcd593995c60d7b50f9fcc8dce1500bcb0a8ec9ce668b2d3ee8 +size 29360128 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fa826c1441815800caefc083682738a18837e9c --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47dc545186cd12a7ecefb0527c6ee49c0c31878dca7b7aadcd74058d78d5db8 +size 58720256 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5ad33e40734e520e3c79aaf946b5be453e3d480 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bce7103b316cb60e421330bf4085edc91dd97afaa7feeb19ac238932c90cfcc +size 33046528 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f95629e7fa843ef290f09fba88ad7efd502d6af --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9269ac19df65735fb0d1e6419f63595442fd7f8eb0e6df98d97bbc79ce432c71 +size 29360128 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..fdb53ed62df2833bbfb6ac28cd18825a96c804de --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24ccfce5fdc4687f4edb18fadf2736c94467333a4a1daa414b70146f09aeaed9 +size 58720256 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..05f486cc5ff45d144cbd46446e7157d36029b388 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca505b616c3f04d9072faf874df6ab676552301c1bd35834d9452265e0d0986c +size 22036480 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..daf0dc8e90fbf5cb88d66d7bdb5d27d1184b8f05 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2031e7739fbdf996ebad7cf0324e8561318ae891435c1c5a0a5ac49f1f04bd0c +size 29360128 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..cdd18ace6954a95c400bf870fbbe873983faa872 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e5a332625968bb0f95e4e8d830564b798e94c2d1cf6406fde445ae9529a725b +size 58720256 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..3565e93a294cdb079ed5c0d2fbbd804bc6ebdef8 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0affbd610e7de873b40fb0eb9eae55d0fe62a944578c453a171d4655d2a7b5a8 +size 58720256 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..732a5d86595dd6a3f6de724884b645ab6b811fed --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1abb31c0c87bdde21c54ae3a2874d571a7e230399b1628e28e678e70966759a4 +size 27271168 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..e1ad111c9aca2f0bac18bf73bd79e13ba4e733a0 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f7fe4e524547f45961d0484a1555ec6ff742073cddb7b180d27db84cc668afd +size 29360128 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..7d2f0d8b0536b7853fed6706307cebbb8ffbd335 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aab2de72b5c90734b24b147fa46db1b4c846ca0912fe1928afa565fd3428152a +size 30949376 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..909fcacca9575c510f77e48773b0269294a6a26b --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fe75d979500346c27549550568935bca93bace0ead2884cd6a6aa677b5a0b47 +size 58720256 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..4cdca7ba9f46966599be73c9f84d89b2b9d939a2 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1395e930d844f37365cd4a3901c3332d1819791af753f3f73ea9a0ef8b0bd069 +size 25174016 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..4dffaff5373fbdc540b94878798ad38518c067c7 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71a9ee1f7076f7524ee8d1c3248ab718e7e1997494c9c292a01feffebd2ff979 +size 29360128 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ffc55dffe7aa082de92bd2ccc7dfd2f1e8eec9f --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f201552ddd4c8e28203054e4ebb84dad35bf78148d6c1b59b1f24d50aa0a745f +size 58720256 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..aee6f7dde5136446b4327c1e121eec8f4ae90526 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f85308edc8bfbd4e997ac3a4b51244fdc24f8c1e72aee4a64f91a7ff8c3472e +size 33046528 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..65c3760c7c264128b086f96818f0c4447976bebc --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a4b6ce3d780aef03e4d0d615ac6284f3583eb4f93b4b78fa2c4fcc0f6f75e58 +size 29360128 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..a6034bba95cb566a8e270073986cd2c2b110590c --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9950592d13092e71a817afda0c29a1bbe625f602034f89f97b1724960d1d6590 +size 29360128 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..7eb7e68c31b2e6a8b792cca7c4ca04d9f3298ebb --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64928e89efa082e174f6813eb5be9538abc15b5ee546367c126ecb4ab17d3df0 +size 58720256 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9d6dd62c861e0eed5d96f555bc7ecaa8b1e8b3c --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97fbe90b9479efdb257b22859bd238291783263848279a794660be14982e3adf +size 22036480 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..808e5e9895424ec58b4e21bfb012fcbd88458562 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:865d71a9d8c4c56ab69bbd8b41db2f38062da2ef9543d05334f190be39cdf92a +size 65540096 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..dbd925595b089096d78c491cfbdcd8937a80cd79 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:566a01f81448949f1c8f1f05a375129fa91b7d1bcc1a9fc413cb3c5497485d77 +size 29360128 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..f56a3dd9b90f271d191e2f4ae51c247034af27a5 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba0acddd7ea897512c20c796b45e6ee11f63b6ee72f8d7a914b06d93b432aa7 +size 31801856 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b663ec5dbba703488ed3f0c84b14bc0d14a09c9 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff69ed72b732e4e28a3cb265912a24c8c1565685c76d3cd5d82bd08814007ce7 +size 58720256 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..cfa8da417669f3de0174902a979213b32f51901d --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:538d0aee12fa450c74e6b9304a8e9449f411055da6c2a5d645fdb959742c9ecd +size 25174016 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..e467d79eaf7c827c56e551fe6d0b2335b4059ad7 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562026b1c6ad959e07e3a456936f60380719c7fedfc7e193f80d72cfd5b0d90f +size 29360128 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd4c86220a088a5611665de67d84db56a53e49df --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:396135f34cebece2f8670958d6c10788247f8b75eb5dd0a5c045d0c174bce272 +size 58720256 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..8448fc6f5e2642bae10eeb78e1d3057e2e77f551 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8aa052d2c4590394d3a0c55370e250aa7a6589a6fc99fa88aac396d54347b26 +size 33046528 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..5eb22f04b31c13d192d5b9233ed01243850fe8e6 --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8286a17a6d4ed6f4f1558e1db194f0bee00e403956dedd414c19bcaa004514d9 +size 58720256 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..98d1694c5d3b154225a1634699837e9fae613cdf --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e5824dd51a607c9e930b9fb94ff678a0191494d60807ec657746724914506a +size 58720256 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..d08eed40e3dc3bbfebbd5268b0b80862148dde10 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4695d5dad261cdcbe419f60a02902fc78d55216879b05fb64a132c14d494beab +size 32505856 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca9ef16063b1406ffe7453ac0997980f4dc1d197 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff6c0c7b3061edfa8cee13d4b96e791e6aa9496c998a758c0fd181ac60f9283b +size 29360128 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd1b81b325194a617de16840acbab8e3c7f4c4ca --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f9f427047bd6dfb2a38cf6e803ef94f874641582c654c76af56dc019082d316 +size 58720256 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..95888a7a4735df1733023c968adb9ab69cfe0160 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a62b38e99999c16e4087ea41e875a33886ac5f38c417d26d051f0cd51ab9f2fe +size 33046528 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..89f19b52851c83231c4d16bacf4732a7937e9741 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d0395e5d051fe2bd626af1de969838bfcc3e78c4bed2da483fdfc7012b82d2 +size 29360128 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3bb2ddf2cb0d753a12a428192fe570f25870d93 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcbdf061cdedbc684528e35e2e4d3451ce275c07b5782be5f3b88ec71b12fd51 +size 58720256 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..4c1b0ef7f82adada2a6378cb853222a9b990b5a5 --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa53cf47e748dc714123931df4a0a2ea4572e20e1495f702d24df5e679171af +size 22036480 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..9a8a5bb84e1d24d1b67cd0cf7cfd75d2afb459ae --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc5af1dd569cd7f88bd8e89a8bdf27a378da6f35a9a0b0bc4ed6cf2bb8dc911d +size 29360128 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..3173ff8d12a732ca1b00d2f47539cb62467f781f --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c7e1f20be2692b60fa042f169ca0d1d63a3bc392ac56fe34669fa6c987aa40 +size 58720256 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..38e7c671ab4861a79f5c546267b57ec5c25e3e1e --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e8ce94fce852c60e097da2230ceb5edd732d012a7a2a57a05ec640da7e05fd0 +size 30245376 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..27d90bf0d00a503d57b67b8aaa8b0e9cc23e58a3 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed422d08aa836f484e071ff6eea7dec24a4b143a33a03c5784bb27c471cca02e +size 27271168 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b575a7f79f407522d62c951fddf7337cf82e087 --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3779ecdbb13dc801b29903b9d32c062e527f90d8bcc1e398fe952fe6b277f4c +size 29360128 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..ac205c0a2923154a654f5085e46d1927bdc1ee71 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97db18371ac7274dc131f21c0f8b61960e529d55d706df5c999d6104eb0f83b2 +size 30949376 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..071f76c9125ae539523c75189e0b8e5ce916b068 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f6528227613fab497141ce387d2e99cfdc88605c31703abb4ca74f1f1f8b4d2 +size 58720256 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..240da659a6f851e9e07b465e518157cc62ae7a6a --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e987c2f1bb24cc74cd5bbe563cf3f525c9ee9051f409790a6ab102d6e05b57e4 +size 25174016 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..094f868927074b0cb6bdc1040ae0d99c8832e4c9 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08b61e9788571e087db9daf5cad6e23eb7a48381e28616c24d9bbd5216b5b0ca +size 29360128 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..f4359e14b121bf2ee638d62893e41630d8c35f39 --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5cdd4c19349a3f1f235b51ed36bc9cc0b76d8f9042d2bab70edbd0e4c8d6163 +size 58720256 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c24426ab7d65b1b6a4424801c28aa3cc116c7b9 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d15b4216c8784383d411d84438ffeac89b535661d4f7a18fb0ddb0e807a0e153 +size 33046528 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..681bec7129dc229d2927277af3d72630b3a9ab40 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:507cbdb6a1bf50a71ec4ce4ca01bc4ec374550ddc8ef0b4bbba52278d552a805 +size 29360128 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..96bd845f6d84be49c28bc35a06f01a7ba2d8a9c8 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b09c33fa72b02a450d00a0d1390526716b14affadc25c905c78c8b3ad078377c +size 58720256 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..11c6e31c7c45255087bbb987023ea1da8c2ec411 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:540503c51d2e8eb3555d30a505c66f053c232594cda1e7e07eb2fb3cfe270cf5 +size 29360128 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2d29e7b730a244922e9f6ecb882cb2e7f5d3724 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f42f7c8a8e8f5ffe9d829c8eb15960d575a3ec3e1e4856b9c1a18d80eb0d1e4 +size 22036480 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..26f448b35b2e3c298258980f2002737d5a5de679 --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66e27deae5951e02b8d9dff4a14df61dddad7ac664af794fcc5624d23729598 +size 29360128 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb697c0e3dfd02d371ae47bfd736440531d8dbb3 --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a1c0fd5e54aec6ded9b5c97e5dc4bf1bf97c68732fc5a5e2c534578b6f63f7c +size 58720256 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..4cf687bde9899ab831af50eadc1ff5d5761cb390 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a28ac70b945312a2f54cf7aeab37ce2cbe3140eac91a4b69e5faccb1586ab612 +size 27271168 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..1db4859dc85783ac40c0a0c33ab88c0b03a3fe18 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52922afc7b7f11c9a69c49bc1c26a410f57c150e071491c4a277935b52da9b85 +size 29360128 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..6955b37fb701ec816f178e0a0c8c7b2301b6f8cd --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a94642187abc439d734025c4f15cfcb4ee890aa76cc25ee47207349db857d94e +size 30949376 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..65d2ef734cefb4445a7f1f963ebc4d07bf76be11 --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88c1e5ec2b8614b5d439b119a70886ae1e20ecb441270037470f022e98d28f5d +size 58720256 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..47b8604705391c89151c151992263a2e96a792ed --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:822c7d7b5788386ad8ab97e72a903ac79a31ed85a90a70a0e2c6838549db9d4c +size 25174016 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..739a682ddb8845cb3cfb0826000626b900df8ea5 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef1886adc1339b480061a34792ce0c9e76279f9b8057a410f6ca1d4edc859a62 +size 29360128 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..b9a087418de1d61c90587c0350e062495c0136de --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3b6deca884451798805303b9f1e82f89ffbb112c46b6182632ed39cbb47686d +size 29360128 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a5753b3a4d74d5972ebd8f43859a420059848a2 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef3ec2d40242fa32ec7060de81411afadc3afbca468421a87e29bf79693ac43f +size 58720256 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb228c227eec43c6c8b6533159b31f89903baaf7 --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aca86558a85d75a9f2b25678bba9d08e2ed6b64309f20f40848b0216e622ecf2 +size 58720256 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..cdc77cc7ace5de12b4fdeedc08eaf0d3a0410b9a --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:771ba7059a8735e7994e432efe51fd7fea7e2db96daaec332af253a9c6125aa8 +size 24150016 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8c1d1bd75d884d5f32ae7c97dbf4570d580db71 --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8accc0b7bb874834f578f90de7b5fca37c0e00236927911dca37d15cc493c0 +size 29360128 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..f691d95e9cbf5ad528e13b0542e9d402cd0d90f3 --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d0c4eda5a920d095577ba4ae49c60b14e04079a95dcebaa4b60acb124cd4126 +size 58720256 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..91b59a85d14ce9a0857047b15c167d5fe434372a --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3408a9db262da40610bd69e8dd7d9786085734f57cc4dd7904fa90f76832600 +size 27271168 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..d7f00f059a5ab8ecf16755ad3fd801b8ba6de0c0 --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c73f8c8444f83dc00154bd0b19e496420b33e36e535e91dbbf8b0f127448f540 +size 29360128 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..741ecbff1937a90d4785e894095b286486afedc4 --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db93a3d182f9de21cb143ed9905a2f3bcfbb0e35f8443851c7819a75fd018f8 +size 30949376 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..9682e4e2a2a2b7228350ca312861420b20d34163 --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e27573f0f2ce8eec46ee20be3de34e6173de9c4f1cebf731e2680c0d81686fc4 +size 58720256 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d5cf849ded7df5eef5170af8a5d1390dfbf92d3 --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a918b1c20fee9e516bfec0afa41fabc60df0ffdba312914bab504117d2fb51 +size 25174016 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..8bb4cddfb1ec3f3fe29447623a28034c75a8b792 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1c4f19ff90aff0da7ccc08ed65f04636fd6aab03e16d255b0bf3243405f033e +size 29360128 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..8367b757c60eb61115cefd40850da18996a9f0b9 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b34473e306f7a71b3ad264fc8ef68f416964b4e684aa3b269b2aec22a54c0a +size 27271168 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..ffce8d9bc05a3d4e2c5bb0b1f5356c5b9ec23c22 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d07669d7e6e72bbe6cd4a4c6fae70be59008899f484c00ccfa2bdc0fcbdd07e +size 58720256 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..25c6b90cadc501331a661564d09a4eb55e8ccbd9 --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a74d97e635c43d15842a6bd62d120642e3290de09a34d45eb1bf54d87f308fcc +size 33046528 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..6289a9e0aa68ff77717f553df8dc60af2b5112fa --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5451034aa250566724b0863f51327b4ebec01563260853eece4b7b6ecaa237 +size 29360128 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d954f7ab4bc4d4150fd2545b77daee0fff55597 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0640a5f17693538130a2cd40ae13756051cf36873485efb624b40dd636428b60 +size 58720256 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f8ed483cf2c36b92a053c1c372a200c6f09fc9c --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d703527cd1202a264672c0cde5b42d54cc380da241f733892610dd0e5099e1 +size 22036480 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ca37e80ae2a9dcc4b0a8fe5e70b8aa7d847849e --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd6f9c04263a444739f854600cf38d6e0150039f129115153add9e139f212702 +size 29360128 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..e3e07ba87f025ad30d6e9443fae3d56ce3ab7d2c --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0be486ecbb321600ed2b6c5c4b2e592fc0ff5a25d42dcf74af623a1edf544cb +size 58720256 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..d76235f603b7b9dbe6d67acc2a5efb51e039dbf0 --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32cb4bb435e0b21799c7d2c999259ae683b65ad17c3ef9de3c08d53c3a674762 +size 27271168 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..64fb950017d423f953cd465db028ee7a5abdaf0f --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83ebd452be5982fc0d049078d8ca729b037de41755dde9c571bc603cca9365c2 +size 29360128 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..c821284d27f16edba3afbba0872a5113de4bec48 --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ddec71270767938a913f2d08e547cdb9da20542f1704f9bbb5cc6afc93a419 +size 30949376 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..152efa63f34f0f3876f39a86f411fdc9687fc208 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3b6371dd5516d9d562ff2e89cc855a5e587c47e5826cfd57d22bdd58f2f85aa +size 29360128 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..4579ca9b3536cf713edc57437bc64674ca012045 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35da318e69e42a116e6d234368c5b83fffff8f062ddced54f77fa6c590ec522c +size 58720256 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..62ed85381c5a0fe261757f2778e5f177547137ff --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287fc928b3b8d325e30c54c13cd16a3273320295f8d57491d30090dcaee9f797 +size 25174016 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..4adc2cae8a537988f981db94463137280c46a383 --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:280f3c90bfa4b042e45fa0fde5c1e75a94a7afbdc516e7b2ea3e2d4b0ef838d9 +size 29360128 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..45f2d09af1f3dd6d86c75798e2e116a8ff50dc66 --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6637ce45b7f9b9be156d7c85c9e2dcc5717523d9b126bcbd2d0b6f3129d505f +size 58720256 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a94626dadfdc297bf1f6aa61d66716e7f3e3c20 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2edcbd2a91fc108c911727fd536cb07c6c4ce1b62c45be084e09d74a88275d23 +size 33046528 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..64a0ea369a7cb354d3de68f7fcb6c9637cabdd9f --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d54836c83fd0de149babfeb4cc491baf0f44f25da194ca09c7b27c2b2fd7f70e +size 29360128 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..3891ec24cb3ab7cab837f2c6bf701f79e5f6a1af --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca091e5db2e59ad22aeeaa66a19bd3a8eb517c97e6d4ddf08f6c12753906b314 +size 58720256 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb377ffddb8f5751470cf1943c47a07636c0ea23 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2320119ade984019fe29ff5fb8432cb0e8d4c020749556cfc0f402928f8751d +size 22036480 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..d30f691788bfa865db7b83769ae367c4fcc8dc43 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d6ac4edcd9b09d32b36d937f6364da169fbaa046c8c480352f41a637147c6be +size 29360128 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee8020df6d34f667a33a5c308b0296ec82cdb4dc --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9483c26f04acc421f80862798bd78fca2ad26d75c7f7883ee790bd86568e935c +size 58720256