diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..16d5d4a4fe069c8e077ca7fed078ea38c3bd2840 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,14 @@ +{ + "": 32011, + "<|assistant|>": 32001, + "<|endoftext|>": 32000, + "<|end|>": 32007, + "<|placeholder1|>": 32002, + "<|placeholder2|>": 32003, + "<|placeholder3|>": 32004, + "<|placeholder4|>": 32005, + "<|placeholder5|>": 32008, + "<|placeholder6|>": 32009, + "<|system|>": 32006, + "<|user|>": 32010 +} diff --git a/mlc-chat-config.json b/mlc-chat-config.json new file mode 100644 index 0000000000000000000000000000000000000000..7084b8a60f3881a23eb16b1141387eb869992a4d --- /dev/null +++ b/mlc-chat-config.json @@ -0,0 +1,128 @@ +{ + "model_type": "llava", + "quantization": "q4f16_1", + "model_config": { + "image_token_index": 32011, + "text_config": { + "model_type": "phi3", + "hidden_size": 3072, + "vocab_size": 32012, + "num_hidden_layers": 32, + "num_attention_heads": 32, + "intermediate_size": 8192, + "rms_norm_eps": 1e-05, + "num_key_value_heads": 32, + "position_embedding_base": 10000.0, + "context_window_size": 4096, + "prefill_chunk_size": 2048, + "head_dim": 96, + "tensor_parallel_shards": 1, + "max_batch_size": 1, + "kwargs": { + "_name_or_path": "Phi-3-mini-4k-instruct", + "architectures": [ + "Phi3ForCausalLM" + ], + "embd_pdrop": 0.0, + "eos_token_id": 32000, + "original_max_position_embeddings": 4096, + "pad_token_id": 32000, + "resid_pdrop": 0.0, + "sliding_window": 2047, + "torch_dtype": "bfloat16", + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" + }, + "bos_token_id": 1, + "hidden_act": "silu", + "initializer_range": 0.02, + "rope_scaling": null, + "tie_word_embeddings": false, + "transformers_version": "4.39.3", + "use_cache": true + } + }, + "vision_config": { + "hidden_size": 768, + "image_size": 224, + "intermediate_size": 3072, + "num_attention_heads": 8, + "num_hidden_layers": 12, + "patch_size": 16, + "projection_dim": 512, + "vocab_size": 32012, + "num_channels": 3, + "layer_norm_eps": 1e-06, + "kwargs": { + "model_type": "clip_vision_model" + } + }, + "vocab_size": 32012, + "context_window_size": 768, + "sliding_window_size": -1, + "prefill_chunk_size": 2048, + "tensor_parallel_shards": 1, + "max_batch_size": 80, + "text_architecture": "Phi3ForCausalLM" + }, + "vocab_size": 32012, + "context_window_size": 768, + "sliding_window_size": -1, + "prefill_chunk_size": 2048, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "mean_gen_len": 128, + "max_gen_len": 512, + "shift_fill_factor": 0.3, + "temperature": 0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 0.95, + "conv_template": { + "name": "phi-3", + "system_template": "<|system|>\n{system_message}", + "system_message": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user.", + "system_prefix_token_ids": [ + 1 + ], + "add_role_after_system_message": true, + "roles": { + "user": "<|user|>", + "assistant": "<|assistant|>" + }, + "role_templates": { + "user": "{user_message}", + "assistant": "{assistant_message}", + "tool": "{tool_message}" + }, + "messages": [], + "seps": [ + "<|end|>\n" + ], + "role_content_sep": "\n", + "role_empty_sep": "\n", + "stop_str": [ + "<|endoftext|>" + ], + "stop_token_ids": [ + 32000, + 32001, + 32007 + ], + "function_string": "", + "use_function_calling": false + }, + "pad_token_id": 0, + "bos_token_id": 1, + "eos_token_id": 32000, + "tokenizer_files": [ + "tokenizer.model", + "tokenizer.json", + "added_tokens.json", + "tokenizer_config.json" + ], + "version": "0.1.0" +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..c0adab2ba2d75d791b526e2a1a7cbf74e733212f --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,4183 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 2149464576.0, + "BitsPerParam": 4.388496039030708 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 49170432, + "records": [ + { + "name": "language_model.lm_head.q_weight", + "shape": [ + 32012, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 49170432, + "byteOffset": 0 + } + ], + "md5sum": "e977e1d99e35696bc3afeaa75f34699d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.20.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9e17e219937c9a198aba2ecdfe11f938" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 23460096, + "records": [ + { + "name": "language_model.lm_head.q_scale", + "shape": [ + 32012, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6146304, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.20.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 6146304 + }, + { + "name": "language_model.transformer.h.20.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 6152448 + }, + { + "name": "language_model.transformer.h.20.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 18735360 + }, + { + "name": "language_model.transformer.h.20.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 20308224 + }, + { + "name": "language_model.transformer.h.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23453952 + } + ], + "md5sum": "30fa7d1a7b4660f56797088e0b5cfb6f" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.21.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "67eab4fcf9cbd1606df780ac17e8175b" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.20.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.20.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.21.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.21.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.21.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.21.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "c9991e24ab15328bd4149b9ef6377897" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.21.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.21.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.21.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.21.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.22.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "87f4b37ffd5821e5668bb894dd1022f7" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.22.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e8ceff6f44e26265e9f6fc92d09b9159" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.22.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.22.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.22.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.22.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.22.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "915ca0f8f0201387e420d02e099569f7" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.23.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6383b04e4d19378c72106ee386120f35" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.22.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.22.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.23.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.23.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.23.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.23.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "495f09e39f8c99d4207327a1a6759041" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.23.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.23.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.23.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.23.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.24.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "abe49a7b8bf6bf50a6e2dacf4acdbc53" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.24.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1e764fd4a60ed88577f9e9b77bc08619" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.24.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.24.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.24.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.24.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.24.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "d01f74a0048ad42c6ea6c605a1313304" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.25.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1a85d0b2174d6745ae44b944b90e7bf1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.24.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.24.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.25.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.25.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.25.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.25.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "0acb48647a621095c00f16dbfe2e4912" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.25.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.25.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.25.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.25.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.26.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "0e6eec470bb41fa90769f54cdecb5ac6" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.26.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1c5dccd984a028491ca2f98fbb5599a4" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.26.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.26.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.26.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.26.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.26.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "603429a9f3dc0aeaef5fa1b089e5ef93" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.27.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5fc58acf1755c56a959878dcfe50573f" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.26.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.26.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.27.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.27.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.27.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.27.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "df45ab87fc2438bbe118c654ceced54e" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.27.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.27.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.27.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.27.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.28.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "c8702198114de4128e2cef3cbb2ecfc3" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.28.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d9c97dbff01268aea20cbbbddb2f8daf" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.28.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.28.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.28.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.28.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.28.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.28.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "7a8a5ac6573137fc7aac96baeabe86d1" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.29.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2f419fe0426474a07abbf77beba33d22" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.28.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.28.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.29.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.29.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.29.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.29.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.29.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "6f2e52c717c39dac73c5bac0352b5932" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.29.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.29.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.29.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.29.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.30.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "394dee2235343347a6bc1fcc41f1837a" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.30.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e364e2b8c05055ffc4ddbd339888fa7b" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.30.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.30.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.30.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.30.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.30.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.30.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "6b54e448177ad9e5704e38b9c1bf1c61" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.31.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bbc282217aa869e3b2b74ffba3f77ab6" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.30.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.30.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.31.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.31.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.31.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.31.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.31.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "431417ffde8ba479674b69719a156c50" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 49170432, + "records": [ + { + "name": "language_model.transformer.embd.q_weight", + "shape": [ + 32012, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 49170432, + "byteOffset": 0 + } + ], + "md5sum": "d91fa9d4488dbdb61cc1762eb59eacbb" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 27392256, + "records": [ + { + "name": "language_model.transformer.h.31.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.31.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.31.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.31.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + }, + { + "name": "language_model.transformer.embd.q_scale", + "shape": [ + 32012, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6146304, + "byteOffset": 21239808 + }, + { + "name": "language_model.transformer.h.0.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 27386112 + } + ], + "md5sum": "400abce47bbd58c3ee50197e8dfbc95e" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.0.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "60284f4bbb9aa062a1cad7c88d450326" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.0.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.0.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.0.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.0.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.0.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "1d732626957bda9cd13a25d9002978fc" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.1.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8b8c80c26c413541b633545686c9ab80" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.0.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.0.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.1.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.1.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.1.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.1.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "f156bc8c39bafc7e8d062ac22641ff96" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.1.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.1.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.1.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.1.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.10.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "eb95dbaaf3d1ae5bf658084b4372fafc" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.10.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0d623bf57406c64bb861971099e3878a" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.10.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.10.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.10.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.10.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.10.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "b0b125d04e4cdd73f14f0f1b92cbe352" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.11.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8d91c01c2610dcdef0034b6d74b9f083" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.10.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.10.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.11.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.11.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.11.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.11.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "1277d25ffc04bd8db42bf50b29019276" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.11.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.11.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.11.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.11.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.12.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "ccdd0555ece64a13f91f0193ae115afe" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.12.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "71c6b534184163da0973884b68627ada" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.12.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.12.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.12.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.12.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.12.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "37bec1479f549bc4a4fce453359a0b18" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.13.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0a32234aa7f4f1720bf5b2ceda686e8f" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.12.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.12.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.13.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.13.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.13.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.13.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "38982e1ba4416a9c1a43fe23ef6a46b0" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.13.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.13.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.13.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.13.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.14.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "88066c8a3cc31f8da54d2dac1c888176" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.14.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "274fe6271f13e75198e06c1dd0f6fbcc" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.14.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.14.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.14.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.14.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.14.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "f011ae0479e1ae9cadd65487086b8ff2" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.15.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "cbf764399d66945ba9e7b6d2b9b747c8" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.14.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.14.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.15.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.15.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.15.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.15.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "cb7c6fa3ccec40103314c3d6d34ac0e4" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.15.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.15.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.15.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.15.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.16.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "0e6f428e8f50c4fa29071d4fc83f4ac0" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.16.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "426658c8be2e813117593dc718f7ece1" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.16.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.16.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.16.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.16.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.16.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "a03418777dfac087a063e6e2b72d159b" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.17.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f18103d0f2dd174586525efeb592469d" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.16.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.16.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.17.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.17.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.17.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.17.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "915b83cad2a141e9a09b6e925de00a34" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.17.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.17.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.17.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.17.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.18.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "7ca33d293f0f08ae7b269aa04b0c4421" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.18.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1228ddf709245f77cb3699d8dc21cd47" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.18.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.18.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.18.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.18.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.18.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "88ba71c35188ba3a601bfc2b95df5801" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.19.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4229683ad46dd5f51cf576cf689ca2ce" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.18.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.18.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.19.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.19.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.19.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.19.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "c337a9b1bec4e93b592b0866fe0e782f" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.19.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.19.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.19.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.19.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.2.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "8e3e19f3e0e6aff075c743018630fced" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.2.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "71a8a1db3a46ba03bb2a1b225b1b7ba0" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.2.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.2.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.2.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.2.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.2.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "69d90a06c585ec0bf90785cfd72ef049" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.2.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.2.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.20.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.20.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 20643840 + }, + { + "name": "language_model.transformer.h.3.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "b909b55efdbb3a9da374ff8d8f138670" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.3.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0f1e66e48a15b7fcbbd250503b0d1e4f" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.3.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.3.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.3.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.3.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.3.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "af9d5247cc55ec8f931dcc1d0b6fe183" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.4.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f2b9ee314c5c94d3c731d31656417841" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.3.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.3.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.4.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.4.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.4.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.4.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "997c2c628c50712b50d7f26c26084d3e" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.4.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.4.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.4.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.4.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.5.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "4143b000c6ea0317abf7ce367d965a1e" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.5.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0aff6bb46fed4f8503a0fe2640f36af6" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.5.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.5.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.5.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.5.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.5.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "91127a278d13aff4d42c6f29f1f09d47" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.6.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b14b9a1ecc2bf637a18604ac9bf702c4" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.5.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.5.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.6.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.6.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.6.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.6.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "dce47da18f429ae05574459293e396b5" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.6.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.6.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.6.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.6.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.7.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "5856ef4af8d4312f507505180b614fa5" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.7.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4ae17cb79f6f2c8296adfcdeb202a4f0" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.7.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.7.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.7.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.7.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.7.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "42972fecba07f6be38542ce92ff3b551" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.8.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5094f00a0e50ae507b92f70a183b3284" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "language_model.transformer.h.7.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.7.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.8.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "language_model.transformer.h.8.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "language_model.transformer.h.8.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "language_model.transformer.h.8.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "language_model.transformer.h.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "a3ec0d681ba40d39e13f68678c81eb56" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "language_model.transformer.h.8.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.8.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "language_model.transformer.h.8.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "language_model.transformer.h.8.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "language_model.transformer.h.9.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "43bf093dde1a08e0f83d30439be9f5cb" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.transformer.h.9.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4a4a978152943f8d142761654e04fe71" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "language_model.transformer.h.9.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.9.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "language_model.transformer.h.9.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "language_model.transformer.h.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "language_model.transformer.h.9.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "language_model.transformer.h.9.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "3e5b484d31b787c98c58431f4cfc5427" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 15925248, + "records": [ + { + "name": "language_model.transformer.h.9.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "language_model.transformer.h.9.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + } + ], + "md5sum": "bd9301b52f682db4db7e91df8cefc931" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..454b2325086775a3c442b9f00764713c2ba880e0 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:012878f3317721928042a6cd2773081ec80324608d40ab47250cfc90d4790047 +size 49170432 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a1a694e179f7f2236c2385a989366b9431c99e6 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be3b4fcde84935fa8c57a3f886081fbc6e1be4f4d8057725202b6d888c07c83 +size 25165824 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..6cc744de5914f470ac8ff9f294fa23c6e126b17a --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93f8d18d0d10080f658eafb5bab940b6bf7dedcf6f0997274dfa937b10e9460 +size 21239808 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9957010f0ea63c3cab663b94015d6a518d47e71 --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33873aabda254c230b32e4edf7973b33091f5dc03d9840de50ecae2658430d9f +size 25165824 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..dfe7a6a615bb7b88854f6c0e269e32c535f31d80 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da36a93f2312841d761af65b3fa6edd1929f4ab60cf8264a0c4c302fb06e3412 +size 22616064 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d1a9cc82e2ee9efcec973e1d5e133eeef5c7ae4 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6afc66b33fb941da710d368a1fc49c2c0b2da3ddb4b378eda12d6fd1d844d2 +size 25165824 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..e02202e1e45f851096c0ad430e7e28a53aee054d --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5a82ac917bf0ee64a77d373e1f913185f575082eabcafbf4d9cb8046f66ccf0 +size 33239040 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..4836d28fcc0abc75b9f0232afaf21f141e54d254 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c02cefebf41529b5bf439256bd7081f85e41733c730bb209974f52d589cc3b +size 21239808 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a81e9e08bc36d807db02b1fba27140548044d74 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f900f620e62620a219fe773c52187e02dde9ce4b4c0369b25721ca5b760141 +size 25165824 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..a057c2db43545f2902b588dc8e2dce40117547d4 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d419a27aa255eb8f72715fbaa9c29fb4468baba2ead999d89c816e1382ec4c5c +size 22616064 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..c86e8396249658d39343b72786ac0cc784099ef1 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec75459ee44cf141487ae133a421ca2be2beb95b750dcc64d02cd604e0b1934e +size 25165824 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea55e5b51007e7585ee2b5d709ddac57a2bcc6ea --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fad1ce53aff642e2588c1f6af5fe20a2856d834d0590403dc1f8729fdf2c60c +size 33239040 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..077f8856e706543f78a6184fd06be3080fd6b4c8 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d5adb888497e87774c0d0fcf41d8b175fb2e140957873fdeb3f39848c502c4 +size 23460096 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c571da1049778493cb456384cc9e386853b1233 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45a40cd3f9c4f89df515a707c65191cf3f64c204c9c6195e0cebd5855e16cb1 +size 21239808 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..228972b9387e64df21d633fbefc5519d7fcd386e --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836b9bb80d1bffde911b5860aef5e24f0d742824067073b5422afc6f75d1620d +size 25165824 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b570f0698a6752af792b8f316703e6b2f9f6ca2 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02b71b140507b6c77208fcae3d08adb73717164382d50ecb5cab105ecfd74f72 +size 22616064 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb95d143953dc5d5d5a80ea6270863bbc9c686a7 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56de535180313ea5c846a132ff786bb522e63e2248339fd971ce48cea1cca05f +size 25165824 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..5f7e0ae1164963f24e70a8ea0291bc9d51bee983 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c4ef0d0dbe249444ab467e83512e0765bf14c31990a84937cb50d6fc561fb21 +size 33239040 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2976dbe74656dc6bee7f65f8dba1567f5f0c4d4 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a9c9373151340113a7344bef3b8916665a3cc257bfb7d58773dfff32eb2d904 +size 21239808 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..d944aca47bf4a97562c9a5aeb606539d5ff0b89c --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:897f7d79532522cf5fa3f08d2388fb43a26588f6bf6cce7265b4ef1af261a93b +size 25165824 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..623971a62a72859672c3f38a57a65f73877df911 --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03f50f3b28e9a4f64451fc72a7d4bdc518d51d8e0df5cf1c355c8fe199ebdba3 +size 22616064 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..64411d821aac6fde1790d50e135314523167f7e8 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95a2793e85d6ce7687dbdc1dedfe28534c63bbc9062d6c20f09875dd84dfd31 +size 25165824 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..be94ea28b12ccc76e0f3eda8f79d18735bc5a78f --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5362f85165c2d94652820b31c21033d99aabed92c0446a7d1b20ffbb2d92cdb +size 33239040 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..7ba0836a4d2d1985c7a778a819267f585c9ac507 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d4086bed6a473de0a57ad730383edf27c4b43426412b838e90429549b9fb7d +size 25165824 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..48828694d8840490edbc8c06e98c788f14f46015 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2590997363fa72b314e2aa9dbb45f5b4f51a63d2bbe0912601149b3d41e812d3 +size 49170432 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..a136ad4562bd646c66e9bba340389f26190c60a6 --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833d93e395bf664c71b26ade4ea289697ee042fb6b888933d45cd8348d975a07 +size 27392256 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..d6cad604289966ed48c18f4e61e6a5aa3d72c8ee --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b349f73e1016e6d954ed74ba1db2479bcaf1614c2c5fb17d257cba7ddee263 +size 25165824 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1f6a84fe9e17a7795079e0357490c9c1773544f --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f02278c8aca4ef3b4f48d2b392c42d044e8c7a709d09766883c38b08faa96480 +size 22616064 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..040a6d5c34c6616693ed6b52c0655ec551418d1a --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eec1f2eae0c0724a92745319ee784edb2a681c37d4313caaeabf62dbea4f2f6 +size 25165824 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..81338ec43d4611dc4fa805ca5e38fb00cd50ebbd --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db939cbc4694fca9eda07bb02f9600e3f3f045a462f4d9c734c2ce0f755409f9 +size 33239040 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..6dd947f10ee0d70d8d26449d0d284663a39f2aaf --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d33cca16674685174887d5fed65015e13d0c11ec3174a1844f2f4e359fbe004 +size 21239808 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ca4b84eb2327efdeb78560cac480c927407e4db --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:011926770845c28b50680387aa05c556e349d8fbc181dda07d482c12d3cbed41 +size 25165824 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..25aad1a76cc23c5c9b7f37dadfa731095667930f --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1143c354b595d9a153dbb92ad601aaad1595e39a63d3ee3c1ad8b26fd502d655 +size 22616064 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..36f109dc6d5b27ea4881fe5e10e14cee050647f4 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ebdb12a82bcea1ca21a9e426853ceb9086bec877925268fad2f0ebaf8917bcb +size 25165824 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..0cae721aed40f5b26c2ad1c34d85168bda0b2bc5 --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5fce8df5f137aaa376cfc9ea1e1d77f82b68ac7ceaeba0ba2c4dfa9e3564810 +size 33239040 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..11a0a52bd9dcf9a159047e70e30c74e857c9073d --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e157ee8bc75c3a60e1152e5af2f3e1df0a37d0d400c831b3e2cd439bbb70c0 +size 33239040 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..958fab5dffaede67979fcf717404be81e0429350 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cee1c59225cf00bf9d842915881c4ee540734a41a65540359b593858d605e30 +size 21239808 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..083a34df2e64c62d4ed833fe95234b4d29d3cda7 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78bdbaa27fa8ef1fc4eddae8dc2a0c75aad17ee0cd82e214ee1c2d221b78db34 +size 25165824 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..eba3fc34ae178e666b48c24524fc094ed5b328ec --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68f04836f61eb7fb026e09d0107d6917dd6d008a5e455cfa7ae4df93107c42d +size 22616064 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..d56a563806771d29dcf0ac3b88bafe11bf3444fd --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4504cd75bc46d54fc6ce8842d02957c896b93b459e45115a7f6dd63d5ced836b +size 25165824 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..8aeda662692039e6c29f45284e9e670f887440ec --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:485286be591bbbe1419f352a6b9a3bd4c95a5ed4e76a390cebd57728958aa5ef +size 33239040 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..36bb0eddc23fb93304fddb75677eda4ce294d139 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65882b28ae410efb09a41d80bcb2406e5026671d34d430c44f8f57310741f259 +size 21239808 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d704c2f5b78b39386b4aef680129fb8a08c6d42 --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3e5cde50e0bc431859e5c24ea78a9908431312a391b859ba6c73aab1ebfb381 +size 25165824 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..7f8787def07304e61809211fb3eba754787dad81 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fec42eae89118d08601a735a1b9b671f71613b7f62f7b887784cb7e290b74de8 +size 22616064 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c73fb8ae28d43226455def7cd8deff252302285 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:938bf8ad03a468b781c59c3d2f397b203906f4c5ae4cfdbad03fa0da4f4e0ac5 +size 25165824 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b41e9d1554a9b8dc233af93763402a85c62f88b --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f48b09abc9adbbd6817c458a87f5d502a41306d192ec9610772f8650d8006e42 +size 21239808 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..e29bb38d59ced51829bce50f4f3cfa8437e21044 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c0455059394d4baa0cdb5858166a764231b628c6a1719f04f423ca05da87dee +size 33239040 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..7ca600edb7bcef19fc42495c701276fc1aa3e8c8 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb201945c4df06e0c3bfd8387cdf2faa9a3f1363f2c60e9eb8135a545bd535fc +size 25165824 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..b79364db75c294eafe1581497892bab5471ad13e --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c67641b3e6d88827c05d13ba9d54ca4d55bf19bf35d59ff584b438c35b7ae28 +size 22616064 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..436013824e67b15f86f2192bfe56221f2bf0ca6c --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a0a6779881a4759c430479c118c3e9d6fa3723860f8353ee518213269025f65 +size 25165824 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b0984f3de34235e35aee593c5d4a8b71c4d84a1 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f194ccd10c4065d6b6fc10e1f6274956ed51e9a15bbdc46712c66b5f33debc2 +size 33239040