diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/mlc-chat-config.json b/mlc-chat-config.json new file mode 100644 index 0000000000000000000000000000000000000000..25a468e638110f5a8f9f8594aaff56dec8313a81 --- /dev/null +++ b/mlc-chat-config.json @@ -0,0 +1,45 @@ +{ + "model_type": "gemma", + "quantization": "q4f16_0", + "model_config": { + "hidden_size": 3072, + "hidden_act": "gelu", + "intermediate_size": 24576, + "attention_bias": false, + "num_attention_heads": 16, + "num_key_value_heads": 16, + "head_dim": 256, + "num_hidden_layers": 28, + "rms_norm_eps": 1e-06, + "vocab_size": 256000, + "position_embedding_base": 10000.0, + "context_window_size": 8192, + "prefill_chunk_size": 8192, + "tensor_parallel_shards": 1, + "max_batch_size": 80 + }, + "vocab_size": 256000, + "context_window_size": 8192, + "sliding_window_size": -1, + "prefill_chunk_size": 8192, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "mean_gen_len": 128, + "max_gen_len": 512, + "shift_fill_factor": 0.3, + "temperature": 0.7, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 0.95, + "conv_template": "gemma_instruction", + "pad_token_id": 0, + "bos_token_id": 2, + "eos_token_id": 1, + "tokenizer_files": [ + "tokenizer.model", + "tokenizer.json", + "tokenizer_config.json" + ], + "version": "0.1.0" +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..1b270a5d352a54e09f93e683081605b228b48792 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,3881 @@ +{ + "metadata": { + "ParamSize": 283, + "ParamBytes": 4802697216.0, + "BitsPerParam": 4.500235859834132 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 393216000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 256000, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 393216000, + "byteOffset": 0 + } + ], + "md5sum": "752374fe75aeec625f154bdba2af2447" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 49152000, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 256000, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 49152000, + "byteOffset": 0 + } + ], + "md5sum": "67ce689d7e81e4d7e847a5fb1dd51983" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "746ad94dbbc5e5c3946d13b5bf727e90" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "49adfd54eb8b3ab4e1e89e9dd3cdf47e" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33042432, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 6144 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 4724736 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14161920 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 14168064 + } + ], + "md5sum": "275395bf4fb959d0a8c89cfb24762dab" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "a1c5a7d8333657710b4dcaf2f8554be2" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f8a5be8a43625f7dae1d6a320dc985a8" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "472d2a7b0930900e3a25c6e78988b159" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "17362291fc07e4cadc28d9443c254f84" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 33048576, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23605248 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25964544 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 32256000 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33042432 + } + ], + "md5sum": "20c05b2a3fba919923aaf12970efa0ea" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "deb3af66351b2089ae9f8696f3519d6b" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 33036288, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 4718592 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14155776 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 14161920 + } + ], + "md5sum": "7801a6d91f0b4f1931d8a1ca8339ad2b" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "b73413de2a24bdf9a702b8be703a3915" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9233aeedb818c823fdbc8d63ea04e413" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c6207691bcd18659d1b0e384c061120d" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "d6fabb1b11e1eb51a297ffa3b684116a" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 33048576, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23605248 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25964544 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 32256000 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33042432 + } + ], + "md5sum": "9b7b81f3b8a8ce23e8362ca840e16727" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ee9fc39bfbf7689188bfa0921485eb3a" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33036288, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 4718592 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14155776 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 14161920 + } + ], + "md5sum": "d723cc19fe2c7583878c2892548dfaf8" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "4a8043e0455315b8f52fff4e9e2d4d94" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "374d4dc68dfdf03c8240a7342d0df8a2" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "6aacf0adf550fb907fc87fb9a948cf18" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "9a249cce2324da725e92f0a97b7f0236" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33042432, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23605248 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25964544 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 32256000 + } + ], + "md5sum": "289977dbbcf0627a243efdee02e33637" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "0e70c5559843d0f3f6e4aeb70f8cb16c" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "de718bbb230ac8e592a71b981abd5547" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "17850eaea95724f6f3c09c7dcd974454" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "b3cffd71d62beafd2e3ac066fcf052aa" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33048576, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23605248 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25964544 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 32256000 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33042432 + } + ], + "md5sum": "f7c85ef13a43237fa5943ed990b13616" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a6fd89d0f77762710e7dfc0ea3f865a1" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33036288, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 4718592 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14155776 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 14161920 + } + ], + "md5sum": "18093e8c9466adbd15fd8f76296eb0da" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "b063b6cfc160691f5584a9cf41739a0d" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "6d4e2c09057135b2dd07b60bccd375b6" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "3034411e64e200cff10cd54a935986ac" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "f5f276e4f4c0142e194de0bff2286947" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33048576, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23605248 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25964544 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 32256000 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33042432 + } + ], + "md5sum": "659bb12be1ee9cc3754feddb1357773a" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d03356ef88c3d69bd33a5fa1ca2c6e19" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 33036288, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 4718592 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14155776 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 14161920 + } + ], + "md5sum": "238920c9d8c5d46753599401e4a4979b" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "d0a82389fc83c2991b349df03e29713e" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ea5d93cbc3fae66165fe0f13224abaaf" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1f81a9b75a3e7e237668c90d852e4ec3" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "50bb1467a0ddba10596ea56fa3abdc7f" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 33042432, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23605248 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25964544 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 32256000 + } + ], + "md5sum": "e761bb62069ee6aeca9d6887ea1d8857" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "4a05c49ebed9b7d718073b183e8089a7" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "aeb1493a8fbe560c6837e0f5e691f668" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "95fa6545ae01c6ba07721595a1cf2a29" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "683f161e289529cb7201f33ca58ffcea" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 28329984, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23605248 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 23611392 + } + ], + "md5sum": "da47319eaa593ffd13886df802e1da66" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 30676992, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 9443328 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 28317696 + } + ], + "md5sum": "402604a556d211e06d2104fe58ff96c3" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "61536dad74dbab013377a94eeb70a14d" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9b597d135ccf9004b6fbd04764ce4c70" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "232c9ae1cad87524740d1ba388f3039d" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "62134d1562e43cdce27ae77c264f1e81" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 30689280, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 6291456 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 7077888 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 7084032 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11802624 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21239808 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21245952 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 23605248 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 29896704 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30683136 + } + ], + "md5sum": "88c532b62aa545e0953d8dc4cafbba5b" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d300d64fe443737a8c98b072b8912cea" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 33036288, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 4718592 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14155776 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 14161920 + } + ], + "md5sum": "9cf938fc1573f90129b2412574b8d9e7" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "103abf44635b18b7bb456662175bb5aa" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5e516279b5f807aeb382b85d12b79da6" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "a5b65d598ca50a2428d201280bae33eb" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "28dbcc483766ec6b0c426b53a87eb5e1" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 28329984, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23605248 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 23611392 + } + ], + "md5sum": "2dc22bbece44fa639ad72db56c28acf9" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 30676992, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 9443328 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 28317696 + } + ], + "md5sum": "ee563289d28ecd5f03bdd3b452bc1432" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "44964973b881155e5f588ab19d677420" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ee430d7c3ff86f5cd41188eac6322a52" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e43bdc9900d919f8d48eca61049172ad" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "02828fe9d6d6bf4d930867c4d005a1c5" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 30689280, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 6291456 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 7077888 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 7084032 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11802624 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21239808 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21245952 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 23605248 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 29896704 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30683136 + } + ], + "md5sum": "6e6ede6708786b529bc2bd386998d636" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "67d320c0608cedb1c0b6ddaca05529e4" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33036288, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 4718592 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14155776 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 14161920 + } + ], + "md5sum": "954c2acf938fea71682295d2f5fb0268" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "272587536bcdaaced3b76153138012f2" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8026f1ef2c15eaa109643418c1091608" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ce35d8725f21746904c59c40b521560a" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "bc3655dbc318a34cc4b8d651e3e15b6f" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33048576, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23605248 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25964544 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 32256000 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33042432 + } + ], + "md5sum": "4bc7af89c43b230da07154be89392246" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8332352971e7af2cab9e24bfdb1359ab" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 33036288, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 4718592 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14155776 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 14161920 + } + ], + "md5sum": "cddc31ae3164b327927d853519c99caf" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "84cbbb70c18bb2c8e85f870bc8d653e5" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "607b26798107e4b0fa7c3b1f49fda4f1" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "7003e513bf098f7b563fb2e17aa60cf5" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "08d04ed6d828c611bf911336e2ec3c24" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 33048576, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23605248 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25964544 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 32256000 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33042432 + } + ], + "md5sum": "93e8116fc5a867e452624fd46feda2f6" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "960c6a79997443b10271b08cc0cedab4" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 33036288, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 4718592 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14155776 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 14161920 + } + ], + "md5sum": "8277c7c01db1e7f0ef83f36415f5c5c3" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "adf2acc291412d70bef1def2858d27ec" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "6dd2ca671c40b63024612c503f646646" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8068db63028fdcbf58d9c115e74189df" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "444089513c22aeba4cb891d614382a15" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 33042432, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23605248 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25964544 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 32256000 + } + ], + "md5sum": "0f940ab38043aed006d96d3188d0cae3" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "6f701fd42867ede64499f94735822adb" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7b4b65f4fb128f3ac1d0f7d6e8229b87" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "118cc7806325ee0aca5c51e6ed610325" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4b9d8697d5b03b77e8a5b2c717feb9cf" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 28329984, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 9443328 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 14161920 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23599104 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23605248 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 23611392 + } + ], + "md5sum": "4931e61db7c99f002a56815c1cc6710d" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 30676992, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 9443328 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 28317696 + } + ], + "md5sum": "5605e054ee014037eb1b1cba8eeca4bd" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "c81766fbfdb81748a71a02d6dddaae2c" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7f341d507f778de05dcb0488b6c400a2" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ad7262dab759a60a8095f5e0fd741651" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "3e16173cea5901db081dfb9ed64aa10b" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 30689280, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 6291456 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 7077888 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 7084032 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11802624 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21239808 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21245952 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 23605248 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 29896704 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30683136 + } + ], + "md5sum": "556ecbf627fe71befbd3cbc4fca9c223" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 384, + 49152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f12eb6c39de5424dd4adedd9657e5d5e" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 33036288, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 768, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 96, + 49152 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 4718592 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 14155776 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 384, + 12288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 14161920 + } + ], + "md5sum": "a69aa623d966a661e7e77a24d84d954b" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 9443328, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 96, + 12288 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2359296 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 128, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 8650752 + }, + { + "name": "model.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 9437184 + } + ], + "md5sum": "7cbf7bd122b10968c893e9eb861abf72" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..696d29af1d6c91e160b6b37b0acf4a8caa9b37cc --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb07c314a0bc54bb896a328e543ba0b50dd49ee4e7d518a00daa14a550a76ba3 +size 393216000 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..e6ada0881ff56be16012a53e7fa2beb7c2c61423 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63fa7708104b0af3b9c6b126dee669516ac878feddd8a8dcb0e2509df9e7ccbc +size 49152000 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..435db0f3e561a7be5df46bbd593f1a06a749e0c1 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65961d7b8025fc7f14d8b6c580687f648f12a656b28a789ebb18e9356442160b +size 75497472 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca3dd870df7b8561e9ddf36ca4cdab24ed55b2cd --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f76c3df72da9b7379488f5c6bdef11085d4a809036b1353f1bf61eb48cb2eb8 +size 33036288 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1f429c1094c5a9849391340117e76b793ddd1af --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a45af99bf69b84e195a3df0851043a164d9e684becbd9121d164954c53c71b58 +size 9443328 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f0a7220afd104e2992290f596b9f18c0739b26e --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f4913f62d105a34cbce074ba5c264edaec67632fea802ed66f92e3653c6bc78 +size 33036288 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..11b5af3dbf1e265c95642f4d14ae06fb51ea2925 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb01f30569e854a6187ffc06f2ce19cb7aecbc51dc481b08265a9dbdd33c2da3 +size 37748736 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..4438dc432aef6136655a252873331b332b9bbe99 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b0d82858ec376e555a47e98fe1dc8a8d6a2fee712134f32205d05f680fefc96 +size 75497472 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..bed417645799a53b6853161618d1a4721b2a3522 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f9be9512520dbc8b715c629f63c92a45175a04f10cf822eddf7ef084feaeb0 +size 18874368 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..b33e7034fa4b153199dda044283f6964d9f86b71 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0333c009a47710d8873b3920d99bbe37357f80b34dfa2f53190d49d62e502024 +size 37748736 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..905b56ed29006d1fa860dc7d57556e68cd517a13 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb6fba705ada839bad5656a5451ce4de219c19a465e533d4fea5852afa77ddd0 +size 33048576 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..43c68b78d006f701c710402410264def677020e9 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2cc8dcae1634c4427ab4df0b7144ab1976e8444352c0d754fb2a10615547e9d +size 75497472 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..256023b0a29f09b7109b07208c2d1a9247952d7b --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:470ecabfeeba502b9def35bd8781e1cbd4016f9e6c9b9f022b851bd25bbe70ae +size 33036288 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..03ffcb3d1b8e0bf0c5a569e1b2d5d60e2b68fbc5 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:968ba69d9606f111cc3b940f2c617ba02602c53dfc042539aef11eb5216fdc21 +size 37748736 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8c2b79ca88e1fb9909861f06dfe84938ed8c23a --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3838dfc032d4e33d76e2d3685be1b2812378e8b8fe93d6cac5021278eb0cc8f +size 37748736 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..f254a2734a87ea57ad0775f88a1c76f3f29b276d --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cde06b4b48869021bce3649f0c4807d4f3eaa457bee16f9dd114a3f2c1390e2 +size 75497472 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..e28e247604995e82bb3838510d6614c537190486 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1816489510674bd335922204de404baa03034bc7a9313b42cc7711a11e21ac3 +size 18874368 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fd5799d2f197730c704d9812656666d4f921cba --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ac6e28bbf2e1e7f1377f55fc260d1ca845933e4b7e0f2e3256f43cc36f6a65f +size 18874368 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd11b2e139a2ed0bbe62ed665d9eaf4760a625ab --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a445724eaed413f1e0f1c1d45e0fb377f32291028c8bccae03dfe190b3d0d8 +size 33042432 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..998da04625322f7e3a665450457e79a378c6d0f2 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cd940330458e65d040eb511d1118f5ea32ef67df22cf6bc9b287e1bbbcffe47 +size 37748736 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae78cc4069d5cb0b4b1702bdcd2a133dd24bf4c6 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:110bde984fd473c04a88eeab749131fc8b8e6069ad272e291dc42cce3b0f020e +size 75497472 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a6d3891f7ed735ccf7c2a4b61518da34160e5af --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66a31c4c1d94fa4163e6ff3048395ca3abd4d8c2ff3355c7ac1ad484629d65b9 +size 18874368 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..b1b25b3800091d46ffe3a84423cb0306e2d7904d --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b091f498769fba2da0a0d050125015f09a0e97f6d2f2192348268ad284b876ac +size 37748736 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..1faa30ac93c1c0d60b110dc350c74e3c7d06ecab --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb75a91c1e30ab59d53aa09854e951193b88aa3b39fc25c011e5fd327aced7b1 +size 33048576 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb6bd354bb25d75496f9cb99356dd6d4ba79561c --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01e8902e5a99de97fa944f881beb82096e571289d4d607fa3a4285f4f86ed332 +size 75497472 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fd173dd03ea61e547875e90333113ad47e8b20f --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ef6f434167e77347331a631c335e667d927683ac90c419cc4fc02ebccc7af7 +size 75497472 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..351b012a9e5df39849f45515ad8408268b78b2d6 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ec3a26bb17c69dc2007f34ad678c06974289a45172c1a6b9086b0c0ea924b55 +size 33036288 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..e20173e2c7b693c04494fe35f83c6349e65ceb4c --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0b53780a6a026c810a77dc6af809d463075ac75ffed57059cfc59f5b797abc +size 37748736 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..411421a92a0e843b04acb13c7939a1b7d9243b95 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b8c0ed3b7e024f916e66bdd828122520b3baf680805f8872cfdee446ff61ebe +size 75497472 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..2eb582f0c5223093e583e1aae2c4604b74d0a440 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee36f174bf6b9aa4b988ffdcd575229a2e7bbd2b0aa9dee46ee33e6f6d909d1e +size 18874368 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..13f88be9c0d8e42abda3dba296970ecd6aba8e0a --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f52633ac0c6f9c0a6bfef93bd006bb22b9bdb376026b7dc0e1c015cf8b14099d +size 37748736 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..939cc24a0090c750877a8dacd267a29d0de9dc73 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63616ab7b04d2d2fb07680677e986becad34ec272f19be98ce387a21fcc9a43 +size 33048576 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ce1e1b82242ae3808006423c5d793cbf60c616b --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:479ce0fd5b1a66b9bc57a6f51532fb00c6c0dfbf0905d37bb543f01099a18ff7 +size 75497472 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe0d8c859e9bc1222823201f58b11558098dc32a --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:039128ba09120f1c4fae1d310b75b57978b8b8d8446b39ef5a77fbf886d751be +size 33036288 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..efaf8171e7ce53a5032200276917702c9a0f431e --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7119f7daa99f269f8cebafd5b96711e11749c6d35d22f01093b00fb773c5ee1 +size 37748736 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..695a3ccdfda131d9c487e1eb05bbff16bc0dfd82 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13b52dec000f552de9724cd54eb2324b6c34ceef7086cf62efb98b256cfe2337 +size 75497472 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b51091be7e89c7a8c11e1f461455e2ec963ea10 --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27320b23591ca86026f1bc6d7ec6bc040b970ffb2be0a6117c644f68ed7447d +size 33042432 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..c5e33f5b5a31a92e6bc7a6f705b78243bced0aa3 --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:613ffc0f63fedf7ff871fc5164aa3be3b344e4025b002372ea219cd9af06e66a +size 18874368 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2f835b274054cfbfb77f8db4f31b62bebbe9441 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f5d2bba620638ad1a8033ab318a78e0c5676bccb830fc92dcad05c72dfd3e2 +size 18874368 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d0011ccbd114ee903e0d2a6e79ab3c266c84899 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e7b09e3033aafe32e3b3ee9f8124492b8e3f4db0c9a63aad5b0bf340118b41c +size 33042432 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a658aa353602874dd0c794062de5cd5dcfb3b8d --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04c7929c277aadc6cef83fb949f50dfdad9a7b01de7ee56672f0bc702b4990b0 +size 37748736 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..26fffbe07673bb13af01b62c3998750c88ef1d1c --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79ae0ad5893cacdff1778b766d55e6ba6e6635ad8818b63d7eec2f833fc2c6c +size 75497472 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..928231dee97602a33afebf2465f0f92123d0a5c2 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b73ada6489689750fa1da5218f63737042a317841e5448aa338c3b335001b3ba +size 37748736 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..76f61d92025e81e596bb1f27cc3799601e342fcd --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf241480f96c85b20c931c3dddb6ff889a782362ee099e9329529a14c2c0813d +size 75497472 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..17f19e8303c65fe17173ecde75239e2aec987ff2 --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503c03712dec997620bc70958557df654cc0c38936e4b09e898317fa55feaa1a +size 28329984 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..e927243a4c168448f670328a59d250f75d64cab9 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3672ef6d074d87594884f8419cb3f2b10abb9f931a5b9ee02bb478a9690958cf +size 30676992 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe28acc9563514983b1d8c1215173724a31b39c4 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:924508c0d5338ac455fcca38b8f4f5d135ec420d7c78ea4a2a81b3c86eca9297 +size 37748736 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..1950338610e460ee21303316d4ffd25e84add903 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e491d1c6f159b48bab1639e11cc6032c3474326dab6f987bf8641bc382c5fc +size 37748736 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..025c051562d3524eb61d453e4952906194996218 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b9f408bace22685059dcf06e908c2de2c5f5f965222fe83de27e0fd721237c +size 75497472 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..647a55ad22647fbb97d77e0e3a5cc7591bc00370 --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e10a4bb07f191988d700f3977cc617bd9562563829c58450c9708281e7d5d94e +size 18874368 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..0c6b47d1407cee52106d5167459e52ea021383c2 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09cc31703388659c9b579dfc8d85c5866c12790563c20c3826cafcb3791b7ff7 +size 37748736 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c4f4004f194003328ff1e848997577acffab65 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bfd6978f77ce8680db2b9d41c5a34fbd0f99d7e02f1f6c3a02ea8f08d576715 +size 30689280 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a1c61d44360ed31c01d3946fabcd504b8f55d23 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864acccac24c4d6bde856f8962705199a05abe01216ed2b6e2578dc402752392 +size 75497472 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..93aa1228d4b3caaab81453542ee74da639a87db2 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5541034d536cee54c5f70990f47af943b724c364fdd9238ea55dd7a182450a1b +size 33036288 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..dd6d9d8d614525a294ebee0ca112a8d4bbe6a61e --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6a01ea830328261a00706dd6c6eb6d37358ea7ba60508a1724148d54b581255 +size 37748736 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf1c0f30a9743145f7879edd44b3c1a14a7b96de --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc8f005bf5c5152dd38705f27377e063d52ce0a08534a9ed2a2e29677954ff69 +size 75497472 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..798c032f5677e04361c428c5e5ab85593b3bf6e4 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e980a0efd956ad34555fcc201b55f254179b37f0221f90f4e7169177edcde392 +size 37748736 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..f294112c0abf642e2315b1de0c3934d418d0446f --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:990071f65c57a01d0c52aee10748c0c10000382b261315b4b24de393b033f3f8 +size 75497472 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..8906cc470a1e7a9b9664a599bec266e1673d50ac --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:581e963a9844a5e405e1da017415fb3d24837708b2b8a0ca098641897f6a002a +size 75497472 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..bfd7dea1e94af1fec020885567b2a506ee09da02 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638329697f980e9d2f61d8d81caf737ab1fcb6a823bfbf540c10806071fa8bd +size 28329984 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..b4c7d40996f009a662222d77ad65012970ecde20 --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b2ac41e27874dde97727ded8c38d7bd0360c52deae4611ec2dcf3a19d45c736 +size 30676992 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..481353a9c709a5ad2c2a846ade60285fd5a877a9 --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10541c3e63a00680a20061630bbf970364d12defcd2cdd954ef7658f0110dac7 +size 37748736 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b97908787f9b8b5e53969906f53ce9f4044f930 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1690df612af123378684b42579d18160385e6b0812b508f3825a30fe95c7097c +size 75497472 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..26e4c06d87d33b545211120897b2b0b9c9e04039 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1e024b784b6cb467fe15f0dcda9fee7ae285364aca5d7c55eafe45ff526f008 +size 18874368 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc5858df4c472d389ca16f2033c4bc1cb3ed1fba --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc8be16ae1ebca87aaff0663563f8991d6a17c30154035fb51e5b01bd619c34 +size 37748736 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..eef1a369b0b82740c659bd52de45b9b2103d729d --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9c3edb5282ba09dbecb552e64e8ef9fda3d46c0efa715244b67486c873b3e40 +size 30689280 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..337bb748d992d2b62a60a8a271ca9201dd17506f --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67b3fa5cd2137d5358f9af91dadb1a5650a0ff58899706ddedfaf484f7ac267 +size 75497472 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..18e1a0be6708874b6f6988b3d630794d598e7b62 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0f14cc88d5c8e55361e6ccf90dbdbedf608c40f5634cd44751e8d8f84edbec +size 33036288 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..2857c8b07f887732f08d719580ecc1687fafe7aa --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435c69b0854ca4c9289376e134e477731473b7ed44fca66f17febd0bb7539100 +size 37748736 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..ac60da06ada285c52ac206e7de074cca5fe02662 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832a0ba4fab45f71bb88c569fc7bd2ef5f26fa195f54c05bb2391e5568d881ad +size 18874368 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..0608d64feab7f8cf5edf903a9fc45f3b7793736a --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9bf7c6c1a112734e394b560f0604e599747b41f4badd1717e259e33f5feb234 +size 75497472 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..00b02d87d6d8180e6e527463db1a13a97a4b33f6 --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:318464c545c50d56d17be082cdf3a19bccfdae4def91d0fd2f0073e5fdb867ff +size 18874368 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b0220c177db08edcec0f98848daca381b5a1d09 --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f38495209e42097510c891e573dcf77ed62f059d21722b078566efea8416fe8 +size 37748736 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fdb0a04889f23dab93f3abd99822733cea5a5e5 --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b2466a8d47c00124cb6ebbdb017617c583989a84b98835d75e1f6e21e1ce8c6 +size 33048576 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..cc5a790e71e9ea534735f2ab839618830799b694 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc634d34dbe6331d05ce4bf8fdb310baeebc3573d0da325c35513a9d31ce348 +size 75497472 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..ce7755f4e358aa7a6ee56db873f912b2ac668cb9 --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fca34a97e35730fa65f1c4b51bc65a2898cf0eb30a3722839b72e9f368a354c +size 33036288 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..dc4331ecc14c8b9ca1bda63b15a6250cb1f1815d --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b00ee065ab9d9d046dadab1d21bc1ad81eb5808689bdae216891406be3e0a98 +size 37748736 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..e678a2efc2f4bb3e416e266328fcf49f1fd141fe --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60233b0955dcf6055af124e2bb0247839869110bd6c305b421dafa42a7772e2 +size 75497472 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..55f438a8b9a324e5aebe4c0cae2fb20905aaa416 --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f4ab53ca4209c67fcdb08be62cd5eb711edd76057f8e9f50e715c2efda75ff1 +size 18874368 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..df376779360e7938c454c7763c1a1be56762ef99 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baca3e2185745fbf014b003a82aee5f6531a3adc215606e93516c0bbf81f2eae +size 37748736 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..fab5ba61016ef6ca67f4b69f094925f14c2a7eba --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cea650708ced9f7ab50c0cf6b4e976c95d4020f5db325bf129d48e2e7d56d67 +size 37748736 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..86cf0b1bb270f895dab4ecd60f7036596cf55254 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d70f2ba8642b77ef573d537813763e8187c3ed83d11cc8f4a638cc51d21d312 +size 33048576 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..dff9fd08fbb399d133e5f2be1742d487cc7d710a --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:546bdbba0d75a6ba777856399e5d85352f478f925fb11e703686b4bf3f7612d4 +size 75497472 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..d27c4501f604eb634b95a4cbd7c2726c7cc7eb0b --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba8d39fb62140438227f24127b2f62c4ca13c2a92ae9d9b611f3c6ebe7288c27 +size 33036288 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..dbe92327182ec9e033f416463d033c43ba2a77a5 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bb73a4724637204fa8d9b8be9607f46e94a8285b3227124edfbaa7ab311bb5c +size 37748736 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..b09a86c2b299fa3c0c53d8b28e83335280263476 --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f125b0702dcff80c50c9ce9697aeea747e50aaeaf2093246c6d792879b5d9f1 +size 75497472 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b5cbb0f5f716413654aa83c8d034404cd3df82e --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8a168f70cf2ebfd1b86a93f6e7364e6fa24718052aca1e8e46c5a816b311f38 +size 18874368 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..2401574162aa572b10121818a4845275958509d6 --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4f9de4c5906c0ee457759d86982127ed601ac113ff1e58419a4bf24e2d7ac3e +size 18874368 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..bae81e7898fd32df935d872ceb7ccafe24d2b85a --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9721d493adae47f492f985be0aec28a8deb296884cf17d6097b907e155485f12 +size 33042432 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..a82eaf18e1e25d5c4ed12d088e4179e8265469fc --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb39948effe57e7b3a586e3d3cffbe784a0144caf44d715c4ad66c1a619bde41 +size 37748736 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..0b197467e43798da65791bf3b85e839885f95f20 --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74e1bfa43584dea76e47d5a23ab45cb66c1dc771a24c6ed4b606e99ffda0ef4f +size 75497472 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..35dd467128ffee30436e144b74d6249eb106a505 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8a13808e2a2107dda27b05db39a10b40a345bec77246acb64a9df9bc1545c1d +size 33048576 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..467f64c573ebf07488f5dae7b3bfc5d7680d3211 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2cba978447aceefdd07985b067d68734db315153683aeb7b12ad4c4a1757ea2 +size 37748736 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3c173c9018b92a291a37cce6fad36b91a84fa9a --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0cb0fdb1755aa87f4d914a7e1ea4ab6f38bdb1541bb367851ad19778c5daa2 +size 75497472 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4ee3ba30422179916b1292b0953e2ddea9383f1 --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe983d9bb5d009dd85745ec414e87cfecf141deceb037d3a7c4edfd8c372d15d +size 28329984 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..36ad3106e2f29f83b2910dd606dc35cbbffc45fc --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bea4cfc2f0d081fd2eeddb8b5b1f89c048bf53319376b921865ca56bc3568097 +size 30676992 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..602749e265e283845be1f02f0445304bcbe3487f --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cfc847d8631a4baab6d7605aaae36753a02df50230d5da1ccee39cedcf9b7b4 +size 37748736 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b183b86b738ff2f4272041e93b31b5be7015788 --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3969f0476ed89b3045d34040ab9300d4414e6557ddb058dfa6225621f28f219 +size 75497472 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..a478b4bc03d6d9299c4b8653b256c4f7741c3df4 --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c0d717e917c92246891ceb5e1c489bbb077ce82415c1f1d12091db7b1de7b2 +size 18874368 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..ce691b6ec72d19373e3f2d9d6b725e899bec022e --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e0c393f4e012adeb34bd58cb2a8412c00a69ffbad3c3d45a231b0e1aaf14eb4 +size 37748736 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..74eb2804d339e68f37890665d1cefa932777cd66 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30463d4531162e4fc7d8bb51b7a761727163bd7c8fd395f99a8f1109447c2dc2 +size 30689280 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..558e6e83b6ed1789195bc72494663f2ba10f8484 --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30463d4531162e4fc7d8bb51b7a761727163bd7c8fd395f99a8f1109447c2dc2 +size 75497472 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c0fa7ef562b9e0929bb27c335e3595f2366d39a --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30463d4531162e4fc7d8bb51b7a761727163bd7c8fd395f99a8f1109447c2dc2 +size 17477929 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1451ca400a880c5056ab40cb960930cf827de2de --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,70 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": null, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}