diff --git a/mlc-chat-config.json b/mlc-chat-config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f624e409472f0124ef3ad3981918ce37b7a8ebb4
--- /dev/null
+++ b/mlc-chat-config.json
@@ -0,0 +1,42 @@
+{
+ "model_type": "minicpm_v",
+ "quantization": "q4f16_1",
+ "model_config": {
+ "hidden_size": 2304,
+ "intermediate_size": 5760,
+ "num_attention_heads": 36,
+ "num_hidden_layers": 40,
+ "rms_norm_eps": 1e-05,
+ "vocab_size": 122753,
+ "position_embedding_base": 10000.0,
+ "num_key_value_heads": 36,
+ "head_dim": 64,
+ "sliding_window_size": 1024,
+ "prefill_chunk_size": 1024,
+ "attention_sink_size": 4,
+ "tensor_parallel_shards": 1
+ },
+ "vocab_size": 122753,
+ "context_window_size": -1,
+ "sliding_window_size": 1024,
+ "prefill_chunk_size": 1024,
+ "attention_sink_size": 4,
+ "tensor_parallel_shards": 1,
+ "max_batch_size": 80,
+ "mean_gen_len": 128,
+ "max_gen_len": 512,
+ "shift_fill_factor": 0.3,
+ "temperature": 1.0,
+ "repetition_penalty": 1.0,
+ "top_p": 0.5,
+ "conv_template": "LM",
+ "pad_token_id": 0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "tokenizer_files": [
+ "tokenizer.model",
+ "tokenizer.json",
+ "tokenizer_config.json"
+ ],
+ "version": "0.1.0"
+}
\ No newline at end of file
diff --git a/ndarray-cache.json b/ndarray-cache.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c82f98a541e516a9989c35742b90c6871c6e125
--- /dev/null
+++ b/ndarray-cache.json
@@ -0,0 +1,10252 @@
+{
+ "metadata": {
+ "ParamSize": 898,
+ "ParamBytes": 2952110912.0,
+ "BitsPerParam": 7.485917575658407
+ },
+ "records": [
+ {
+ "dataPath": "params_shard_0.bin",
+ "format": "raw-shard",
+ "nbytes": 28214784,
+ "records": [
+ {
+ "name": "llm.model.layers.36.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.36.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 4608
+ },
+ {
+ "name": "llm.model.layers.36.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6640128
+ },
+ {
+ "name": "llm.model.layers.36.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7469568
+ },
+ {
+ "name": "llm.model.layers.36.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8299008
+ },
+ {
+ "name": "llm.model.layers.36.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21570048
+ },
+ {
+ "name": "llm.model.layers.36.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23228928
+ },
+ {
+ "name": "llm.model.layers.36.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24887808
+ },
+ {
+ "name": "llm.model.layers.36.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 24892416
+ },
+ {
+ "name": "llm.model.layers.36.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 27546624
+ },
+ {
+ "name": "llm.model.layers.36.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 27878400
+ },
+ {
+ "name": "llm.model.layers.37.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 28210176
+ }
+ ],
+ "md5sum": "35e881a0efe648d25103b9fa7e33067b"
+ },
+ {
+ "dataPath": "params_shard_1.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.37.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.37.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.37.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.37.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.37.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.37.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.37.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.37.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "21f1eebdca43e049d6b3aa5e63e15e5e"
+ },
+ {
+ "dataPath": "params_shard_2.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.37.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.37.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.37.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.37.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.37.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.38.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.38.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.38.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.38.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.38.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.38.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.38.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.38.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "fb73ea3fb64c204a020b307341493942"
+ },
+ {
+ "dataPath": "params_shard_3.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.38.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.38.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.38.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.38.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.38.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.38.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.39.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.39.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.39.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.39.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "860d07237df0044ddcc09fafb2a8f22c"
+ },
+ {
+ "dataPath": "params_shard_4.bin",
+ "format": "raw-shard",
+ "nbytes": 29882880,
+ "records": [
+ {
+ "name": "llm.model.layers.39.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.39.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.39.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.39.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.39.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.norm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ },
+ {
+ "name": "resampler.attn.q_proj.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29869056
+ },
+ {
+ "name": "resampler.attn.k_proj.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29873664
+ },
+ {
+ "name": "resampler.attn.v_proj.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29878272
+ }
+ ],
+ "md5sum": "613cf80845cbabf67bad7f564a8022a7"
+ },
+ {
+ "dataPath": "params_shard_5.bin",
+ "format": "raw-shard",
+ "nbytes": 31855104,
+ "records": [
+ {
+ "name": "resampler.attn.q_proj.weight",
+ "shape": [
+ 2304,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10616832,
+ "byteOffset": 0
+ },
+ {
+ "name": "resampler.attn.k_proj.weight",
+ "shape": [
+ 2304,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10616832,
+ "byteOffset": 10616832
+ },
+ {
+ "name": "resampler.attn.v_proj.weight",
+ "shape": [
+ 2304,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10616832,
+ "byteOffset": 21233664
+ },
+ {
+ "name": "resampler.attn.out_proj.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 31850496
+ }
+ ],
+ "md5sum": "b70f08ce50cf05ab62a298950a385886"
+ },
+ {
+ "dataPath": "params_shard_6.bin",
+ "format": "raw-shard",
+ "nbytes": 31880448,
+ "records": [
+ {
+ "name": "resampler.attn.out_proj.weight",
+ "shape": [
+ 2304,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10616832,
+ "byteOffset": 0
+ },
+ {
+ "name": "resampler.kv_proj.weight",
+ "shape": [
+ 2304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 5308416,
+ "byteOffset": 10616832
+ },
+ {
+ "name": "resampler.ln_kv.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15925248
+ },
+ {
+ "name": "resampler.ln_kv.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15929856
+ },
+ {
+ "name": "resampler.ln_post.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15934464
+ },
+ {
+ "name": "resampler.ln_post.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15939072
+ },
+ {
+ "name": "resampler.ln_q.bias",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15943680
+ },
+ {
+ "name": "resampler.ln_q.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 15948288
+ },
+ {
+ "name": "resampler.pos_embed_k",
+ "shape": [
+ 1024,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 15952896
+ },
+ {
+ "name": "resampler.pos_embed",
+ "shape": [
+ 64,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 294912,
+ "byteOffset": 20671488
+ },
+ {
+ "name": "resampler.proj",
+ "shape": [
+ 2304,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10616832,
+ "byteOffset": 20966400
+ },
+ {
+ "name": "resampler.query",
+ "shape": [
+ 64,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 294912,
+ "byteOffset": 31583232
+ },
+ {
+ "name": "vpm.blocks.0.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 31878144
+ }
+ ],
+ "md5sum": "c6c0bf99d5f816bd2e58612ea6e5a7c3"
+ },
+ {
+ "dataPath": "params_shard_7.bin",
+ "format": "raw-shard",
+ "nbytes": 33140128,
+ "records": [
+ {
+ "name": "vpm.blocks.0.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.0.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 2654208
+ },
+ {
+ "name": "vpm.blocks.0.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 2661120
+ },
+ {
+ "name": "vpm.blocks.0.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 10623744
+ },
+ {
+ "name": "vpm.blocks.0.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 10632352
+ },
+ {
+ "name": "vpm.blocks.0.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 20548768
+ },
+ {
+ "name": "vpm.blocks.0.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 20551072
+ },
+ {
+ "name": "vpm.blocks.0.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 30467488
+ },
+ {
+ "name": "vpm.blocks.0.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 30469792
+ },
+ {
+ "name": "vpm.blocks.0.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 30472096
+ },
+ {
+ "name": "vpm.blocks.0.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 30474400
+ },
+ {
+ "name": "vpm.blocks.1.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 30476704
+ },
+ {
+ "name": "vpm.blocks.1.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 30479008
+ },
+ {
+ "name": "vpm.blocks.1.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 33133216
+ }
+ ],
+ "md5sum": "c3ad13cb76623ef5d880b2b9311dc535"
+ },
+ {
+ "dataPath": "params_shard_8.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.1.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.1.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.1.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.1.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.1.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.1.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.1.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.1.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.1.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.10.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.10.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.10.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "45a5b2b91fa99add2a1743647e5dfcb0"
+ },
+ {
+ "dataPath": "params_shard_9.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.10.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.10.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.10.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.10.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.10.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.10.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.10.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.10.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.10.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.11.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.11.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.11.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "7fcb2887fc163eddabf5bdcdc534b3ad"
+ },
+ {
+ "dataPath": "params_shard_10.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.11.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.11.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.11.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.11.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.11.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.11.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.11.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.11.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.11.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.12.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.12.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.12.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "68ed719e489bf53061f9f1b0ec99b580"
+ },
+ {
+ "dataPath": "params_shard_11.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.12.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.12.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.12.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.12.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.12.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.12.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.12.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.12.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.12.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.13.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.13.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.13.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "8edcd48dd209bd2dccfbb49897244fe9"
+ },
+ {
+ "dataPath": "params_shard_12.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.13.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.13.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.13.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.13.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.13.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.13.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.13.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.13.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.13.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.14.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.14.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.14.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "4ae0f16883658cc23ef7ae87f4d8fc4d"
+ },
+ {
+ "dataPath": "params_shard_13.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.14.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.14.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.14.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.14.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.14.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.14.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.14.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.14.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.14.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.15.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.15.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.15.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "ab50f345a4ac97461bf07c8408d87a6f"
+ },
+ {
+ "dataPath": "params_shard_14.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.15.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.15.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.15.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.15.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.15.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.15.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.15.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.15.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.15.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.16.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.16.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.16.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "9b3bdcef0674bc4656ecc1fda38f39ed"
+ },
+ {
+ "dataPath": "params_shard_15.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.16.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.16.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.16.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.16.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.16.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.16.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.16.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.16.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.16.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.17.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.17.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.17.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "0f260f95345406805c449ec965de283c"
+ },
+ {
+ "dataPath": "params_shard_16.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.17.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.17.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.17.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.17.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.17.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.17.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.17.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.17.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.17.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.18.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.18.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.18.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "e1c9e9a7039ad2982e15f9402dd1a320"
+ },
+ {
+ "dataPath": "params_shard_17.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.18.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.18.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.18.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.18.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.18.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.18.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.18.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.18.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.18.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.19.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.19.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.19.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "1c27e2bdfec0defdbb610b2c85551131"
+ },
+ {
+ "dataPath": "params_shard_18.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.19.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.19.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.19.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.19.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.19.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.19.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.19.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.19.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.19.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.2.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.2.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.2.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "c27aa9cf5c3d850bd5b5f8373a4d11be"
+ },
+ {
+ "dataPath": "params_shard_19.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.2.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.2.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.2.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.2.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.2.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.2.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.2.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.2.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.2.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.20.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.20.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.20.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "8dbcd4e41a6e8853e3907b32c2735c8f"
+ },
+ {
+ "dataPath": "params_shard_20.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.20.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.20.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.20.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.20.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.20.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.20.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.20.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.20.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.20.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.21.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.21.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.21.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "2afb946a41f7e6a3e320814ed8f2f840"
+ },
+ {
+ "dataPath": "params_shard_21.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.21.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.21.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.21.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.21.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.21.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.21.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.21.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.21.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.21.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.22.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.22.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.22.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "af821f2a3624447a51e713b02c51ccf9"
+ },
+ {
+ "dataPath": "params_shard_22.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.22.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.22.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.22.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.22.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.22.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.22.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.22.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.22.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.22.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.23.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.23.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.23.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "dda805e13ee7fcad796f1e45d5e6e42c"
+ },
+ {
+ "dataPath": "params_shard_23.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.23.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.23.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.23.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.23.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.23.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.23.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.23.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.23.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.23.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.24.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.24.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.24.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "045f93b29c0d66e29f7e42ff9783ac3c"
+ },
+ {
+ "dataPath": "params_shard_24.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.24.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.24.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.24.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.24.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.24.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.24.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.24.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.24.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.24.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.25.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.25.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.25.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "eedb6dda6685e17f7a6b2c2eafd0f407"
+ },
+ {
+ "dataPath": "params_shard_25.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.25.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.25.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.25.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.25.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.25.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.25.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.25.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.25.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.25.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.3.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.3.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.3.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "19dd1f2ac66cca0491cd50c845dd94eb"
+ },
+ {
+ "dataPath": "params_shard_26.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.3.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.3.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.3.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.3.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.3.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.3.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.3.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.3.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.3.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.4.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.4.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.4.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "6788f4d1f7fa056b27ad996689ddf628"
+ },
+ {
+ "dataPath": "params_shard_27.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.4.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.4.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.4.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.4.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.4.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.4.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.4.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.4.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.4.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.5.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.5.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.5.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "da49fda13457516aa8570c3443f3f872"
+ },
+ {
+ "dataPath": "params_shard_28.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.5.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.5.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.5.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.5.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.5.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.5.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.5.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.5.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.5.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.6.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.6.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.6.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "6af5d8de40f68c73db16e9610289f318"
+ },
+ {
+ "dataPath": "params_shard_29.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.6.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.6.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.6.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.6.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.6.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.6.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.6.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.6.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.6.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.7.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.7.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.7.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "0d4ceca3b257163c00dc1068bea5224f"
+ },
+ {
+ "dataPath": "params_shard_30.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.7.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.7.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.7.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.7.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.7.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.7.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.7.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.7.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.7.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.8.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.8.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.8.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "287638c1f051e3824256437107a74204"
+ },
+ {
+ "dataPath": "params_shard_31.bin",
+ "format": "raw-shard",
+ "nbytes": 30479008,
+ "records": [
+ {
+ "name": "vpm.blocks.8.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.8.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.8.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.8.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.8.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.8.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.8.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.8.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.8.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.blocks.9.attn.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.blocks.9.attn.proj.weight",
+ "shape": [
+ 1152,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.blocks.9.attn.qkv.bias",
+ "shape": [
+ 3456
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6912,
+ "byteOffset": 30472096
+ }
+ ],
+ "md5sum": "b114610d1b36bf2553f674a0b8fab3fb"
+ },
+ {
+ "dataPath": "params_shard_32.bin",
+ "format": "raw-shard",
+ "nbytes": 565645824,
+ "records": [
+ {
+ "name": "llm.model.embed_tokens.weight",
+ "shape": [
+ 122753,
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 565645824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "02f59850bf732470b7c83d87210f4b79"
+ },
+ {
+ "dataPath": "params_shard_33.bin",
+ "format": "raw-shard",
+ "nbytes": 31541152,
+ "records": [
+ {
+ "name": "vpm.blocks.9.attn.qkv.weight",
+ "shape": [
+ 3456,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "vpm.blocks.9.mlp.fc1.bias",
+ "shape": [
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 8608,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "vpm.blocks.9.mlp.fc1.weight",
+ "shape": [
+ 4304,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 7971232
+ },
+ {
+ "name": "vpm.blocks.9.mlp.fc2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 17887648
+ },
+ {
+ "name": "vpm.blocks.9.mlp.fc2.weight",
+ "shape": [
+ 1152,
+ 4304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9916416,
+ "byteOffset": 17889952
+ },
+ {
+ "name": "vpm.blocks.9.norm1.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27806368
+ },
+ {
+ "name": "vpm.blocks.9.norm1.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27808672
+ },
+ {
+ "name": "vpm.blocks.9.norm2.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27810976
+ },
+ {
+ "name": "vpm.blocks.9.norm2.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27813280
+ },
+ {
+ "name": "vpm.norm.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27815584
+ },
+ {
+ "name": "vpm.norm.weight",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27817888
+ },
+ {
+ "name": "vpm.patch_embed.proj.bias",
+ "shape": [
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2304,
+ "byteOffset": 27820192
+ },
+ {
+ "name": "vpm.patch_embed.proj.weight",
+ "shape": [
+ 1152,
+ 3,
+ 14,
+ 14
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1354752,
+ "byteOffset": 27822496
+ },
+ {
+ "name": "vpm.pos_embed",
+ "shape": [
+ 1,
+ 1024,
+ 1152
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 2359296,
+ "byteOffset": 29177248
+ },
+ {
+ "name": "llm.model.layers.0.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 31536544
+ }
+ ],
+ "md5sum": "3b0465eb5dacf342c63d8433766ccfb3"
+ },
+ {
+ "dataPath": "params_shard_34.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.0.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.0.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.0.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.0.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.0.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.0.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.0.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.0.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "0dbc5229b535940f8f1abb4a8ba99c7f"
+ },
+ {
+ "dataPath": "params_shard_35.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.0.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.0.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.0.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.0.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.0.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.1.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.1.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.1.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.1.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.1.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.1.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.1.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.1.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "5b9312985ea77d7a9bdead27bf54ed44"
+ },
+ {
+ "dataPath": "params_shard_36.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.1.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.1.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.1.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.1.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.1.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.1.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.10.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.10.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.10.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.10.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "3bc78c7b03657ecc6e3db691e11c8dd0"
+ },
+ {
+ "dataPath": "params_shard_37.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.10.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.10.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.10.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.10.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.10.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.11.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "2eb9ae047f1d08451ca7b4371e959abb"
+ },
+ {
+ "dataPath": "params_shard_38.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.11.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.11.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.11.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.11.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.11.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.11.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.11.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.11.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "bcde85dcfe5102b8a3b976c22d702528"
+ },
+ {
+ "dataPath": "params_shard_39.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.11.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.11.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.11.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.11.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.11.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.12.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.12.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.12.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.12.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.12.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.12.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.12.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.12.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "7a7512c0da8b04112754123dd03c0a54"
+ },
+ {
+ "dataPath": "params_shard_40.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.12.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.12.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.12.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.12.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.12.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.12.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.13.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.13.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.13.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.13.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "be9a5d488f8b31939678dc976702b803"
+ },
+ {
+ "dataPath": "params_shard_41.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.13.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.13.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.13.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.13.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.13.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.14.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "ca4e619827037459ea9f8fe2154fd310"
+ },
+ {
+ "dataPath": "params_shard_42.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.14.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.14.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.14.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.14.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.14.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.14.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.14.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.14.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "67d703e1bbf6501fb8213036ee9bd843"
+ },
+ {
+ "dataPath": "params_shard_43.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.14.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.14.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.14.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.14.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.14.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.15.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.15.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.15.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.15.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.15.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.15.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.15.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.15.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "2ddf36644f25b8997edaee3186a463eb"
+ },
+ {
+ "dataPath": "params_shard_44.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.15.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.15.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.15.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.15.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.15.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.15.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.16.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.16.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.16.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.16.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "e970d3419c57d33cae9353a42fceab1a"
+ },
+ {
+ "dataPath": "params_shard_45.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.16.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.16.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.16.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.16.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.16.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.17.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "166b74ba4507e712827c3bcc015ec3c0"
+ },
+ {
+ "dataPath": "params_shard_46.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.17.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.17.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.17.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.17.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.17.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.17.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.17.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.17.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "a69d268ece2807f675f4ba28cca72486"
+ },
+ {
+ "dataPath": "params_shard_47.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.17.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.17.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.17.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.17.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.17.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.18.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.18.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.18.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.18.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.18.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.18.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.18.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.18.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "0d68b513081b20d4725b7e757df40d34"
+ },
+ {
+ "dataPath": "params_shard_48.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.18.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.18.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.18.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.18.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.18.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.18.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.19.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.19.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.19.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.19.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "ee1c975ff22c3d9a92ac7dc743837e4f"
+ },
+ {
+ "dataPath": "params_shard_49.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.19.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.19.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.19.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.19.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.19.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.2.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "e39f521a2d80808243ccbb45ffda7e59"
+ },
+ {
+ "dataPath": "params_shard_50.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.2.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.2.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.2.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.2.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.2.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.2.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.2.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.2.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "05adac5518d4632c487f2b3636443685"
+ },
+ {
+ "dataPath": "params_shard_51.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.2.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.2.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.2.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.2.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.2.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.20.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.20.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.20.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.20.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.20.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.20.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.20.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.20.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "b93523dc0644319d75b53e6e664621bf"
+ },
+ {
+ "dataPath": "params_shard_52.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.20.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.20.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.20.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.20.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.20.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.20.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.21.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.21.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.21.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.21.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "86580fb013cda9b02b79b7284ece3f8e"
+ },
+ {
+ "dataPath": "params_shard_53.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.21.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.21.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.21.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.21.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.21.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.22.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "39bed558a4fd2f42ef398c14bd5eddb5"
+ },
+ {
+ "dataPath": "params_shard_54.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.22.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.22.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.22.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.22.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.22.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.22.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.22.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.22.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "50ea7c70ac913d7f9cac703a1ff80140"
+ },
+ {
+ "dataPath": "params_shard_55.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.22.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.22.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.22.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.22.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.22.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.23.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.23.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.23.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.23.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.23.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.23.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.23.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.23.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "32c56da14c68670bff9e3bcec790fb7d"
+ },
+ {
+ "dataPath": "params_shard_56.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.23.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.23.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.23.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.23.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.23.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.23.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.24.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.24.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.24.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.24.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "cc12333f48379a33000e4c51f1c0f850"
+ },
+ {
+ "dataPath": "params_shard_57.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.24.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.24.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.24.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.24.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.24.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.25.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "1c475d0582c4ccfb1a31bf70370fc289"
+ },
+ {
+ "dataPath": "params_shard_58.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.25.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.25.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.25.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.25.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.25.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.25.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.25.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.25.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "254dcb7dc16168dae1bcb49c47d612d4"
+ },
+ {
+ "dataPath": "params_shard_59.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.25.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.25.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.25.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.25.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.25.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.26.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.26.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.26.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.26.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.26.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.26.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.26.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.26.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "4ccb623c56961d4a14127cf310d8a0f4"
+ },
+ {
+ "dataPath": "params_shard_60.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.26.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.26.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.26.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.26.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.26.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.26.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.27.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.27.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.27.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.27.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "74a771618a793fce90c7b5db6f2fea5a"
+ },
+ {
+ "dataPath": "params_shard_61.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.27.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.27.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.27.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.27.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.27.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.28.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "64ef568084ecbff1c611d3e41aba463c"
+ },
+ {
+ "dataPath": "params_shard_62.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.28.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.28.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.28.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.28.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.28.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.28.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.28.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.28.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "be4a2b25cfe5b4c41eebd0f36efc81fd"
+ },
+ {
+ "dataPath": "params_shard_63.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.28.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.28.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.28.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.28.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.28.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.29.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.29.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.29.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.29.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.29.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.29.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.29.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.29.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "6d00a96471ec6b2da46529dd222ac6c4"
+ },
+ {
+ "dataPath": "params_shard_64.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.29.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.29.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.29.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.29.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.29.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.29.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.3.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.3.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.3.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.3.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "52bda64d796b202d4b5d524ee59a6542"
+ },
+ {
+ "dataPath": "params_shard_65.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.3.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.3.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.3.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.3.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.3.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.30.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "970375b93a1bc2e9896545afb1efe41a"
+ },
+ {
+ "dataPath": "params_shard_66.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.30.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.30.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.30.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.30.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.30.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.30.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.30.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.30.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "5f08202dfa4888bb19fc6d9fcd4b07cd"
+ },
+ {
+ "dataPath": "params_shard_67.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.30.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.30.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.30.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.30.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.30.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.31.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.31.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.31.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.31.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.31.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.31.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.31.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.31.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "402d9cdb76cee45b19ee96b115848a5f"
+ },
+ {
+ "dataPath": "params_shard_68.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.31.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.31.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.31.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.31.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.31.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.31.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.32.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.32.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.32.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.32.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "af333716a27060a77bd1309dfea682e4"
+ },
+ {
+ "dataPath": "params_shard_69.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.32.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.32.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.32.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.32.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.32.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.33.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "7b74ba34b51889dc34948df6c504efcb"
+ },
+ {
+ "dataPath": "params_shard_70.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.33.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.33.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.33.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.33.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.33.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.33.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.33.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.33.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "1884f7d17246c6c65d71f76b512c42cd"
+ },
+ {
+ "dataPath": "params_shard_71.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.33.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.33.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.33.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.33.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.33.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.34.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.34.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.34.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.34.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.34.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.34.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.34.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.34.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "67bdd7015a394715f4b935865f07d5f0"
+ },
+ {
+ "dataPath": "params_shard_72.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.34.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.34.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.34.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.34.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.34.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.34.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.35.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.35.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.35.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.35.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "ac02b462700a37127c6b7008c0ff9a86"
+ },
+ {
+ "dataPath": "params_shard_73.bin",
+ "format": "raw-shard",
+ "nbytes": 29864448,
+ "records": [
+ {
+ "name": "llm.model.layers.35.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.35.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.35.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.35.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.35.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ }
+ ],
+ "md5sum": "e62639dffa8480de096da424ded83d15"
+ },
+ {
+ "dataPath": "params_shard_74.bin",
+ "format": "raw-shard",
+ "nbytes": 33182208,
+ "records": [
+ {
+ "name": "llm.model.layers.36.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.36.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.36.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.4.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.4.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 9957888
+ },
+ {
+ "name": "llm.model.layers.4.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.4.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 17422848
+ },
+ {
+ "name": "llm.model.layers.4.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 18252288
+ },
+ {
+ "name": "llm.model.layers.4.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 31523328
+ }
+ ],
+ "md5sum": "03502bcbe2e396c002bdcc55c55e1f64"
+ },
+ {
+ "dataPath": "params_shard_75.bin",
+ "format": "raw-shard",
+ "nbytes": 23233536,
+ "records": [
+ {
+ "name": "llm.model.layers.4.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.4.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 1658880
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 1663488
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 9626112
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 10621440
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 11616768
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 14270976
+ },
+ {
+ "name": "llm.model.layers.4.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 14602752
+ },
+ {
+ "name": "llm.model.layers.5.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 14934528
+ },
+ {
+ "name": "llm.model.layers.5.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 14939136
+ },
+ {
+ "name": "llm.model.layers.5.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 21574656
+ },
+ {
+ "name": "llm.model.layers.5.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 22404096
+ }
+ ],
+ "md5sum": "a18b564c182246e9494b4f35776a17d9"
+ },
+ {
+ "dataPath": "params_shard_76.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.5.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.5.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.5.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.5.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.5.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.6.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "fe555e656ca09a653dad6cd536eb2213"
+ },
+ {
+ "dataPath": "params_shard_77.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.6.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.6.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.6.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.6.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.6.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.6.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.6.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.6.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "a3b3a6951a8987588d4d01f5e9bba1f6"
+ },
+ {
+ "dataPath": "params_shard_78.bin",
+ "format": "raw-shard",
+ "nbytes": 30200832,
+ "records": [
+ {
+ "name": "llm.model.layers.6.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.6.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.6.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.6.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.6.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ },
+ {
+ "name": "llm.model.layers.7.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 5308416
+ },
+ {
+ "name": "llm.model.layers.7.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 5313024
+ },
+ {
+ "name": "llm.model.layers.7.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 11948544
+ },
+ {
+ "name": "llm.model.layers.7.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 12777984
+ },
+ {
+ "name": "llm.model.layers.7.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 13607424
+ },
+ {
+ "name": "llm.model.layers.7.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 26878464
+ },
+ {
+ "name": "llm.model.layers.7.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 28537344
+ },
+ {
+ "name": "llm.model.layers.7.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 30196224
+ }
+ ],
+ "md5sum": "5fd870ae774eb8be84a3bd5a4a12b724"
+ },
+ {
+ "dataPath": "params_shard_79.bin",
+ "format": "raw-shard",
+ "nbytes": 21570048,
+ "records": [
+ {
+ "name": "llm.model.layers.7.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.7.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 7962624
+ },
+ {
+ "name": "llm.model.layers.7.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 8957952
+ },
+ {
+ "name": "llm.model.layers.7.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 9953280
+ },
+ {
+ "name": "llm.model.layers.7.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12607488
+ },
+ {
+ "name": "llm.model.layers.7.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 12939264
+ },
+ {
+ "name": "llm.model.layers.8.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.8.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 13275648
+ },
+ {
+ "name": "llm.model.layers.8.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 19911168
+ },
+ {
+ "name": "llm.model.layers.8.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 20740608
+ }
+ ],
+ "md5sum": "1d0cd0d59fb651ddafe82ee35b787b0d"
+ },
+ {
+ "dataPath": "params_shard_80.bin",
+ "format": "raw-shard",
+ "nbytes": 29869056,
+ "records": [
+ {
+ "name": "llm.model.layers.8.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.8.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 13271040
+ },
+ {
+ "name": "llm.model.layers.8.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 14929920
+ },
+ {
+ "name": "llm.model.layers.8.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 16588800
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 16593408
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 24556032
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 25551360
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 26546688
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29200896
+ },
+ {
+ "name": "llm.model.layers.8.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 29532672
+ },
+ {
+ "name": "llm.model.layers.9.input_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 29864448
+ }
+ ],
+ "md5sum": "d9559d4c1684bf9254309705b0e3240a"
+ },
+ {
+ "dataPath": "params_shard_81.bin",
+ "format": "raw-shard",
+ "nbytes": 32850432,
+ "records": [
+ {
+ "name": "llm.model.layers.9.mlp.down_proj.q_weight",
+ "shape": [
+ 2304,
+ 720
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 6635520,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.9.mlp.down_proj.q_scale",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 6635520
+ },
+ {
+ "name": "llm.model.layers.9.mlp.down_proj.q_zero",
+ "shape": [
+ 2304,
+ 180
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 829440,
+ "byteOffset": 7464960
+ },
+ {
+ "name": "llm.model.layers.9.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 11520,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13271040,
+ "byteOffset": 8294400
+ },
+ {
+ "name": "llm.model.layers.9.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 21565440
+ },
+ {
+ "name": "llm.model.layers.9.mlp.gate_up_proj.q_zero",
+ "shape": [
+ 11520,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1658880,
+ "byteOffset": 23224320
+ },
+ {
+ "name": "llm.model.layers.9.post_attention_layernorm.weight",
+ "shape": [
+ 2304
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4608,
+ "byteOffset": 24883200
+ },
+ {
+ "name": "llm.model.layers.9.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6912,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7962624,
+ "byteOffset": 24887808
+ }
+ ],
+ "md5sum": "d2800904eaf9cbc7b0aaacfcd24ad75c"
+ },
+ {
+ "dataPath": "params_shard_82.bin",
+ "format": "raw-shard",
+ "nbytes": 5308416,
+ "records": [
+ {
+ "name": "llm.model.layers.9.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 0
+ },
+ {
+ "name": "llm.model.layers.9.self_attn.qkv_proj.q_zero",
+ "shape": [
+ 6912,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 995328,
+ "byteOffset": 995328
+ },
+ {
+ "name": "llm.model.layers.9.self_attn.o_proj.q_weight",
+ "shape": [
+ 2304,
+ 288
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 2654208,
+ "byteOffset": 1990656
+ },
+ {
+ "name": "llm.model.layers.9.self_attn.o_proj.q_scale",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4644864
+ },
+ {
+ "name": "llm.model.layers.9.self_attn.o_proj.q_zero",
+ "shape": [
+ 2304,
+ 72
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 331776,
+ "byteOffset": 4976640
+ }
+ ],
+ "md5sum": "38ee4d8b0f6b09524bae331f6c4f8c11"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/params_shard_0.bin b/params_shard_0.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d2840c7d19fb135e6f0ae643b2c794c2e033e496
--- /dev/null
+++ b/params_shard_0.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfe955d7fe865f3d88490c269b7f72aaca859893d6be85de54eac4da50c98082
+size 28214784
diff --git a/params_shard_1.bin b/params_shard_1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..07e7e40f43e7b21807be0bd6c22b1d04a76f654f
--- /dev/null
+++ b/params_shard_1.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ca69bf41757c384d6c531b9ef3ebbd1c6ae420360846db20904b4517846c355
+size 32850432
diff --git a/params_shard_10.bin b/params_shard_10.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bf8ea1bc795bd33327d5e7f53216c9a52c86aaa6
--- /dev/null
+++ b/params_shard_10.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9e05ebccc02bb864a0dec48ee1a4902dc8b762833946d6e03142a552972107c
+size 30479008
diff --git a/params_shard_11.bin b/params_shard_11.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4466a01e3f9d8371393673135448c5c3445888fc
--- /dev/null
+++ b/params_shard_11.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7604418676dab2ae8ff866fcedd0d2d0cc285fcc248599c5e64d18bdba4818f
+size 30479008
diff --git a/params_shard_12.bin b/params_shard_12.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e90dd9a6c7a8099b01f0179620fd52c9f4f7e095
--- /dev/null
+++ b/params_shard_12.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76f09296f65d5b73e8ba4c8f16ed8b86d675a997077424850c7c31f642a32005
+size 30479008
diff --git a/params_shard_13.bin b/params_shard_13.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e8347de3aa15f9a9006f1c458e921becc27af38c
--- /dev/null
+++ b/params_shard_13.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a006c043af12cddec038afac655016c5dc175afdbdc895cf23e12117f419b1e0
+size 30479008
diff --git a/params_shard_14.bin b/params_shard_14.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9ffbb1580b520cf0f60c69bd56fff83e8520a630
--- /dev/null
+++ b/params_shard_14.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0509ab5ac0930aba2b4ae25d7afce87668873b35ff57c4ada647f8fe7cd435fe
+size 30479008
diff --git a/params_shard_15.bin b/params_shard_15.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8e4ff821c67ef04da0e02504cacb22dd731684b6
--- /dev/null
+++ b/params_shard_15.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae1ded63a3525672b6dcd4dd9a1eec2d7e8ee1ee5988ffce9c997a9ba670bd65
+size 30479008
diff --git a/params_shard_16.bin b/params_shard_16.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ecbbc2077a2c6f57bb9271217dc28552db8b68d2
--- /dev/null
+++ b/params_shard_16.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4ece7895e8eab137e7d5ba46d35eebc20bb5e210f750ba675865244a4142252
+size 30479008
diff --git a/params_shard_17.bin b/params_shard_17.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b16de9ad446ed6fcd1545fcf9228239f120262e5
--- /dev/null
+++ b/params_shard_17.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8323bf4f2068d4749eaf7fff8ba1e5d81e68611479590f48f02fe07aa16a1e88
+size 30479008
diff --git a/params_shard_18.bin b/params_shard_18.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b56dfce7c76f8c574d32342203f2598ddad0f83b
--- /dev/null
+++ b/params_shard_18.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:086f2bcde4a0386e3315fad3ccdcaa7cfda07336a54a568e3b29021927882c22
+size 30479008
diff --git a/params_shard_19.bin b/params_shard_19.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bf183988dc18dcb4b11062468ad6e0e18e2a78ae
--- /dev/null
+++ b/params_shard_19.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4ade58f5652e604d2b9fbdc2ba3a13ca7d3fe78610be6a28000620c0a6d147f
+size 30479008
diff --git a/params_shard_2.bin b/params_shard_2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..681fc50fc21bab2aa1b8d5b82b783f6349ddbc63
--- /dev/null
+++ b/params_shard_2.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0adcc77f82e4e54d57f605100879cbfbfacc812c213c9f42f137495300b10654
+size 30200832
diff --git a/params_shard_20.bin b/params_shard_20.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9f0a2d6f19d7e6bd7ad57aa93211c3a3488ea27a
--- /dev/null
+++ b/params_shard_20.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6cf46a3813295f22c414dc27186cec1486cf08b36f97bc6a691d1fc8612fb82a
+size 30479008
diff --git a/params_shard_21.bin b/params_shard_21.bin
new file mode 100644
index 0000000000000000000000000000000000000000..535b816ea76962d4007087d83903812fc9dc99f4
--- /dev/null
+++ b/params_shard_21.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37e6d5669df8a833b08f533de8b2672d9d3499654433dbe3db15dca6c693df5b
+size 30479008
diff --git a/params_shard_22.bin b/params_shard_22.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a77e95f504973f6291b57abe1b06ee460d010261
--- /dev/null
+++ b/params_shard_22.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:985e255664a7daeae9e020a05cff2d925dad14a14c406a8e4f16f3731d8fc135
+size 30479008
diff --git a/params_shard_23.bin b/params_shard_23.bin
new file mode 100644
index 0000000000000000000000000000000000000000..960dcda49ea01b92ee7ce730dbccbb4b3c33f659
--- /dev/null
+++ b/params_shard_23.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eeb695236f71ba6050364119ff2518f8d99cdf303bbbfacf24c096e39ab9c690
+size 30479008
diff --git a/params_shard_24.bin b/params_shard_24.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1e1235873b89e97dec55a23c8bfe430e207ba6df
--- /dev/null
+++ b/params_shard_24.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c036ff3abfddaeb97b9f3c7f558fc15bb1774b90e24f4fe678af8b869fe099e
+size 30479008
diff --git a/params_shard_25.bin b/params_shard_25.bin
new file mode 100644
index 0000000000000000000000000000000000000000..39e95e9829e83a4f738e8ba2ed70a11c19364df6
--- /dev/null
+++ b/params_shard_25.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d05a337a2ac307071d0cb4ecb6c50999aec75b9c5aeae67c9e66148154bd81bf
+size 30479008
diff --git a/params_shard_26.bin b/params_shard_26.bin
new file mode 100644
index 0000000000000000000000000000000000000000..478e6addb4b1f13d48cdc25396b4c244d5df3d9b
--- /dev/null
+++ b/params_shard_26.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:169864e971d283df490669e8a3fa6e3d47a0ba91db1d8ca51dc2a0f2e1d53259
+size 30479008
diff --git a/params_shard_27.bin b/params_shard_27.bin
new file mode 100644
index 0000000000000000000000000000000000000000..844628befb75f56d1e3b65ac66443b959563f295
--- /dev/null
+++ b/params_shard_27.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11f14103bd1e9cf39273707acd1a4048b2a533c88d43ed90cbee784801ce64d8
+size 30479008
diff --git a/params_shard_28.bin b/params_shard_28.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9c4e599e113b332e915b94d80ddf8a29858b5853
--- /dev/null
+++ b/params_shard_28.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3a004e8af1898fa50e119a2b1b5d94abf6fd8e3fd8996033158c1335f36bf8d
+size 30479008
diff --git a/params_shard_29.bin b/params_shard_29.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7e87e966c96765d4753ad06e188c625f09674acb
--- /dev/null
+++ b/params_shard_29.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c47749721c14757a2475972d2f4dc4f94f95a36924cef0fd58aaf6c121fb6a8
+size 30479008
diff --git a/params_shard_3.bin b/params_shard_3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..452a23d046cf9804fbeb0b04171a5cb838a56e85
--- /dev/null
+++ b/params_shard_3.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba93d19dd9f4fb34ff022c07b0cf26fa621073674a3a0fb65e476c16646b7a05
+size 21570048
diff --git a/params_shard_30.bin b/params_shard_30.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8320297bf83154d0e0c156ecf6d7491fc82dbb1
--- /dev/null
+++ b/params_shard_30.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccb23012a89cde4efe3abc150e957e44b2b08d0b75d66486803da6335ea101f1
+size 30479008
diff --git a/params_shard_31.bin b/params_shard_31.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d479664658c820596f43a47e18f83164e11f4b73
--- /dev/null
+++ b/params_shard_31.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a50b978925a2d0d5e3e2c65a9a7a33c1916bbb087c979b2831aacd164d36b866
+size 30479008
diff --git a/params_shard_32.bin b/params_shard_32.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3c62d9151cbdadae7acd5b5c2b11e9b087f3b78e
--- /dev/null
+++ b/params_shard_32.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03322daea16187ad9fb07f9ddbe8a48fcd70fe03fc2c2159ccc555284be16b2d
+size 565645824
diff --git a/params_shard_33.bin b/params_shard_33.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6cc6545a0f13339b289fe9190fcdda0be9043779
--- /dev/null
+++ b/params_shard_33.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07e2df9527e1e33504b441e65c261a9fd3e6b7b588d4c801204b425526363b11
+size 31541152
diff --git a/params_shard_34.bin b/params_shard_34.bin
new file mode 100644
index 0000000000000000000000000000000000000000..08f458740756c2a25909095b2984afce56a5db2f
--- /dev/null
+++ b/params_shard_34.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc4d2d7ed4608050c7aca8e272062f535edddbe5e4c6953997d60739d576b5d1
+size 32850432
diff --git a/params_shard_35.bin b/params_shard_35.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d992e963c539be710890b3e60a50e2bdbba44d25
--- /dev/null
+++ b/params_shard_35.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd1f07bebc01bd8ae97ea3acb0c0f5d11515212f9082b18e982fa2c757e870cb
+size 30200832
diff --git a/params_shard_36.bin b/params_shard_36.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8eb0f2ad606d1d8cd2ef013a2c8eba591ebae962
--- /dev/null
+++ b/params_shard_36.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15e65a1a217827c475127cb4f056ab4019464f82f765758da9a7302444695ffc
+size 21570048
diff --git a/params_shard_37.bin b/params_shard_37.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bff5d3efe7602f023d501237d8842d19dbc59407
--- /dev/null
+++ b/params_shard_37.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2dba6c6e6923b53d7117c08d9422768c5a75c966338e904058ba2b64c7878c72
+size 29869056
diff --git a/params_shard_38.bin b/params_shard_38.bin
new file mode 100644
index 0000000000000000000000000000000000000000..75d85500b34fe6a2587ab058f70d4d2f2957b012
--- /dev/null
+++ b/params_shard_38.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b11edc8f07edba8d183e117bfc1cb63ce7ff178972f186d1ae028e8fe7d36213
+size 32850432
diff --git a/params_shard_39.bin b/params_shard_39.bin
new file mode 100644
index 0000000000000000000000000000000000000000..aa457a9da6af353de40203d9bbd590583d59cdb4
--- /dev/null
+++ b/params_shard_39.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87dae6fced212725c87f8a595f3b84067af1c9aa640ff0eb50ecc1e75683ebe1
+size 30200832
diff --git a/params_shard_4.bin b/params_shard_4.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1762323a2a42d2181fd31a0fd69e5c14e33ea864
--- /dev/null
+++ b/params_shard_4.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b36f95687bcd4765ab6ada7548c25bca8d3bd507cc126f1fdbb8dd2ea80b0b6d
+size 29882880
diff --git a/params_shard_40.bin b/params_shard_40.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ce3175745a709be7d47d3cd4cc900540e57da375
--- /dev/null
+++ b/params_shard_40.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6445775b4b9f2199ea007540764de9413b4864fd7e8da7b3a42682721c310e1f
+size 21570048
diff --git a/params_shard_41.bin b/params_shard_41.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8c422bc0ae09a66fa700c26181a274941c9fabac
--- /dev/null
+++ b/params_shard_41.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:269f882741d967146d540a31fb15441241e7e67349d769677a20259094960f7c
+size 29869056
diff --git a/params_shard_42.bin b/params_shard_42.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8612b49434b407219479fc5e6f0ff581e1a972bc
--- /dev/null
+++ b/params_shard_42.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa8e302323ad433d5974e960da39a409499d2e16cb372512360b39628ac8056b
+size 32850432
diff --git a/params_shard_43.bin b/params_shard_43.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f31053ab215368916249c14c2e21c74d9ef1b7f5
--- /dev/null
+++ b/params_shard_43.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5f0029e2474c7c203de57425e0b937b561214249aadaca401e0d6893b80315c
+size 30200832
diff --git a/params_shard_44.bin b/params_shard_44.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5044b54e1654dff75fe0ab5964574575b2d1d6c9
--- /dev/null
+++ b/params_shard_44.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69e9341433ef6cfb42410f9d6ee77c3e1f6f64bcc894b471dad23b93229148bb
+size 21570048
diff --git a/params_shard_45.bin b/params_shard_45.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a6eb0eb0ba86b27ae477be7e25bfe09a1656bfbe
--- /dev/null
+++ b/params_shard_45.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6951473def9b78f09addaf63f422778c73536e2f413ff4ee227d19e5122dd239
+size 29869056
diff --git a/params_shard_46.bin b/params_shard_46.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a0a161fd8500d35ffc1ef718a1bb060bc489ae1b
--- /dev/null
+++ b/params_shard_46.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13b569765932470328dec0cdd420c33fc716ce0af0aa60d1643854127ee2ba30
+size 32850432
diff --git a/params_shard_47.bin b/params_shard_47.bin
new file mode 100644
index 0000000000000000000000000000000000000000..108f96769bd7dd2a970e9cd506ffb0058d1910fc
--- /dev/null
+++ b/params_shard_47.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f96363d59b2636d9bfc6b39b1ac92738bc8b4489bdbd9e6f479df2dc0af45235
+size 30200832
diff --git a/params_shard_48.bin b/params_shard_48.bin
new file mode 100644
index 0000000000000000000000000000000000000000..65f4f7f118eacd0b7520aa87034255022fbd81fe
--- /dev/null
+++ b/params_shard_48.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:decd3c9946fcde764cfc5cdc3b83c7554176d2ed25c4941e581f8e767d213ac2
+size 21570048
diff --git a/params_shard_49.bin b/params_shard_49.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3314e7c04bf48726873e25d44cd5e55d1b0bdeae
--- /dev/null
+++ b/params_shard_49.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:743098f01b3803f3beb4bf86ff1675397bcdb8641a87081301c205b55cf6c0f6
+size 29869056
diff --git a/params_shard_5.bin b/params_shard_5.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f7d1859b6735e2989332fb2af06f3f7ceb31b4f0
--- /dev/null
+++ b/params_shard_5.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daf64ad02dd81b0dbbaac2599fd05105d7b9732b929fb54d65b1f27c67bf10d4
+size 31855104
diff --git a/params_shard_50.bin b/params_shard_50.bin
new file mode 100644
index 0000000000000000000000000000000000000000..64a3cc9928b0164ba0003c217831b6d440217893
--- /dev/null
+++ b/params_shard_50.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf40692608b1c9cf801e58f5d71d057104a9e3028cde790172bd1ef7f23cb6b1
+size 32850432
diff --git a/params_shard_51.bin b/params_shard_51.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d9a8e5ad7e1fd068768f91412f0bab3a7b436160
--- /dev/null
+++ b/params_shard_51.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b68e5f5e007b48e5367785417d1f569ffd96b5d1905d0ee423840b0f44e6c002
+size 30200832
diff --git a/params_shard_52.bin b/params_shard_52.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3b0dfd77ec23f5b5024f0780fe7965329019cef7
--- /dev/null
+++ b/params_shard_52.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e69d4990fe5cce996bb94bb579bb6cc03a7753970047271ad2f985d9c6f21186
+size 21570048
diff --git a/params_shard_53.bin b/params_shard_53.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d350a122b30c2c0080248b41d8620af54474894d
--- /dev/null
+++ b/params_shard_53.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f1b07ae1305d8deafd98e7b54ac4b1c0ef4c7541a400828526cfc196ae8daba
+size 29869056
diff --git a/params_shard_54.bin b/params_shard_54.bin
new file mode 100644
index 0000000000000000000000000000000000000000..03e3e08264b74d41cac7cd5d99fe1f8fcca93b47
--- /dev/null
+++ b/params_shard_54.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:583e1c0203d0547c0f9c9e349784969e7d85cdbb7196e7890eb23f24af8baedc
+size 32850432
diff --git a/params_shard_55.bin b/params_shard_55.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c906ccad91d94fd98c9832b0bca078ff2630d50d
--- /dev/null
+++ b/params_shard_55.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97994248ad6ae62dffab8b8f921dd52750e348469576692cd960905a317c3718
+size 30200832
diff --git a/params_shard_56.bin b/params_shard_56.bin
new file mode 100644
index 0000000000000000000000000000000000000000..61e0b2125ac14066c7c705344d6fea690eb51afa
--- /dev/null
+++ b/params_shard_56.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bcc08fa50bf14c24d43fc578bc8c30b3a3957962103a4ab5bc8e7202ed64b2f
+size 21570048
diff --git a/params_shard_57.bin b/params_shard_57.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4737da9f6405d9554c83859416ee86e44a4788d2
--- /dev/null
+++ b/params_shard_57.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c37c5ec9d82440ab4717c985608c7b038d4c127c1425b5929fdd2bd7803b5fad
+size 29869056
diff --git a/params_shard_58.bin b/params_shard_58.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7917e07977c5e447dbcbe4be7b648468c5d04ed6
--- /dev/null
+++ b/params_shard_58.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94675e4e71a0f9f482357a20134c9e4e3220cb7804cca5b109cbb4f27792751f
+size 32850432
diff --git a/params_shard_59.bin b/params_shard_59.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8d4bdd2735e5b6b19d3c03add5e71b3203e84903
--- /dev/null
+++ b/params_shard_59.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd6913f340a9cbaca12a2466b3ae51c032667e94036692439e6fda9f33e7a387
+size 30200832
diff --git a/params_shard_6.bin b/params_shard_6.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fbf7fcf7c8937b214daade7427e108820a72e8b1
--- /dev/null
+++ b/params_shard_6.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3e5541bbfe2f0ea56eb285b0a6c0a772c3ece6f295d15225c34b8531f653736
+size 31880448
diff --git a/params_shard_60.bin b/params_shard_60.bin
new file mode 100644
index 0000000000000000000000000000000000000000..941df76d37a3803bc9334f29406d4259cf5ac5f3
--- /dev/null
+++ b/params_shard_60.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5e9acdc77c85922177b46535932ad6167f614a0a5e882aa461b186b2041942b
+size 21570048
diff --git a/params_shard_61.bin b/params_shard_61.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1fcd9af8b0b6c00d7965fd6660c0fdaa18937527
--- /dev/null
+++ b/params_shard_61.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5baa574aca773a1a2cedc613e69eaa9d0d047e255f48bfcd98396d906570ddb
+size 29869056
diff --git a/params_shard_62.bin b/params_shard_62.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c02f7722f01de34492a92b33fb514c5f5921dd24
--- /dev/null
+++ b/params_shard_62.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:907c2560e86f63592a08afa59f4130a680637b05f38aa8cce79afe5f9da52cd2
+size 32850432
diff --git a/params_shard_63.bin b/params_shard_63.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f2900b5f94ac979b820cb0b44334abf80241f8d3
--- /dev/null
+++ b/params_shard_63.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2781cdff8ed35b01b9c8e702a02d14de493d164e49c1de6cddc076e3f9aa991
+size 30200832
diff --git a/params_shard_64.bin b/params_shard_64.bin
new file mode 100644
index 0000000000000000000000000000000000000000..461721e246b6db328cc9bf7760a7f5a2a7eab0f0
--- /dev/null
+++ b/params_shard_64.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a45479bf21f97e065dceb3ec7611b9dadad9d434617f16f855320b8dfc671895
+size 21570048
diff --git a/params_shard_65.bin b/params_shard_65.bin
new file mode 100644
index 0000000000000000000000000000000000000000..35e55e71061f3bde0fa0aed7e4b75e1c26f431f8
--- /dev/null
+++ b/params_shard_65.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4393d57fb62993e1fe2e4d97ff2ee9e6eb4c61afaacead4fcac7b903ccbe8b68
+size 29869056
diff --git a/params_shard_66.bin b/params_shard_66.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d63309e8240c1a0b332b241a686800dc85d82c9c
--- /dev/null
+++ b/params_shard_66.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26513f9bc650ede62dfd6b6e2289b130825d2a2c68aa99cb6ad764f18d689c4c
+size 32850432
diff --git a/params_shard_67.bin b/params_shard_67.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a7acf76133b125dab33604ca54476fb90ecf9f03
--- /dev/null
+++ b/params_shard_67.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c25cdf0371453f228edf185c5c21442c660874082b60839011d2c0491987903
+size 30200832
diff --git a/params_shard_68.bin b/params_shard_68.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b6fa5ea9de87e2a170ac58bf15013e7bb004cb97
--- /dev/null
+++ b/params_shard_68.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fed7ecb9efcda63cb1d0eb5461eb5a4160af7621b9fce8a4c31f02de308ba1e
+size 21570048
diff --git a/params_shard_69.bin b/params_shard_69.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a95905873ff7f9027f2a5a680bc36d8f5bffc75d
--- /dev/null
+++ b/params_shard_69.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee1c05ccd2fdb220796dc33895163c113c25d42a2ab75399fd73e06fd1bc37e5
+size 29869056
diff --git a/params_shard_7.bin b/params_shard_7.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c92fe9333e18819ea4353fcaa617a07f87d79bba
--- /dev/null
+++ b/params_shard_7.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47953a68cc1c3b82a986488e0fc35b21c9a932bebfa6a8f252fb4c1a9e85cb79
+size 33140128
diff --git a/params_shard_70.bin b/params_shard_70.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6b202a7062beacba435000245c8fcefcc288d65e
--- /dev/null
+++ b/params_shard_70.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92a730d4e72b7f69ac0a6324d08ee14edb47bd21ef73dd1b30f195853fe02acd
+size 32850432
diff --git a/params_shard_71.bin b/params_shard_71.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c971af9a84c60e093e9d08875aec826886a667f
--- /dev/null
+++ b/params_shard_71.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c96cb51ea10b35b74908ac51570f7ccf1c5f32be7ae2f91a62657325696437cc
+size 30200832
diff --git a/params_shard_72.bin b/params_shard_72.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4bafac484617873379c18c45b749afa670aad029
--- /dev/null
+++ b/params_shard_72.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd53f0d29bde44c3a7495642308ad19f34e244a2aed695edec1dac773bfbb988
+size 21570048
diff --git a/params_shard_73.bin b/params_shard_73.bin
new file mode 100644
index 0000000000000000000000000000000000000000..15b818649a0cdc07478de6371761f1547aa1e9c5
--- /dev/null
+++ b/params_shard_73.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ea13a4d7c5cc32c2c8871b87478c5f24c4e7d2eb590ca694bef7ebd5f32ff47
+size 29864448
diff --git a/params_shard_74.bin b/params_shard_74.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fa6e3b6eb5cee8b4f3828b2cfafc2af3f9b08c09
--- /dev/null
+++ b/params_shard_74.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41a81a6898f7884a8cadc4a069aa9d190a879f3c09f80ab4627de3577866598d
+size 33182208
diff --git a/params_shard_75.bin b/params_shard_75.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ee86a07e15b4acc6cbec89e71c40b74c61b0cb1a
--- /dev/null
+++ b/params_shard_75.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa73d5ce6ce48d56ada60b4f3522dc63d23f37128faa8dd6af7220ff804404fd
+size 23233536
diff --git a/params_shard_76.bin b/params_shard_76.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d00ea6154830dccf652d2182e921280aee706461
--- /dev/null
+++ b/params_shard_76.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d20cc8bea5d2a3d99642f35d6425846d22af1c88c0e141976475cd68bbae87e
+size 29869056
diff --git a/params_shard_77.bin b/params_shard_77.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c2050992953278c557994287fc1bb6d000b24090
--- /dev/null
+++ b/params_shard_77.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1588fed2bd5cde1d5af4049a8430084ab19d6809c43b415931c01b6c2688487
+size 32850432
diff --git a/params_shard_78.bin b/params_shard_78.bin
new file mode 100644
index 0000000000000000000000000000000000000000..33ce5e92c09bd202c53f5aa1e9c18ac7f4d0c739
--- /dev/null
+++ b/params_shard_78.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:575f8da35a78d223964367ae29ffb58eceeb622f16bb54099fdf29b38c0d0076
+size 30200832
diff --git a/params_shard_79.bin b/params_shard_79.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ded79f3b0f99bc020f44b3cdd87338c448256fff
--- /dev/null
+++ b/params_shard_79.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e37f71f2c51fe468effd855cc0816b5bed4727e4976b9da31e2495eff6b6d04c
+size 21570048
diff --git a/params_shard_8.bin b/params_shard_8.bin
new file mode 100644
index 0000000000000000000000000000000000000000..87be8b16ab82b9de4b8a20cb8fa98b5af0609a2d
--- /dev/null
+++ b/params_shard_8.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65398d36c5d37ce229ccc5c27cbe6819a21c32b52b13dddec64fb858b4a1a375
+size 30479008
diff --git a/params_shard_80.bin b/params_shard_80.bin
new file mode 100644
index 0000000000000000000000000000000000000000..33b629f1f614638decd82f456cc101f864990136
--- /dev/null
+++ b/params_shard_80.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d5ed72eba67268cba7feee9e9b8f95c2f7f93b837b09a99bf7c7bb9318a6f0a
+size 29869056
diff --git a/params_shard_81.bin b/params_shard_81.bin
new file mode 100644
index 0000000000000000000000000000000000000000..48d2606622fa27b8cce42b1de33f6b13dc3a8819
--- /dev/null
+++ b/params_shard_81.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c79e45bcd108917e99357c3efeeec879c969a9d2c4d6e6c453c4a18fdd234eb9
+size 32850432
diff --git a/params_shard_82.bin b/params_shard_82.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f0685ca75d35095b218424d7e87826a84b1b1bee
--- /dev/null
+++ b/params_shard_82.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b9fe4dd90c3e5650b558e523aa6c0c52d87eb77a69092a696692e04485922b1
+size 5308416
diff --git a/params_shard_9.bin b/params_shard_9.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8583abbb69fcb04fbe91765ca485f23d61976751
--- /dev/null
+++ b/params_shard_9.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:945e2b3701e63ea58057e135843f0c9dccf1b3dd73d3f65ad8463ed54394e8e0
+size 30479008
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a68d2399b5869488ff3ac92c05c2af67209d4a1
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,294543 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 0,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 1,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 2,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 101,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 102,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 103,
+ "content": "[",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 104,
+ "content": "]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 105,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 106,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 107,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 108,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 122753,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 122754,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 122755,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 122756,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": {
+ "type": "Sequence",
+ "normalizers": [
+ {
+ "type": "Prepend",
+ "prepend": "▁"
+ },
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": " "
+ },
+ "content": "▁"
+ }
+ ]
+ },
+ "pre_tokenizer": null,
+ "post_processor": {
+ "type": "TemplateProcessing",
+ "single": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ }
+ ],
+ "pair": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ },
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 1
+ }
+ },
+ {
+ "Sequence": {
+ "id": "B",
+ "type_id": 1
+ }
+ }
+ ],
+ "special_tokens": {
+ "": {
+ "id": "",
+ "ids": [
+ 1
+ ],
+ "tokens": [
+ ""
+ ]
+ }
+ }
+ },
+ "decoder": {
+ "type": "Sequence",
+ "decoders": [
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": "▁"
+ },
+ "content": " "
+ },
+ {
+ "type": "ByteFallback"
+ },
+ {
+ "type": "Fuse"
+ },
+ {
+ "type": "Strip",
+ "content": " ",
+ "start": 1,
+ "stop": 0
+ }
+ ]
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": "",
+ "continuing_subword_prefix": null,
+ "end_of_word_suffix": null,
+ "fuse_unk": true,
+ "byte_fallback": true,
+ "vocab": {
+ "": 0,
+ "": 1,
+ "": 2,
+ "": 3,
+ "": 4,
+ "\n": 5,
+ "\t": 6,
+ "": 7,
+ "": 8,
+ "": 9,
+ "": 10,
+ "": 11,
+ "
": 12,
+ "": 13,
+ " | | ": 14,
+ "": 15,
+ "": 16,
+ "": 17,
+ "": 18,
+ "": 21,
+ "": 22,
+ "
": 23,
+ "": 24,
+ "": 25,
+ "": 26,
+ "": 27,
+ "": 28,
+ "": 29,
+ "": 30,
+ "": 31,
+ "": 32,
+ "
": 33,
+ "
": 34,
+ "
": 35,
+ "": 36,
+ "": 37,
+ "": 38,
+ "
": 39,
+ "": 40,
+ "": 41,
+ "
": 42,
+ "": 43,
+ "
": 44,
+ "
": 45,
+ "": 46,
+ "": 47,
+ "
": 48,
+ "": 49,
+ "": 50,
+ "": 51,
+ "0": 52,
+ "1": 53,
+ "2": 54,
+ "3": 55,
+ "4": 56,
+ "5": 57,
+ "6": 58,
+ "7": 59,
+ "8": 60,
+ "9": 61,
+ "+": 62,
+ "-": 63,
+ "=": 64,
+ ",": 65,
+ "。": 66,
+ "!": 67,
+ "?": 68,
+ "、": 69,
+ ":": 70,
+ "¥": 71,
+ ".": 72,
+ "!": 73,
+ "?": 74,
+ "...": 75,
+ "。。。": 76,
+ "。。。。。。": 77,
+ "《": 78,
+ "》": 79,
+ "【": 80,
+ "】": 81,
+ "『": 82,
+ "』": 83,
+ "```": 84,
+ "": 86,
+ "---": 87,
+ "": 88,
+ ";": 89,
+ ".": 90,
+ "=": 91,
+ "<": 92,
+ ">": 93,
+ "-": 94,
+ "+": 95,
+ "%": 96,
+ "‼": 97,
+ "㊣": 98,
+ "/": 99,
+ "|": 100,
+ "": 101,
+ "": 102,
+ "[": 103,
+ "]": 104,
+ "": 105,
+ "": 106,
+ "": 107,
+ "": 108,
+ "": 109,
+ "": 110,
+ "": 111,
+ "": 112,
+ "": 113,
+ "": 114,
+ "": 115,
+ "": 116,
+ "": 117,
+ "": 118,
+ "": 119,
+ "": 120,
+ "": 121,
+ "": 122,
+ "": 123,
+ "": 124,
+ "": 125,
+ "": 126,
+ "": 127,
+ "": 128,
+ "": 129,
+ "": 130,
+ "": 131,
+ "": 132,
+ "": 133,
+ "": 134,
+ "": 135,
+ "": 136,
+ "": 137,
+ "": 138,
+ "": 139,
+ "": 140,
+ "": 141,
+ "": 142,
+ "": 143,
+ "": 144,
+ "": 145,
+ "": 146,
+ "": 147,
+ "": 148,
+ "": 149,
+ "": 150,
+ "": 151,
+ "": 152,
+ "": 153,
+ "": 154,
+ "": 155,
+ "": 156,
+ "": 157,
+ "": 158,
+ "": 159,
+ "": 160,
+ "": 161,
+ "": 162,
+ "": 163,
+ "": 164,
+ "": 165,
+ "": 166,
+ "": 167,
+ "": 168,
+ "": 169,
+ "": 170,
+ "": 171,
+ "": 172,
+ "": 173,
+ "": 174,
+ "": 175,
+ "": 176,
+ "": 177,
+ "": 178,
+ "": 179,
+ "": 180,
+ "": 181,
+ "": 182,
+ "": 183,
+ "": 184,
+ "": 185,
+ "": 186,
+ "": 187,
+ "": 188,
+ "": 189,
+ "": 190,
+ "": 191,
+ "": 192,
+ "": 193,
+ "": 194,
+ "": 195,
+ "": 196,
+ "": 197,
+ "": 198,
+ "": 199,
+ "": 200,
+ "": 201,
+ "": 202,
+ "": 203,
+ "": 204,
+ "": 205,
+ "": 206,
+ "": 207,
+ "": 208,
+ "": 209,
+ "": 210,
+ "": 211,
+ "": 212,
+ "": 213,
+ "": 214,
+ "": 215,
+ "": 216,
+ "": 217,
+ "": 218,
+ "": 219,
+ "": 220,
+ "": 221,
+ "": 222,
+ "": 223,
+ "": 224,
+ "": 225,
+ "": 226,
+ "": 227,
+ "": 228,
+ "": 229,
+ "": 230,
+ "": 231,
+ "": 232,
+ "": 233,
+ "": 234,
+ "": 235,
+ "": 236,
+ "": 237,
+ "": 238,
+ "": 239,
+ "": 240,
+ "": 241,
+ "": 242,
+ "": 243,
+ "": 244,
+ "": 245,
+ "": 246,
+ "": 247,
+ "": 248,
+ "": 249,
+ "": 250,
+ "": 251,
+ "": 252,
+ "": 253,
+ "": 254,
+ "": 255,
+ "": 256,
+ "": 257,
+ "": 258,
+ "": 259,
+ "": 260,
+ "": 261,
+ "": 262,
+ "": 263,
+ "": 264,
+ "": 265,
+ "": 266,
+ "": 267,
+ "": 268,
+ "": 269,
+ "": 270,
+ "": 271,
+ "": 272,
+ "": 273,
+ "": 274,
+ "": 275,
+ "": 276,
+ "": 277,
+ "": 278,
+ "": 279,
+ "": 280,
+ "": 281,
+ "": 282,
+ "": 283,
+ "": 284,
+ "": 285,
+ "": 286,
+ "": 287,
+ "": 288,
+ "": 289,
+ "": 290,
+ "": 291,
+ "": 292,
+ "": 293,
+ "": 294,
+ "": 295,
+ "": 296,
+ "": 297,
+ "": 298,
+ "": 299,
+ "": 300,
+ "": 301,
+ "": 302,
+ "": 303,
+ "": 304,
+ "": 305,
+ "": 306,
+ "": 307,
+ "": 308,
+ "": 309,
+ "": 310,
+ "": 311,
+ "": 312,
+ "": 313,
+ "": 314,
+ "": 315,
+ "": 316,
+ "": 317,
+ "": 318,
+ "": 319,
+ "": 320,
+ "": 321,
+ "": 322,
+ "": 323,
+ "": 324,
+ "": 325,
+ "": 326,
+ "": 327,
+ "": 328,
+ "": 329,
+ "": 330,
+ "": 331,
+ "": 332,
+ "": 333,
+ "": 334,
+ "": 335,
+ "": 336,
+ "": 337,
+ "": 338,
+ "": 339,
+ "": 340,
+ "": 341,
+ "": 342,
+ "": 343,
+ "": 344,
+ "": 345,
+ "": 346,
+ "": 347,
+ "": 348,
+ "": 349,
+ "": 350,
+ "": 351,
+ "": 352,
+ "": 353,
+ "": 354,
+ "": 355,
+ "": 356,
+ "": 357,
+ "": 358,
+ "": 359,
+ "": 360,
+ "": 361,
+ "