|
{ |
|
"metadata": { |
|
"ParamSize": 33, |
|
"ParamBytes": 4282474496.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1050673152, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
128256, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1050673152, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa1a1ca289e5694f2ee44cc47950e0e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "56e7d28c0f5e86717bd131ea46f9f79e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "639abcc4663d290d344b1814c75bb7e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0e9d6293f83e42e73b64c57765ac24c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00826248df28e584f000ce7b3e4ec8e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fdd75e8ab9e3af1d26b52b19ceda746a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b339ff27fe9379fd98ed88d035e9b9f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1050673152, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
128256, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1050673152, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f3e51d826d2282756df3e1a3dd698947" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b8724143febdf83f51cd0310f86448be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "20ead6d113a80e2137b42f84623ba729" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b00c92fb6a054a9ad1c8110737659687" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "71679feab581013be052c1836ea597fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9be34908827dd606624ae87390fcb572" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2df41a087cf1c7338302f4f52258f637" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f20a0e71053cb6d3288e7e4578946d2e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a4ce2d4b7733216063c2bbb06c5e44da" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 117440512, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.weight", |
|
"shape": [ |
|
4096, |
|
14336 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 117440512, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "632764617fc0e6258125ed1c2cf3f0ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 234881024, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
28672, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 234881024, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4d1ad55fdc9d05c1d91780a9e6c6315" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "115a03ba5f66ac7212e188b2833d66e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f72351733f43fe1b714fdeff9431d06c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
6144, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "57090642e6486710fa337cf707ea318d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.weight", |
|
"shape": [ |
|
4096, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9df3b73c90337942f12d7816b1775e34" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 8192 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 16384 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 24576 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 32768 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 40960 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 49152 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 57344 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 65536 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 73728 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 81920 |
|
} |
|
], |
|
"md5sum": "9b24503c638863920e39ebbaf5ce6635" |
|
} |
|
] |
|
} |