{ "metadata": { "ParamSize": 724, "ParamBytes": 2340679936.0, "BitsPerParam": 4.625711599312261 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 39502848, "records": [ { "name": "lm_head.q_weight", "shape": [ 32064, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39502848, "byteOffset": 0 } ], "md5sum": "d7e1c53f4ed3388ab3651a4ac7fe5c3b" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.18.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "bbe371e4c6556993e74003d072d5a519" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 31588480, "records": [ { "name": "lm_head.q_scale", "shape": [ 32064, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4937856, "byteOffset": 0 }, { "name": "model.h.18.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 4937856 }, { "name": "model.h.18.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 4944000 }, { "name": "model.h.18.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 15020160 }, { "name": "model.h.18.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 16279680 }, { "name": "model.h.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18802816 }, { "name": "model.h.18.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 18808960 }, { "name": "model.h.18.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 30163072 }, { "name": "model.h.19.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31582336 } ], "md5sum": "8eb6f7a52bf759728f53f0245c72a859" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31520768, "records": [ { "name": "model.h.19.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 0 }, { "name": "model.h.19.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 10076160 }, { "name": "model.h.19.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 11335680 } ], "md5sum": "2ba63895330a62bc1318c87c5d5072ce" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.20.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "9456eef90ffdc61e81ffe0800109e762" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33431552, "records": [ { "name": "model.h.19.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 0 }, { "name": "model.h.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 2523136 }, { "name": "model.h.19.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 2529280 }, { "name": "model.h.19.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 6313984 }, { "name": "model.h.19.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 6787072 }, { "name": "model.h.19.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 18141184 }, { "name": "model.h.20.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19560448 }, { "name": "model.h.20.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 19566592 }, { "name": "model.h.20.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 29642752 }, { "name": "model.h.20.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 30902272 }, { "name": "model.h.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33425408 } ], "md5sum": "cd6b9aa2b41c2140645eefb808a8a2e3" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.21.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "4ec6c96f6227fce6dbe6a251dad201f3" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.20.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.20.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.20.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.20.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.21.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.21.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.21.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.21.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "30f769a15fbdac43ca45e6812ffecbfb" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.22.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "3e20b5efdce2e340dbb820d88b3cb7e0" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.21.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.21.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.21.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.21.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.22.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.22.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.22.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.22.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "5bbad59fbf3fd94682eb75a071e0f264" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.23.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "5c710bda058fe56ce812112e1b0f0ee0" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.22.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.22.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.22.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.22.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.23.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.23.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.23.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.23.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "7a15e2962ed42b63453eaecd837ff275" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.24.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "0a10e859199899afd04e346c84ef3eef" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.23.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.23.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.23.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.23.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.24.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.24.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.24.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.24.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "6bc76013f6066d9569517976841883eb" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.25.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "f734322260c53272938e682d4441e076" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.24.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.24.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.24.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.24.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.25.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.25.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.25.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.25.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "ad714acba2104718fb5491a432b197d1" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.26.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "348d7841fb570d2dc49bbb93d3679134" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.25.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.25.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.25.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.25.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.26.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.26.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.26.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.26.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "6179ab639c1bda89b3c90cb50ae11762" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.27.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "336989816dd64c49e7eadbdea61ca170" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.26.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.26.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.26.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.26.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.27.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.27.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.27.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.27.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "da3b6c34fb6d830d6245f9c61818a068" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.28.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "0640e92aa128f4e05a58d7c5cc356006" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.27.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.27.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.27.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.27.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.28.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.28.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.28.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.28.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.28.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "db3317dfde5ff5e937017d05543c3792" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.29.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "0e9f6c20144e7d9da545ac535987d49c" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.28.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.28.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.28.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.28.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.29.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.29.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.29.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.29.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.29.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "5cd3f1298c7d3e2eba61c2dccaa51fa1" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.30.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "1d625599633f2e8fdd20ea38072c1a3a" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.29.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.29.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.29.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.29.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.30.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.30.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.30.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.30.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.30.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "430a7c4d1d69396f3ec1eb880b7f716f" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.31.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "25077b71bded2893f97467c767447e8f" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.30.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.30.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.30.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.30.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.31.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.31.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.31.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.31.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.31.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "2239b285a54e01bb80bccbf22c562d3f" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 39502848, "records": [ { "name": "model.embd.q_weight", "shape": [ 32064, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39502848, "byteOffset": 0 } ], "md5sum": "5e7537fcb657900a598019c4ea92dc0a" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.0.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "8409accc0efa14b9bd2212b965b6d449" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33316992, "records": [ { "name": "model.h.31.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.31.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.31.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.31.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.embd.q_scale", "shape": [ 32064, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4937856, "byteOffset": 17037312 }, { "name": "model.h.0.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21975168 }, { "name": "model.h.0.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 21981312 }, { "name": "model.h.0.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 32057472 } ], "md5sum": "897b4fd728d5eaf06f7e5e8eb9fff479" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.1.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "d3bc8dfb66a961ed8ff8d82d62f891e6" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33431552, "records": [ { "name": "model.h.0.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 0 }, { "name": "model.h.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 2523136 }, { "name": "model.h.0.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 2529280 }, { "name": "model.h.0.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 6313984 }, { "name": "model.h.0.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 6787072 }, { "name": "model.h.0.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 18141184 }, { "name": "model.h.1.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19560448 }, { "name": "model.h.1.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 19566592 }, { "name": "model.h.1.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 29642752 }, { "name": "model.h.1.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 30902272 }, { "name": "model.h.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33425408 } ], "md5sum": "9fa4180ca7f3d8d2671e424df58f7d33" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.10.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "a3f57885456413befe772a61db9fe003" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.1.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.1.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.1.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.1.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.10.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.10.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.10.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.10.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "939e4d0a910d890d33ab3aa4cfae6527" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.11.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "ba8cc1009d70aa580a3c3464ddcf2e1b" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.10.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.10.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.10.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.10.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.11.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.11.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.11.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.11.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "cf73ece61d143b5685eef41c20229c42" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.12.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "62f2abbb1faa52d0bce9d55753794a88" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.11.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.11.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.11.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.11.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.12.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.12.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.12.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.12.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "d2acdc47a3bc43aa505032d2bb8c15f3" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.13.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "bb5529cdb10895082d3b2e5293f6d1cd" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.12.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.12.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.12.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.12.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.13.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.13.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.13.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.13.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "98ef5b4e63d4701a25201cdc3c05bf12" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.14.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "24522563518632d9a17f4166066cb200" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.13.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.13.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.13.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.13.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.14.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.14.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.14.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.14.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "8d3fab7847a5c022d510e83f15c23ad7" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.15.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "215dc6ff0a1f211c3727dcc991c46cf1" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.14.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.14.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.14.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.14.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.15.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.15.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.15.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.15.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "72866c136adefde27ba78274ed57619a" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.16.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "7f02d5ccf7cc2c4370777d6ae00b1244" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.15.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.15.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.15.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.15.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.16.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.16.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.16.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.16.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "30abcca65e41086ad0667aacaf53384b" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.17.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "69a2467ff910a2811ef2c4542bc4fe56" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.16.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.16.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.16.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.16.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.17.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.17.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.17.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.17.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "a22c8fb5e9fed8eeee2e25e97eebabaa" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.2.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "de1a4314be8c30ee24e9f222493b0f5b" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 32630784, "records": [ { "name": "model.h.17.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.17.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.17.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.17.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.18.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 17031168 }, { "name": "model.h.18.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 20815872 }, { "name": "model.h.2.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21288960 }, { "name": "model.h.2.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 21295104 }, { "name": "model.h.2.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 31371264 } ], "md5sum": "e236c8247a12c058d052abe0ae7fc877" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.3.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "8d311f71a279bc57e82db1bfee4377e0" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33431552, "records": [ { "name": "model.h.2.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 0 }, { "name": "model.h.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 2523136 }, { "name": "model.h.2.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 2529280 }, { "name": "model.h.2.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 6313984 }, { "name": "model.h.2.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 6787072 }, { "name": "model.h.2.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 18141184 }, { "name": "model.h.3.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19560448 }, { "name": "model.h.3.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 19566592 }, { "name": "model.h.3.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 29642752 }, { "name": "model.h.3.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 30902272 }, { "name": "model.h.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33425408 } ], "md5sum": "64e85992e12b41d931b4135988285364" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.4.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "c9311d3be9641447bf58117d7e8c58a7" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.3.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.3.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.3.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.3.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.4.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.4.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.4.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.4.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "d621aecf6605c376c716c5740763cd54" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.5.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "d3b2599308c53564ab04eb7c13b15fad" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.4.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.4.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.4.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.4.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.5.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.5.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.5.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.5.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "efdbce3b0bdac0012a5351dde276b4cc" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.6.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "5e4f4154f7e37074b030ef2ba84f99fe" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.5.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.5.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.5.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.5.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.6.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.6.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.6.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.6.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "852443b009f7b915ac3d3b9058bd8473" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.7.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "e7e25940d67c00c1b8d30e56ea7d5fd4" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.6.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.6.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.6.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.6.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.7.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.7.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.7.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.7.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "94d1dfa1be75385702998e301115055a" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.8.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "4608f00654ceaa9953c3daccb281330b" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.7.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.7.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.7.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.7.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.8.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.8.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.8.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.8.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "9bd35cc0fed109dedcb6e04ff7d162d2" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "model.h.9.mlp.gate_up_proj.q_weight", "shape": [ 16384, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "5f3ccfc709639df0a73db5ee3a960fb7" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 30902272, "records": [ { "name": "model.h.8.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.8.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.8.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.8.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "model.h.9.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17031168 }, { "name": "model.h.9.mlp.down_proj.q_weight", "shape": [ 3072, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10076160, "byteOffset": 17037312 }, { "name": "model.h.9.mlp.down_proj.q_scale", "shape": [ 3072, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1259520, "byteOffset": 27113472 }, { "name": "model.h.9.mlp.gate_up_proj.q_scale", "shape": [ 16384, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 28372992 }, { "name": "model.h.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30896128 } ], "md5sum": "ad1237b06fbd0528c9f8e23cf79a8891" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 27834368, "records": [ { "name": "model.h.9.mixer.out_proj.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 0 }, { "name": "model.h.9.mixer.out_proj.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 3784704 }, { "name": "model.h.9.mixer.qkv_proj.q_weight", "shape": [ 9216, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11354112, "byteOffset": 4257792 }, { "name": "model.h.9.mixer.qkv_proj.q_scale", "shape": [ 9216, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1419264, "byteOffset": 15611904 }, { "name": "vision_embed_tokens.glb_GN", "shape": [ 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17031168 }, { "name": "vision_embed_tokens.img_processor.vision_model.embeddings.class_embedding", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17039360 }, { "name": "vision_embed_tokens.img_processor.vision_model.embeddings.patch_embedding.weight", "shape": [ 1024, 3, 14, 14 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1204224, "byteOffset": 17041408 }, { "name": "vision_embed_tokens.img_processor.vision_model.embeddings.position_embedding.weight", "shape": [ 577, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1181696, "byteOffset": 18245632 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19427328 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19429376 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19431424 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19433472 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19435520 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 19443712 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27832320 } ], "md5sum": "e9f4130276ed5535299d74f5e68e0597" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "12ccc3786467895471d31bb5b1d2610c" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "dcc2adf61aa07fe189bb15874110e845" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "9f8d49d8894a05f4179ba707925e9d59" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "19b6f5d2cfc839108f9156b8b521d3ce" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "3e6322c0673873144d67964c709fa3be" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "f04370015a40ae2073f5d89ad86f1c42" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "e2c99d455ee851ede30d282481166453" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "11c75f74757caccc61b9f6c3f39a9737" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "a7f57fd25793b85aa6027a514efc39fb" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "e13228942bc433e638fe3903001715f3" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "eab0a6fc1a1e3beac991a0be01fdefe8" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "afcfd0263f305fcc63e7b86393753bf7" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "460a8bc76edf812f85886ec9f950f395" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "bcac984764bba0a4eb9891d2043e1d89" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "3197e582d745d84c2ab4347a1f2d4e2d" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "e6d67ae9f4dab5ea49fac79bceac7a86" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "1a74510bf14d6fc06b53f09ee6a22fdd" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "75a9ee23e852e5562e2176a5ece04f24" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "da3f4d64dad38aa57a47fd6fbca1a334" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "ca1eb7481171d37cc9fab4b3432d81d1" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "03be4317a6855a15625866d5dde56763" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "7464cccd529310def6e3920c637ad62c" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc1.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16801792 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "8be48481c922751c9b723a99e5dcd727" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 26767360, "records": [ { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc2.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8388608 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.k_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8390656 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10487808 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.out_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 10489856 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12587008 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.q_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 12589056 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14686208 }, { "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.v_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14688256 }, { "name": "vision_embed_tokens.img_processor.vision_model.post_layernorm.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "vision_embed_tokens.img_processor.vision_model.post_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "vision_embed_tokens.img_processor.vision_model.pre_layrnorm.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "vision_embed_tokens.img_processor.vision_model.pre_layrnorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16791552 }, { "name": "vision_embed_tokens.img_projection.linear_1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 16793600 }, { "name": "vision_embed_tokens.img_projection.linear_1.q_weight", "shape": [ 3072, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5062656, "byteOffset": 16799744 }, { "name": "vision_embed_tokens.img_projection.linear_1.q_scale", "shape": [ 3072, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 632832, "byteOffset": 21862400 }, { "name": "vision_embed_tokens.img_projection.linear_2.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 22495232 }, { "name": "vision_embed_tokens.img_projection.linear_2.q_weight", "shape": [ 3072, 308 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3784704, "byteOffset": 22501376 }, { "name": "vision_embed_tokens.img_projection.linear_2.q_scale", "shape": [ 3072, 77 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 473088, "byteOffset": 26286080 }, { "name": "vision_embed_tokens.sub_GN", "shape": [ 1, 1, 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26759168 } ], "md5sum": "91fb7eb9ed733756ed0c89a04c1a2657" } ] }