{ "metadata": { "ParamSize": 1037, "ParamBytes": 1795829128.0, "BitsPerParam": 4.507975050351794 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "language_model.lm_head.linear.q_weight", "shape": [ 51200, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "37e46f11810799c6848ec0f42b2bad32" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 32927840, "records": [ { "name": "language_model.lm_head.linear.bias", "shape": [ 51200 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 0 }, { "name": "language_model.lm_head.linear.q_scale", "shape": [ 51200, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 102400 }, { "name": "language_model.lm_head.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8294400 }, { "name": "language_model.lm_head.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8299520 }, { "name": "multi_modal_projector.linear_1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8304640 }, { "name": "multi_modal_projector.linear_1.q_weight", "shape": [ 2560, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1474560, "byteOffset": 8309760 }, { "name": "multi_modal_projector.linear_1.q_scale", "shape": [ 2560, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 184320, "byteOffset": 9784320 }, { "name": "multi_modal_projector.linear_2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9968640 }, { "name": "multi_modal_projector.linear_2.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9973760 }, { "name": "multi_modal_projector.linear_2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13250560 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13660160 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13662464 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13664768 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13667072 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 13669376 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 13677984 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 16157088 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16466976 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 16469280 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 18957600 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19268640 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 19270944 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 19934496 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20017440 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 20019744 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 20683296 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20766240 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 20768544 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 21432096 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 21515040 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 21517344 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 21519648 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 21521952 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 21524256 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 21532864 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 24011968 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24321856 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 24324160 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 26812480 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27123520 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 27125824 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27789376 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27872320 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 27874624 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 28538176 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28621120 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 28623424 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 29286976 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29369920 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 29372224 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 30035776 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30118720 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30121024 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30123328 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30125632 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 30127936 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 30136544 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 32615648 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32925536 } ], "md5sum": "de99909620504d366a40228af3b0ad80" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "language_model.transformer.embd.q_weight", "shape": [ 51200, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "f51ef4fecf127db6f9a66e63238b1117" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 28783360, "records": [ { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 2488320 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2799360 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 2801664 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 3465216 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 3548160 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 3550464 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4214016 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4296960 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 4299264 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4962816 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5045760 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 5048064 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 5711616 }, { "name": "language_model.transformer.embd.q_scale", "shape": [ 51200, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 5794560 }, { "name": "language_model.transformer.h.0.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13986560 }, { "name": "language_model.transformer.h.0.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13991680 }, { "name": "language_model.transformer.h.0.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13996800 }, { "name": "language_model.transformer.h.0.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14012160 }, { "name": "language_model.transformer.h.0.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 23842560 }, { "name": "language_model.transformer.h.0.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25071360 }, { "name": "language_model.transformer.h.0.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25076480 }, { "name": "language_model.transformer.h.0.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 28353280 }, { "name": "language_model.transformer.h.0.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 28762880 } ], "md5sum": "996c972e6a50851504cb62b5ba7555a8" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.0.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.0.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.0.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.0.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.0.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.1.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.1.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.1.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "571c5c04fd037a0fd544b9bbf68ea3d6" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.1.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.1.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.1.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.1.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.1.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.1.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.1.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.1.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.1.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "41028762a061624199b98826713af0e7" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.1.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.1.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.2.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.2.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.2.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.2.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.2.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.2.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.2.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.2.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.2.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "b9413a3de8bfa7fa9a7f0ea681e195c6" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.2.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.2.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.2.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.2.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.2.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.3.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.3.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.3.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "20c80bdc671db2ccc656926d056fbc64" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.3.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.3.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.3.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.3.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.3.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.3.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.3.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.3.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.3.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "553159ff3a6c36a9572225546a56c1b5" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.3.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.3.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.4.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.4.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.4.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.4.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.4.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.4.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.4.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.4.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.4.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "b1ec613af5174989d93a269019161b8e" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.4.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.4.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.10.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.10.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.10.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.10.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.10.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.10.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.10.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.10.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.10.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "ba98c82efae960c3e5870c3ac1bfa6ed" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29511680, "records": [ { "name": "language_model.transformer.h.10.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.10.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.10.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.10.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.10.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.11.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.11.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.4.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29506560 } ], "md5sum": "11d024365e5c74254ba8f9e5518a6f28" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.4.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.4.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.5.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.5.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.5.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.5.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.5.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.5.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.5.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.5.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.5.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "ce0f9becc7c3314efe6649baeb8c2d18" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.5.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.5.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.5.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.5.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.5.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.6.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.6.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.6.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "c69593f6b14c748edf1293223697b48a" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.6.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.6.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.6.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.6.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.6.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.6.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.6.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.6.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.6.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "9c8df91ac211f37acaf60dd1182da437" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.6.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.6.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.7.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.7.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.7.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.7.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.7.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.7.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.7.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.7.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.7.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "9e42d8449007cb58e05e06cad4c2aa99" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.7.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.7.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.7.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.7.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.7.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.8.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.8.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.8.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "5ca02d95c7b4a4775fa3fef99745435a" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.8.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.8.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.8.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.8.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.8.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.8.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.8.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.8.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.8.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "1b10f1d22ff001c6a30e33bf653b6516" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.8.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.8.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.9.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.9.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.9.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.9.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.9.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.9.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.9.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.9.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.9.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "84ce64766b91f153984ba0818c772fdc" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29511680, "records": [ { "name": "language_model.transformer.h.9.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.9.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.9.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.9.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.9.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.11.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29496320 } ], "md5sum": "f434f177f65ca7dca032b7940afc159d" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.11.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.11.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.11.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.11.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.11.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.11.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.11.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.11.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.11.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "e598b7b1e0a99d9b57df329238188ff4" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.11.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.11.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.12.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.12.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.12.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.12.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.12.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.12.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.12.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.12.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.12.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "065394e5d835cc1f63a0d48e4ef1f22b" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.12.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.12.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.12.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.12.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.12.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.13.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.13.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.13.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "039bdc13d850cb2a3abd5074f04df26f" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.13.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.13.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.13.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.13.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.13.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.13.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.13.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.13.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.13.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "6835254a173ad5944844eaec1237b751" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.13.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.13.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.14.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.14.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.14.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.14.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.14.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.14.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.14.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.14.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.14.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "bc21ebf66c385c2ffdb62217b2e2da54" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.14.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.14.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.14.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.14.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.14.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.15.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.15.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.15.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "74ae1316ad3b7071457ace54e2a70a2a" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.15.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.15.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.15.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.15.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.15.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.15.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.15.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.15.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.15.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "435888735218676557f5b7d80cb9a1ac" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.15.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.15.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.16.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.16.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.16.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.16.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.16.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.16.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.16.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.16.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.16.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "f32a623f47e8ef9daf1ec1c5035e57a3" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.16.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.16.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.16.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.16.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.16.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.17.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.17.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.17.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "23a5a79f7e51a694e4a4349ff3f644b2" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.17.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.17.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.17.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.17.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.17.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.17.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.17.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.17.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.17.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "fb82ce20e3b0783d9051dbd15ef4c70c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.17.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.17.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.18.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.18.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.18.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.18.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.18.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.18.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.18.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.18.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.18.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "0b9dee3726af9d8c3b8de59c06549446" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.18.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.18.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.18.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.18.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.18.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.19.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.19.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.19.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "0d2dc36ab864d531c1bfa2db3849d62f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.19.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.19.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.19.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.19.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.19.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.19.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.19.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.19.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.19.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "012e633c760af890ebaf3874a39191c0" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.19.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.19.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.20.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.20.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.20.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.20.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.20.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.20.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.20.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.20.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.20.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "a3a502934dbd489c58495232eaa4c0bb" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.20.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.20.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.20.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.20.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.20.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.21.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.21.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.21.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "28418c9dd723f07d16e42c7c09af7429" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.21.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.21.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.21.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.21.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.21.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.21.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.21.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.21.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.21.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "d42a248d06df476cd45d4656069964a2" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.21.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.21.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.22.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.22.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.22.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.22.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.22.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.22.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.22.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.22.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.22.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "ca885eec9ea21292d13c9782a310be78" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.22.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.22.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.22.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.22.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.22.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.23.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.23.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.23.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "d54a32a842882b44fb53607299a630b2" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.23.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.23.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.23.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.23.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.23.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.23.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.23.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.23.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.23.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "27e2174166657b7bd355d38b748c5fc7" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.23.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.23.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.24.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.24.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.24.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.24.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.24.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.24.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.24.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.24.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.24.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "0421b3ad1921d87a1ce41c2e1ba81434" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.24.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.24.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.24.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.24.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.24.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.25.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.25.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.25.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "57393a57045d239596e370648af83490" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.25.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.25.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.25.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.25.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.25.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.25.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.25.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.25.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.25.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "4bc4333ceee9333d468765dbc0e4fa9b" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.25.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.25.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.26.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.26.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.26.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.26.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.26.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.26.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.26.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.26.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.26.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "b82398bd49332d746f1291f2b9759277" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.26.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.26.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.26.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.26.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.26.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.27.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.27.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.27.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "2099714907342fa66e6921a4a8a31f67" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.27.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.27.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.27.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.27.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.27.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.27.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.27.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.27.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.27.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "fc59dad9ba58fea18fb47e2a641c39c8" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.27.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.27.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.28.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.28.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.28.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.28.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.28.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.28.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.28.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.28.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.28.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "d81363f377b10aeb22bbf90528a6201e" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.28.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.28.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.28.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.28.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.28.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.29.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.29.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.29.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "e49db70462cfdf0fb692ffd1842c9bb3" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.29.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.29.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.29.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.29.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.29.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.29.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.29.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.29.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.29.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "522540231e1bafcbda4a79ad1869937e" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.29.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.29.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.30.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.30.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.30.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.30.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.30.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.30.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.30.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.30.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.30.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "fd04b7da457b32ae9434c8ca4daf22f2" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.30.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.30.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.30.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.30.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.30.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.31.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.31.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.31.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "d72095c153b5edb008c3153baefb3146" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.31.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.31.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.31.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.31.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.31.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.31.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.31.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.31.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.31.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "d501c8b2cba062d59e66cc236300610d" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33035912, "records": [ { "name": "language_model.transformer.h.31.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.31.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "vision_tower.vision_model.embeddings.patch_embedding.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14745600 }, { "name": "vision_tower.vision_model.embeddings.patch_embedding.weight", "shape": [ 1152, 3, 14, 14 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1354752, "byteOffset": 14747904 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_weight", "shape": [ 729, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 419904, "byteOffset": 16102656 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_scale", "shape": [ 729, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52488, "byteOffset": 16522560 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16575048 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16577352 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16579656 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16581960 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 16584264 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 16592872 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 19071976 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19381864 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 19384168 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 21872488 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22183528 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22185832 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 22849384 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22932328 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22934632 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 23598184 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23681128 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23683432 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24346984 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24429928 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 24432232 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 25095784 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25178728 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25181032 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25183336 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25185640 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 25187944 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 25196552 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 27675656 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27985544 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 27987848 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 30476168 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30787208 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 30789512 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 31453064 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31536008 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31538312 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32201864 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32284808 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 32287112 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32950664 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33033608 } ], "md5sum": "837599c87c0e77b8d16be19f4b8112fd" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 32917120, "records": [ { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 663552 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 746496 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 748800 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 751104 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 753408 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 755712 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 764320 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 3243424 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 3553312 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 3555616 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 6043936 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 6354976 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 6357280 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7020832 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7103776 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7106080 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7769632 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7852576 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7854880 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8518432 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 9267232 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9350176 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9352480 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9354784 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9357088 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 9359392 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 9368000 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 11847104 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12156992 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 12159296 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 14647616 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14958656 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 14960960 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 15624512 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15707456 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 15709760 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 16373312 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16456256 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16458560 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17122112 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17870912 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17953856 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17956160 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17958464 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17960768 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 17963072 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 17971680 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 20450784 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20760672 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 20762976 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 23251296 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23562336 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23564640 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24228192 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24311136 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 24313440 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24976992 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25059936 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25062240 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 25725792 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25808736 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 26474592 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26557536 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26559840 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26562144 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26564448 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 26566752 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 26575360 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 29054464 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29364352 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 29366656 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 31854976 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32166016 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 32168320 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32831872 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32914816 } ], "md5sum": "fe083e730c8f9458c21cc7c2eae8fe78" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33352576, "records": [ { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 663552 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 746496 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 748800 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 1412352 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1495296 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 1497600 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 2161152 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2244096 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2246400 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2248704 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2251008 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 2253312 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 2261920 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 4741024 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5050912 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 5053216 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 7541536 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7852576 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7854880 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8518432 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 9267232 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9350176 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 9352480 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10016032 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10098976 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 10101280 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10764832 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10847776 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10850080 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10852384 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10854688 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 10856992 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 10865600 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 13344704 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13654592 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 13656896 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 16145216 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16456256 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16458560 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17122112 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17870912 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17953856 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17956160 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 18619712 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18702656 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 18704960 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 19368512 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19451456 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19453760 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19456064 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19458368 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 19460672 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 19469280 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 21948384 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22258272 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 22260576 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 24748896 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25059936 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25062240 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 25725792 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25808736 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 26474592 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26557536 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 26559840 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27223392 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27306336 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 27308640 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27972192 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28055136 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28057440 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28059744 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28062048 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 28064352 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 28072960 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 30552064 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30861952 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 30864256 } ], "md5sum": "c7457def2000b9f9ff2575ffa20fe522" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 31926400, "records": [ { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 311040 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 313344 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 976896 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1059840 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 1062144 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 1725696 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1808640 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 1810944 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 2474496 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2557440 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 2559744 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 3223296 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 3306240 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 3308544 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 3310848 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 3313152 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 3315456 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 3324064 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 5803168 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 6113056 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 6115360 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8914720 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 8917024 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 9580576 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9663520 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 9665824 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10329376 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10412320 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 10414624 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 11078176 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11161120 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 11163424 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 11826976 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11909920 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11912224 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11914528 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11916832 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 11919136 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 11927744 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 14406848 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14716736 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 14719040 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17518400 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17520704 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 18184256 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18267200 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 18269504 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 18933056 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19016000 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 19018304 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 19681856 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19764800 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 19767104 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 20430656 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20513600 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20515904 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20518208 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20520512 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 20522816 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 20531424 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 23010528 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23320416 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 23322720 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26122080 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 26124384 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 26787936 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26870880 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 26873184 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27536736 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27619680 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 27621984 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 28285536 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28368480 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 28370784 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 29034336 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29117280 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29119584 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29121888 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29124192 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 29126496 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 29135104 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 31614208 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31924096 } ], "md5sum": "f66d4ff6894c43830ca9c2f62ab39a79" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 32372224, "records": [ { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 2488320 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2799360 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 2801664 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 3465216 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 3548160 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 3550464 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4214016 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4296960 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 4299264 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4962816 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5045760 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 5048064 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 5711616 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5794560 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5796864 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5799168 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5801472 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 5803776 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 5812384 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 8291488 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 11092000 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11403040 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 11405344 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 12068896 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12151840 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 12154144 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 12817696 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12900640 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 12902944 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 13566496 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13649440 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 13651744 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 14315296 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14398240 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14400544 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14402848 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14405152 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 14407456 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 14416064 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 16895168 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 19695680 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20006720 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 20009024 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 20672576 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20755520 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 20757824 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 21421376 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 21504320 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 21506624 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 22170176 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22253120 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22255424 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 22918976 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23001920 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23004224 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 23667776 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23750720 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23753024 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23755328 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23757632 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 23759936 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 23768544 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 26247648 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26557536 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 26559840 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 29048160 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29359200 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 29361504 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 30025056 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30108000 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 30110304 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 30773856 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30856800 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 30859104 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 31522656 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31605600 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31607904 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32271456 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32354400 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32356704 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32359008 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32361312 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 32363616 } ], "md5sum": "4cb4eb039a7e4adb8aa9fb50ee2e4148" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 32901600, "records": [ { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 2479104 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2788992 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 2791296 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 5279616 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5590656 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 5592960 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 6256512 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 6339456 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 6341760 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7005312 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7088256 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7090560 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7754112 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7837056 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7839360 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8502912 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8585856 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8588160 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8590464 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8592768 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 8595072 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 11082784 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11392672 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 11394976 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 13883296 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14194336 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 14196640 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 14860192 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14943136 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 14945440 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 15608992 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15691936 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 15694240 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 16357792 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16440736 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16443040 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17106592 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17189536 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17191840 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17194144 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17196448 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 17198752 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 19686464 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19996352 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 19998656 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 22486976 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22798016 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22800320 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 23463872 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23546816 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23549120 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24212672 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24295616 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 24297920 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24961472 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25044416 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25046720 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 25710272 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25793216 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25795520 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25797824 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25800128 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 25802432 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 28290144 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28600032 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 28602336 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 31090656 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31401696 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31404000 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32067552 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32150496 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 32152800 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32816352 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32899296 } ], "md5sum": "73999b155901a73b57825b4a62bf5363" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 18702656, "records": [ { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 663552 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 746496 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 748800 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 1412352 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1495296 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1497600 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1499904 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1502208 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 1504512 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 1513120 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 3992224 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4302112 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 4304416 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 6792736 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7103776 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7106080 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7769632 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7852576 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7854880 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8518432 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 9267232 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9350176 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 9352480 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10016032 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10098976 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10101280 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10103584 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10105888 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 10108192 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 10116800 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 12595904 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12905792 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 12908096 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 15396416 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15707456 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 15709760 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 16373312 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16456256 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16458560 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17122112 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17870912 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17953856 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17956160 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 18619712 } ], "md5sum": "8610acfcb3bf679640fdb61a35cf2b21" } ] }