| |
#include "safetensors_loader.h"

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
|
|
| int main(int argc, char** argv) { |
| std::string dir = argc > 1 ? argv[1] |
| : "/path/to/Qwen3-235B-A22B-Instruct-2507-BF16"; |
|
|
| SafetensorsLoader loader; |
| auto t0 = std::chrono::steady_clock::now(); |
| if (!loader.open(dir)) { |
| fprintf(stderr, "FAIL: open(%s)\n", dir.c_str()); |
| return 1; |
| } |
| auto t1 = std::chrono::steady_clock::now(); |
| double ms = std::chrono::duration<double, std::milli>(t1 - t0).count(); |
|
|
| size_t n = loader.tensor_count(); |
| size_t s = loader.shard_count(); |
| size_t bytes = loader.total_bytes(); |
|
|
| printf("Open took %.1f ms\n", ms); |
| printf("Shards: %zu\n", s); |
| printf("Tensors: %zu\n", n); |
| printf("Total bytes: %.2f GB\n", bytes / 1e9); |
|
|
| |
| |
| bool ok_count = (n == 36945); |
| printf("Tensor count check: %s (expected 36945)\n", ok_count ? "OK" : "FAIL"); |
|
|
| |
| auto check = [&](const std::string& name) { |
| auto* m = loader.get(name); |
| if (!m) { |
| printf(" MISSING: %s\n", name.c_str()); |
| return false; |
| } |
| printf(" OK: %s dtype=%s shape=[", name.c_str(), m->dtype.c_str()); |
| for (size_t i = 0; i < m->shape.size(); i++) printf("%s%ld", i ? "," : "", m->shape[i]); |
| printf("] shard=%d offset=%zu nbytes=%zu\n", m->shard_id, m->offset, m->nbytes); |
| return true; |
| }; |
|
|
| bool ok_names = true; |
| ok_names &= check("model.embed_tokens.weight"); |
| ok_names &= check("model.layers.0.input_layernorm.weight"); |
| ok_names &= check("model.layers.0.self_attn.q_proj.weight"); |
| ok_names &= check("model.layers.0.self_attn.k_proj.weight"); |
| ok_names &= check("model.layers.0.self_attn.o_proj.weight"); |
| ok_names &= check("model.layers.0.post_attention_layernorm.weight"); |
| ok_names &= check("model.layers.0.mlp.gate.weight"); |
| ok_names &= check("model.layers.0.mlp.experts.0.gate_proj.weight"); |
| ok_names &= check("model.layers.0.mlp.experts.127.down_proj.weight"); |
| ok_names &= check("model.layers.93.self_attn.q_proj.weight"); |
| ok_names &= check("model.norm.weight"); |
| ok_names &= check("lm_head.weight"); |
|
|
| |
| auto t_read0 = std::chrono::steady_clock::now(); |
| const void* embed_ptr = loader.data_ptr("model.embed_tokens.weight"); |
| if (embed_ptr) { |
| const uint16_t* bf16 = (const uint16_t*)embed_ptr; |
| printf("\nmodel.embed_tokens.weight first 8 BF16 raw: "); |
| for (int i = 0; i < 8; i++) printf("0x%04x ", bf16[i]); |
| printf("\n"); |
| } |
| auto t_read1 = std::chrono::steady_clock::now(); |
| double read_ms = std::chrono::duration<double, std::milli>(t_read1 - t_read0).count(); |
| printf("(first data_ptr() access including mmap: %.1f ms)\n", read_ms); |
|
|
| |
| int expert_count = 0; |
| for (auto& name : loader.list_tensor_names()) { |
| if (name.find("layers.0.mlp.experts.") != std::string::npos) expert_count++; |
| } |
| bool ok_expert = (expert_count == 384); |
| printf("\nLayer 0 expert tensor count: %d (expected 384) %s\n", |
| expert_count, ok_expert ? "OK" : "FAIL"); |
|
|
| bool pass = ok_count && ok_names && ok_expert && (embed_ptr != nullptr); |
| printf("\n%s\n", pass ? "=== test_safetensors PASS ===" : "=== test_safetensors FAIL ==="); |
| return pass ? 0 : 1; |
| } |
|
|