zackliqcom
/

qdc-test-script

Model card Files Files and versions

xet

Community

zackliqcom committed on 12 days ago

Commit

8f606f6

verified ·

1 Parent(s): 3b316a5

Delete run_bench_tests.py

Browse files

Files changed (1) hide show

run_bench_tests.py +0 -149

run_bench_tests.py DELETED Viewed

@@ -1,149 +0,0 @@
-# ---------------------------------------------------------------------
-# Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
-# SPDX-License-Identifier: BSD-3-Clause
-# ---------------------------------------------------------------------
-"""
-On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
-Linux/IoT device version (QCS9075M, etc.)
-Executed by QDC's test framework on the QDC runner.
-The runner has SSH access to the allocated Linux device.
-Placeholders replaced at artifact creation time by run_qdc_jobs.py:
-  <<MODEL_URL>>  Direct URL to the GGUF model file (downloaded on-device via curl)
-"""
-import os
-import sys
-import pytest
-from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_shell_command, verify_binary_exists, write_qdc_log
-MODEL_PATH = "/tmp/model.gguf"
-PROMPT = "What is the capital of France?"
-CLI_OPTS = "--batch-size 128 -n 128 -no-cnv --seed 42"
-@pytest.fixture(scope="session", autouse=True)
-def install():
-    """Push llama_cpp_bundle to the device and download model if needed."""
-    # Check and verify required binaries
-    llama_cli = f"{BIN_PATH}/llama-cli"
-    llama_completion = f"{BIN_PATH}/llama-completion"
-    llama_bench = f"{BIN_PATH}/llama-bench"
-    push_bundle_if_needed(llama_cli)
-    for binary in [llama_cli, llama_completion, llama_bench]:
-        if not verify_binary_exists(binary):
-            raise RuntimeError(f"Required binary not found or not executable: {binary}")
-    # Check model file
-    print(f"[DEBUG] Checking if model exists: {MODEL_PATH}")
-    result = run_shell_command(f"ls {MODEL_PATH}", check=False)
-    if result.returncode != 0:
-        print(f"[DEBUG] Model not found, downloading from <<MODEL_URL>>")
-        model_url = "<<MODEL_URL>>"
-        if model_url == "<<MODEL_URL>>":
-            print("[ERROR] MODEL_URL placeholder not replaced!")
-            print("[ERROR] This should be replaced by run_qdc_jobs.py during artifact creation")
-            raise RuntimeError("MODEL_URL placeholder not replaced")
-        run_shell_command(f'curl -L -J --output {MODEL_PATH} "{model_url}"')
-        # Verify download succeeded
-        verify_result = run_shell_command(f"test -f {MODEL_PATH}", check=False)
-        if verify_result.returncode != 0:
-            raise RuntimeError(f"Model download failed: {MODEL_PATH}")
-        # Check model file size
-        size_result = run_shell_command(f"ls -lh {MODEL_PATH}", check=False)
-        if size_result.returncode == 0:
-            print(f"[DEBUG] Downloaded model: {size_result.stdout.strip()}")
-    else:
-        print(f"[DEBUG] Model already exists: {MODEL_PATH}")
-@pytest.mark.parametrize(
-    "device,extra_flags",
-    [
-        pytest.param("none", "-ctk q8_0 -ctv q8_0", id="cpu"),
-        pytest.param("GPUOpenCL", "", id="gpu"),
-        pytest.param("HTP0", "-ctk q8_0 -ctv q8_0", id="npu"),
-    ],
-)
-def test_llama_completion(device, extra_flags):
-    print(f"[TEST] Running llama-completion test for device={device}")
-    # Verify binary and model exist
-    binary = f"{BIN_PATH}/llama-completion"
-    if not verify_binary_exists(binary):
-        pytest.fail(f"Binary not found: {binary}")
-    model_check = run_shell_command(f"test -f {MODEL_PATH}", check=False)
-    if model_check.returncode != 0:
-        pytest.fail(f"Model file not found: {MODEL_PATH}")
-    print(f"[TEST] Executing llama-completion on device={device}")
-    result = run_shell_command(
-        f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
-        f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
-        f' -p "{PROMPT}"',
-        check=False,
-    )
-    write_qdc_log(f"llama_completion_{device}.log", result.stdout or "")
-    if result.returncode != 0:
-        print(f"[TEST FAILED] llama-completion device={device} failed with exit code {result.returncode}")
-    assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})"
-    print(f"[TEST PASSED] device={device}")
-_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
-@pytest.mark.parametrize(
-    "device",
-    [
-        pytest.param("none", id="cpu"),
-        pytest.param("GPUOpenCL", id="gpu"),
-        pytest.param("HTP0", id="npu"),
-    ],
-)
-def test_llama_bench(device):
-    print(f"[TEST] Running llama-bench test for device={device}")
-    # Verify binary and model exist
-    binary = f"{BIN_PATH}/llama-bench"
-    if not verify_binary_exists(binary):
-        pytest.fail(f"Binary not found: {binary}")
-    model_check = run_shell_command(f"test -f {MODEL_PATH}", check=False)
-    if model_check.returncode != 0:
-        pytest.fail(f"Model file not found: {MODEL_PATH}")
-    print(f"[TEST] Executing llama-bench on device={device}")
-    result = run_shell_command(
-        f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
-        f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32",
-        check=False,
-    )
-    write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "")
-    if result.returncode != 0:
-        print(f"[TEST FAILED] llama-bench device={device} failed with exit code {result.returncode}")
-    assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})"
-    print(f"[TEST PASSED] device={device}")
-if __name__ == "__main__":
-    ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
-    if os.path.exists("results.xml"):
-        with open("results.xml") as f:
-            write_qdc_log("results.xml", f.read())
-    sys.exit(ret)