zackliqcom commited on
Commit
8f606f6
·
verified ·
1 Parent(s): 3b316a5

Delete run_bench_tests.py

Browse files
Files changed (1) hide show
  1. run_bench_tests.py +0 -149
run_bench_tests.py DELETED
@@ -1,149 +0,0 @@
1
- # ---------------------------------------------------------------------
2
- # Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
3
- # SPDX-License-Identifier: BSD-3-Clause
4
- # ---------------------------------------------------------------------
5
- """
6
- On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
7
- Linux/IoT device version (QCS9075M, etc.)
8
-
9
- Executed by QDC's test framework on the QDC runner.
10
- The runner has SSH access to the allocated Linux device.
11
-
12
- Placeholders replaced at artifact creation time by run_qdc_jobs.py:
13
- <<MODEL_URL>> Direct URL to the GGUF model file (downloaded on-device via curl)
14
- """
15
-
16
- import os
17
- import sys
18
-
19
- import pytest
20
-
21
- from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_shell_command, verify_binary_exists, write_qdc_log
22
-
23
# On-device filesystem path where the GGUF model is downloaded / expected.
MODEL_PATH = "/tmp/model.gguf"
# Prompt fed to llama-completion in the smoke test below.
PROMPT = "What is the capital of France?"
# Shared llama-completion CLI options; fixed --seed 42 keeps output reproducible.
CLI_OPTS = "--batch-size 128 -n 128 -no-cnv --seed 42"
26
-
27
-
28
@pytest.fixture(scope="session", autouse=True)
def install():
    """Push llama_cpp_bundle to the device and download the model if needed.

    Session-scoped and autouse: runs once before any test in this module.

    Raises:
        RuntimeError: if a required binary is missing/not executable, if the
            MODEL_URL placeholder was never substituted, or if the model
            download fails.
    """
    # Check and verify required binaries
    llama_cli = f"{BIN_PATH}/llama-cli"
    llama_completion = f"{BIN_PATH}/llama-completion"
    llama_bench = f"{BIN_PATH}/llama-bench"

    push_bundle_if_needed(llama_cli)

    for binary in [llama_cli, llama_completion, llama_bench]:
        if not verify_binary_exists(binary):
            raise RuntimeError(f"Required binary not found or not executable: {binary}")

    # Check model file
    print(f"[DEBUG] Checking if model exists: {MODEL_PATH}")
    result = run_shell_command(f"ls {MODEL_PATH}", check=False)
    if result.returncode != 0:
        model_url = "<<MODEL_URL>>"
        # BUG FIX: the sentinel must be assembled at runtime. Per the module
        # docstring, run_qdc_jobs.py textually replaces the <<MODEL_URL>>
        # placeholder at artifact-creation time; a literal "<<MODEL_URL>>" in
        # the comparison below would be substituted too, making the check
        # compare the real URL against itself and always raise — i.e. it fired
        # exactly when substitution SUCCEEDED. Building the string piecewise
        # keeps it out of the replacer's reach.
        placeholder = "<<" + "MODEL_URL" + ">>"
        if model_url == placeholder:
            print("[ERROR] MODEL_URL placeholder not replaced!")
            print("[ERROR] This should be replaced by run_qdc_jobs.py during artifact creation")
            raise RuntimeError("MODEL_URL placeholder not replaced")

        print(f"[DEBUG] Model not found, downloading from {model_url}")
        # --fail makes curl exit non-zero on an HTTP error response instead of
        # saving the error page to MODEL_PATH, which would otherwise slip past
        # the `test -f` existence check below.
        run_shell_command(f'curl -L -J --fail --output {MODEL_PATH} "{model_url}"')

        # Verify download succeeded
        verify_result = run_shell_command(f"test -f {MODEL_PATH}", check=False)
        if verify_result.returncode != 0:
            raise RuntimeError(f"Model download failed: {MODEL_PATH}")

        # Check model file size (best-effort, debug output only)
        size_result = run_shell_command(f"ls -lh {MODEL_PATH}", check=False)
        if size_result.returncode == 0:
            print(f"[DEBUG] Downloaded model: {size_result.stdout.strip()}")
    else:
        print(f"[DEBUG] Model already exists: {MODEL_PATH}")
66
-
67
-
68
@pytest.mark.parametrize(
    "device,extra_flags",
    [
        pytest.param("none", "-ctk q8_0 -ctv q8_0", id="cpu"),
        pytest.param("GPUOpenCL", "", id="gpu"),
        pytest.param("HTP0", "-ctk q8_0 -ctv q8_0", id="npu"),
    ],
)
def test_llama_completion(device, extra_flags):
    """Run llama-completion on one backend and assert it exits cleanly."""
    print(f"[TEST] Running llama-completion test for device={device}")

    # Preconditions: the completion binary and the model must both be present.
    completion_bin = f"{BIN_PATH}/llama-completion"
    if not verify_binary_exists(completion_bin):
        pytest.fail(f"Binary not found: {completion_bin}")

    model_probe = run_shell_command(f"test -f {MODEL_PATH}", check=False)
    if model_probe.returncode != 0:
        pytest.fail(f"Model file not found: {MODEL_PATH}")

    print(f"[TEST] Executing llama-completion on device={device}")
    cmd = (
        f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
        f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
        f' -p "{PROMPT}"'
    )
    proc = run_shell_command(cmd, check=False)

    # NOTE(review): log name uses the raw device string here, while
    # test_llama_bench maps it through _DEVICE_LOG_NAME — kept as-is in case
    # downstream tooling expects these exact filenames; confirm and unify.
    write_qdc_log(f"llama_completion_{device}.log", proc.stdout or "")

    if proc.returncode != 0:
        print(f"[TEST FAILED] llama-completion device={device} failed with exit code {proc.returncode}")

    assert proc.returncode == 0, f"llama-completion {device} failed (exit {proc.returncode})"
    print(f"[TEST PASSED] device={device}")
103
-
104
-
105
# Maps --device argument values to short suffixes used in bench log filenames.
_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
106
-
107
-
108
@pytest.mark.parametrize(
    "device",
    [
        pytest.param("none", id="cpu"),
        pytest.param("GPUOpenCL", id="gpu"),
        pytest.param("HTP0", id="npu"),
    ],
)
def test_llama_bench(device):
    """Run llama-bench on one backend and assert it exits cleanly."""
    print(f"[TEST] Running llama-bench test for device={device}")

    # Preconditions: the bench binary and the model must both be present.
    bench_bin = f"{BIN_PATH}/llama-bench"
    if not verify_binary_exists(bench_bin):
        pytest.fail(f"Binary not found: {bench_bin}")

    model_probe = run_shell_command(f"test -f {MODEL_PATH}", check=False)
    if model_probe.returncode != 0:
        pytest.fail(f"Model file not found: {MODEL_PATH}")

    print(f"[TEST] Executing llama-bench on device={device}")
    cmd = (
        f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
        f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32"
    )
    proc = run_shell_command(cmd, check=False)

    write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", proc.stdout or "")

    if proc.returncode != 0:
        print(f"[TEST FAILED] llama-bench device={device} failed with exit code {proc.returncode}")

    assert proc.returncode == 0, f"llama-bench {device} failed (exit {proc.returncode})"
    print(f"[TEST PASSED] device={device}")
142
-
143
-
144
- if __name__ == "__main__":
145
- ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
146
- if os.path.exists("results.xml"):
147
- with open("results.xml") as f:
148
- write_qdc_log("results.xml", f.read())
149
- sys.exit(ret)