zackliqcom committed on
Commit
523c899
·
verified ·
1 Parent(s): 8f606f6

Delete run_scorecard.py

Browse files
Files changed (1) hide show
  1. run_scorecard.py +0 -375
run_scorecard.py DELETED
@@ -1,375 +0,0 @@
1
# ---------------------------------------------------------------------
# Copyright (c) 2025 Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
# ---------------------------------------------------------------------
"""
Scorecard benchmark script for llama.cpp on Linux IoT devices via SSH.

This script runs comprehensive benchmarks:
1. Performance benchmarks (CPU/GPU/HTP x 3 context lengths)
2. Fallback ops detection (GGML_SCHED_DEBUG=2)
3. Perplexity (WikiText-2)
4. Quality checks (simple Q&A validation on CPU/GPU/HTP)

Placeholders are replaced at artifact creation time:
- <<MODEL_URL>>: URL to download the model
- <<NUM_HTPS>>: Number of HTP cores to use
"""

import os
import subprocess
import sys

import pytest
from appium import webdriver
from appium.options.common import AppiumOptions

# Appium capabilities for the QDC Linux device under test.
# The target device name can be overridden via QDC_DEVICE_NAME.
options = AppiumOptions()
options.set_capability("automationName", "QDCLinux")
options.set_capability("platformName", "Linux")
options.set_capability("deviceName", os.getenv("QDC_DEVICE_NAME", "QCS9075M"))

# Context lengths (tokens) to benchmark.
# NOTE(review): the embedded shell script hardcodes `for CTX_LEN in 128 1024 4096`
# rather than interpolating this constant — keep the two in sync.
CONTEXT_LENGTHS = [128, 1024, 4096]

# System prompt for completion benchmarks
SYSTEM_PROMPT = "You are a helpful assistant. Be helpful but brief."

# WikiText-2 URL for perplexity
WIKITEXT_URL = "https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw/wiki.test.raw"
41
class TestScorecard:
    """Scorecard benchmark suite executed over SSH against a QDC Linux device."""

    @pytest.fixture
    def driver(self) -> webdriver.Remote:
        """Create an Appium session for the target device and tear it down.

        Fixed: the previous version *returned* the session and never quit it,
        leaking the Appium session after every test run. Yielding lets pytest
        run the teardown once the test finishes.
        """
        drv = webdriver.Remote(
            command_executor="http://127.0.0.1:4723/wd/hub", options=options
        )
        yield drv
        drv.quit()

    def test_scorecard(self, driver: webdriver.Remote) -> None:
        """Run comprehensive llama.cpp scorecard benchmarks.

        Pushes the llama.cpp bundle to the device with scp, then runs a shell
        script over SSH with four sections: performance benchmarks
        (CPU/GPU/HTP x 3 context lengths), fallback-op detection
        (GGML_SCHED_DEBUG=2), perplexity on WikiText-2, and Q&A quality
        checks. All device-side output is appended to the on-device log file.

        The `driver` fixture is requested but not used directly here —
        presumably the Appium session is what reserves/attaches the device;
        TODO confirm with the QDC harness documentation.
        """
        model_url = "<<MODEL_URL>>"  # replaced at artifact creation time
        num_htps = "<<NUM_HTPS>>"    # replaced at artifact creation time

        # On-device paths (Linux IoT - using /tmp)
        model_path = "/tmp/gguf/model.gguf"
        log_file = "/tmp/QDC_logs/scorecard.log"

        scorecard_script = f"""
cd /tmp/llama_cpp_bundle

# Set library paths
export LD_LIBRARY_PATH=/tmp/llama_cpp_bundle/lib:$LD_LIBRARY_PATH
export ADSP_LIBRARY_PATH="/tmp/llama_cpp_bundle/lib:/system/lib/rfsa/adsp:/system/vendor/lib/rfsa/adsp:/dsp"

# Make binaries executable
chmod +x /tmp/llama_cpp_bundle/bin/*

# Setup paths
BASEDIR=/tmp/llama_cpp_bundle
MODEL={model_path}
LOG_FILE={log_file}
NUM_HTPS={num_htps}

# Create directories
mkdir -p /tmp/gguf
mkdir -p /tmp/QDC_logs

# Download model
# (fixed: dropped -J, which curl refuses to combine with --output)
echo "Downloading model from {model_url}..."
curl -L --output $MODEL "{model_url}"

# Initialize log
echo "============================================================" > $LOG_FILE
echo "LLAMA.CPP SCORECARD (Linux)" >> $LOG_FILE
echo "Date: $(date)" >> $LOG_FILE
echo "Model: {model_url}" >> $LOG_FILE
echo "============================================================" >> $LOG_FILE

#############################################
# SECTION 1: PERFORMANCE BENCHMARKS
#############################################
echo "" >> $LOG_FILE
echo "########################################################" >> $LOG_FILE
echo "# SECTION 1: PERFORMANCE BENCHMARKS" >> $LOG_FILE
echo "########################################################" >> $LOG_FILE

# Common HTP flags
HTP_FLAGS="--no-mmap --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 -fa on -ngl 99"

# Run benchmarks for each compute unit and context length
for COMPUTE in CPU GPU HTP; do
    echo "" >> $LOG_FILE
    echo "--- COMPUTE: $COMPUTE ---" >> $LOG_FILE

    for CTX_LEN in 128 1024 4096; do
        echo "" >> $LOG_FILE
        echo "=== $COMPUTE | CTX=$CTX_LEN ===" >> $LOG_FILE

        # Select prompt file
        PROMPT_FILE="/tmp/llama_cpp_bundle/sample_prompt_${{CTX_LEN}}.txt"

        if [ "$COMPUTE" = "CPU" ]; then
            CMD="GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion --model $MODEL --n-predict -1 --ctx-size $CTX_LEN --system-prompt \\"{SYSTEM_PROMPT}\\" --file $PROMPT_FILE --seed 1 --single-turn --no-display-prompt --n-gpu-layers 0"
            echo "COMMAND: $CMD" >> $LOG_FILE
            GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion \\
                --model $MODEL \\
                --n-predict -1 \\
                --ctx-size $CTX_LEN \\
                --system-prompt "{SYSTEM_PROMPT}" \\
                --file "$PROMPT_FILE" \\
                --seed 1 \\
                --single-turn \\
                --no-display-prompt \\
                --n-gpu-layers 0 \\
                2>&1 | tee -a $LOG_FILE

        elif [ "$COMPUTE" = "GPU" ]; then
            CMD="GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion --model $MODEL --n-predict -1 --ctx-size $CTX_LEN --system-prompt \\"{SYSTEM_PROMPT}\\" --file $PROMPT_FILE --seed 1 --single-turn --no-display-prompt -fa off"
            echo "COMMAND: $CMD" >> $LOG_FILE
            GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion \\
                --model $MODEL \\
                --n-predict -1 \\
                --ctx-size $CTX_LEN \\
                --system-prompt "{SYSTEM_PROMPT}" \\
                --file "$PROMPT_FILE" \\
                --seed 1 \\
                --single-turn \\
                --no-display-prompt \\
                -fa off \\
                2>&1 | tee -a $LOG_FILE

        elif [ "$COMPUTE" = "HTP" ]; then
            CMD="GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-completion --model $MODEL --n-predict -1 --ctx-size $CTX_LEN --system-prompt \\"{SYSTEM_PROMPT}\\" --file $PROMPT_FILE --seed 1 --single-turn --no-display-prompt $HTP_FLAGS --device HTP0 -ctk f16 -ctv f16 --batch-size 128"
            echo "COMMAND: $CMD" >> $LOG_FILE
            GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-completion \\
                --model $MODEL \\
                --n-predict -1 \\
                --ctx-size $CTX_LEN \\
                --system-prompt "{SYSTEM_PROMPT}" \\
                --file "$PROMPT_FILE" \\
                --seed 1 \\
                --single-turn \\
                --no-display-prompt \\
                $HTP_FLAGS \\
                --device HTP0 \\
                -ctk f16 \\
                -ctv f16 \\
                --batch-size 128 \\
                2>&1 | tee -a $LOG_FILE
        fi
    done
done

#############################################
# SECTION 2: FALLBACK OPS DETECTION (GGML_SCHED_DEBUG=2)
#############################################
echo "" >> $LOG_FILE
echo "########################################################" >> $LOG_FILE
echo "# SECTION 2: FALLBACK OPS (GGML_SCHED_DEBUG=2)" >> $LOG_FILE
echo "########################################################" >> $LOG_FILE

# GPU fallback ops
echo "" >> $LOG_FILE
echo "=== FALLBACK_OPS | GPU ===" >> $LOG_FILE
echo "Running with GGML_SCHED_DEBUG=2 on GPU..." >> $LOG_FILE

GGML_SCHED_DEBUG=2 GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-completion \\
    --model $MODEL \\
    --n-predict 64 \\
    --ctx-size 128 \\
    -p "Hello world" \\
    --seed 1 \\
    --single-turn \\
    --no-display-prompt \\
    -fa off \\
    -v \\
    2>&1 | tee -a $LOG_FILE

# HTP fallback ops
echo "" >> $LOG_FILE
echo "=== FALLBACK_OPS | HTP ===" >> $LOG_FILE
echo "Running with GGML_SCHED_DEBUG=2 on HTP..." >> $LOG_FILE

GGML_SCHED_DEBUG=2 GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-completion \\
    --model $MODEL \\
    --n-predict 64 \\
    --ctx-size 128 \\
    -p "Hello world" \\
    --seed 1 \\
    --single-turn \\
    --no-display-prompt \\
    $HTP_FLAGS \\
    --device HTP0 \\
    -ctk f16 \\
    -ctv f16 \\
    --batch-size 128 \\
    -v \\
    2>&1 | tee -a $LOG_FILE

#############################################
# SECTION 3: PERPLEXITY (WikiText-2)
#############################################
echo "" >> $LOG_FILE
echo "########################################################" >> $LOG_FILE
echo "# SECTION 3: PERPLEXITY (WikiText-2)" >> $LOG_FILE
echo "########################################################" >> $LOG_FILE

echo "" >> $LOG_FILE
echo "Downloading WikiText-2 dataset..." >> $LOG_FILE
curl -L -o /tmp/wiki.test.raw "{WIKITEXT_URL}"

echo "" >> $LOG_FILE
echo "Running perplexity on HTP..." >> $LOG_FILE
echo "COMMAND: GGML_HEXAGON_NDEV=$NUM_HTPS llama-perplexity -m $MODEL -f wiki.test.raw --device HTP0 --chunks 10" >> $LOG_FILE

GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-perplexity \\
    -m $MODEL \\
    -f /tmp/wiki.test.raw \\
    --device HTP0 \\
    --chunks 10 \\
    $HTP_FLAGS \\
    2>&1 | tee -a $LOG_FILE

#############################################
# SECTION 4: QUALITY CHECKS (Q&A Validation)
#############################################
echo "" >> $LOG_FILE
echo "########################################################" >> $LOG_FILE
echo "# SECTION 4: QUALITY CHECKS" >> $LOG_FILE
echo "########################################################" >> $LOG_FILE

# Run simple Q&A tests on GPU and HTP and check for expected answers
# Format: QUALITY_CHECK: DEVICE | question | expected | PASS/FAIL

run_quality_check() {{
    local DEVICE="$1"
    local QUESTION="$2"
    local EXPECTED="$3"

    echo "" >> $LOG_FILE
    echo "--- Quality Check ($DEVICE) ---" >> $LOG_FILE
    echo "Question: $QUESTION" >> $LOG_FILE
    echo "Expected to contain: $EXPECTED" >> $LOG_FILE

    # Filter: strip loading spinners, progress bars, and non-printable characters
    FILTER='grep -v -E "^(Loading|\\||/|\\\\|-|\\[|model|warning|log|ggml|llama)" | sed "s/[^[:print:][:space:]]//g" | sed "/^[[:space:]]*$/d" | head -20'

    if [ "$DEVICE" = "CPU" ]; then
        RAW_RESPONSE=$(GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-cli \\
            --model $MODEL \\
            --system-prompt "{SYSTEM_PROMPT}" \\
            -p "$QUESTION" \\
            --n-predict 64 \\
            --ctx-size 512 \\
            --seed 1 \\
            --no-display-prompt \\
            --n-gpu-layers 0 \\
            2>/dev/null)

    elif [ "$DEVICE" = "GPU" ]; then
        RAW_RESPONSE=$(GGML_HEXAGON_NDEV=0 $BASEDIR/bin/llama-cli \\
            --model $MODEL \\
            --system-prompt "{SYSTEM_PROMPT}" \\
            -p "$QUESTION" \\
            --n-predict 64 \\
            --ctx-size 512 \\
            --seed 1 \\
            --no-display-prompt \\
            -fa off \\
            2>/dev/null)

    elif [ "$DEVICE" = "HTP" ]; then
        RAW_RESPONSE=$(GGML_HEXAGON_NDEV=$NUM_HTPS $BASEDIR/bin/llama-cli \\
            --model $MODEL \\
            --system-prompt "{SYSTEM_PROMPT}" \\
            -p "$QUESTION" \\
            --n-predict 64 \\
            --ctx-size 512 \\
            --seed 1 \\
            --no-display-prompt \\
            $HTP_FLAGS \\
            --device HTP0 \\
            -ctk f16 \\
            -ctv f16 \\
            --batch-size 128 \\
            2>/dev/null)
    fi

    RESPONSE=$(echo "$RAW_RESPONSE" | eval $FILTER)

    echo "RESPONSE_START" >> $LOG_FILE
    echo "$RESPONSE" >> $LOG_FILE
    echo "RESPONSE_END" >> $LOG_FILE

    # Check if expected string is in response (case-insensitive)
    if echo "$RESPONSE" | grep -qi "$EXPECTED"; then
        echo "QUALITY_CHECK: $DEVICE | $QUESTION | $EXPECTED | PASS" >> $LOG_FILE
    else
        echo "QUALITY_CHECK: $DEVICE | $QUESTION | $EXPECTED | FAIL" >> $LOG_FILE
    fi
}}

# CPU Quality Checks
echo "" >> $LOG_FILE
echo "=== QUALITY_CHECKS | CPU ===" >> $LOG_FILE

run_quality_check CPU "What is the capital of France?" "Paris"
run_quality_check CPU "What is 2 + 2?" "4"
run_quality_check CPU "What planet is closest to the Sun?" "Mercury"

# GPU Quality Checks
echo "" >> $LOG_FILE
echo "=== QUALITY_CHECKS | GPU ===" >> $LOG_FILE

run_quality_check GPU "What is the capital of France?" "Paris"
run_quality_check GPU "What is 2 + 2?" "4"
run_quality_check GPU "What planet is closest to the Sun?" "Mercury"

# HTP Quality Checks
echo "" >> $LOG_FILE
echo "=== QUALITY_CHECKS | HTP ===" >> $LOG_FILE

run_quality_check HTP "What is the capital of France?" "Paris"
run_quality_check HTP "What is 2 + 2?" "4"
run_quality_check HTP "What planet is closest to the Sun?" "Mercury"

#############################################
# COMPLETE
#############################################
echo "" >> $LOG_FILE
echo "============================================================" >> $LOG_FILE
echo "=== SCORECARD COMPLETE ===" >> $LOG_FILE
echo "============================================================" >> $LOG_FILE
"""

        # Push the bundle to the device via SSH/SCP
        device_host = os.getenv("QDC_DEVICE_HOST", "localhost")
        subprocess.run(
            ["scp", "-r", "/qdc/appium/llama_cpp_bundle/", f"{device_host}:/tmp/"],
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            check=True,
        )

        # Run the scorecard script via SSH, feeding it on stdin to "sh -s".
        # Fixed: the previous `["ssh", host, "sh", "-c", script]` form is
        # broken because ssh space-joins its arguments into one remote
        # command string, so `sh -c` received only the first word of the
        # script as its command; stdin delivery avoids all remote quoting.
        result = subprocess.run(
            ["ssh", device_host, "sh", "-s"],
            input=scorecard_script,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            check=True,
        )
        print(result.stdout)
        print(result.stderr)
372
-
373
-
374
if __name__ == "__main__":
    # Run this module under pytest directly, emitting JUnit XML for CI,
    # and propagate pytest's exit status to the caller.
    pytest_args = ["-s", "--junitxml=results.xml", os.path.realpath(__file__)]
    sys.exit(pytest.main(pytest_args))