v6-fp16: update chute_config.yml
Changed file: chute_config.yml (+52 lines, −1 line)
chute_config.yml
CHANGED
|
@@ -2,10 +2,61 @@ Image:
|
|
| 2 |
from_base: parachutes/python:3.12
|
| 3 |
run_command:
|
| 4 |
- pip install --upgrade setuptools wheel
|
| 5 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
NodeSelector:
|
| 7 |
gpu_count: 1
|
| 8 |
min_vram_gb_per_gpu: 16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
Chute:
|
| 10 |
timeout_seconds: 300
|
| 11 |
concurrency: 4
|
|
|
|
| 2 |
from_base: parachutes/python:3.12
|
| 3 |
run_command:
|
| 4 |
- pip install --upgrade setuptools wheel
|
| 5 |
+
- >-
|
| 6 |
+
pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16'
|
| 7 |
+
'opencv-python-headless>=4.7' 'pillow>=9.5'
|
| 8 |
+
'huggingface_hub>=0.19.4' 'pydantic>=2.0'
|
| 9 |
+
'pyyaml>=6.0' 'aiohttp>=3.9'
|
| 10 |
+
'tensorrt' 'tensorrt-lean'
|
| 11 |
+
- >-
|
| 12 |
+
python3 -c "
|
| 13 |
+
import os, nvidia.cudnn, nvidia.cublas;
|
| 14 |
+
cudnn=os.path.join(os.path.dirname(nvidia.cudnn.__file__),'lib');
|
| 15 |
+
cublas=os.path.join(os.path.dirname(nvidia.cublas.__file__),'lib');
|
| 16 |
+
open('/etc/ld.so.conf.d/nvidia-ort.conf','w').write(cudnn+chr(10)+cublas+chr(10))
|
| 17 |
+
" && ldconfig
|
| 18 |
+
# Bake model weights into the image at build time — eliminates the Hugging Face download on cold start.
|
| 19 |
+
- >-
|
| 20 |
+
python3 -c "
|
| 21 |
+
import os; os.makedirs('/opt/model', exist_ok=True);
|
| 22 |
+
from huggingface_hub import hf_hub_download;
|
| 23 |
+
[hf_hub_download(repo_id='meaculpitt/Detect-Vehicle', filename=f, local_dir='/opt/model')
|
| 24 |
+
for f in ['weights.onnx','class_names.txt','model_type.json','main.py','miner.py']];
|
| 25 |
+
print('Model baked into image at /opt/model/')
|
| 26 |
+
"
|
| 27 |
+
# Attempt TensorRT engine pre-compilation (succeeds only if the build machine has a GPU).
|
| 28 |
+
- >-
|
| 29 |
+
python3 -c "
|
| 30 |
+
import os, ctypes, numpy as np; os.makedirs('/opt/trt_cache', exist_ok=True);
|
| 31 |
+
_TRT='/usr/local/lib/python3.12/dist-packages/tensorrt_libs';
|
| 32 |
+
[ctypes.CDLL(os.path.join(_TRT,l),mode=ctypes.RTLD_GLOBAL)
|
| 33 |
+
for l in ['libnvinfer.so.10','libnvinfer_plugin.so.10','libnvonnxparser.so.10']
|
| 34 |
+
if os.path.exists(os.path.join(_TRT,l))];
|
| 35 |
+
import onnxruntime as ort;
|
| 36 |
+
sess=ort.InferenceSession('/opt/model/weights.onnx',
|
| 37 |
+
providers=[('TensorrtExecutionProvider',
|
| 38 |
+
{'device_id':0,'trt_fp16_enable':True,'trt_engine_cache_enable':True,
|
| 39 |
+
'trt_engine_cache_path':'/opt/trt_cache','trt_max_workspace_size':2*1024**3}),
|
| 40 |
+
'CUDAExecutionProvider']);
|
| 41 |
+
sess.run(None, {sess.get_inputs()[0].name: np.zeros((1,3,1280,1280),dtype='float32')});
|
| 42 |
+
print('TRT engine baked:', os.listdir('/opt/trt_cache'))
|
| 43 |
+
" || echo 'TRT pre-warm skipped (no GPU at build time)'
|
| 44 |
+
environment:
|
| 45 |
+
MODEL_PATH: /opt/model
|
| 46 |
+
TRT_CACHE_PATH: /opt/trt_cache
|
| 47 |
+
|
| 48 |
NodeSelector:
|
| 49 |
gpu_count: 1
|
| 50 |
min_vram_gb_per_gpu: 16
|
| 51 |
+
include:
|
| 52 |
+
- 4090
|
| 53 |
+
- a100
|
| 54 |
+
- a100_sxm
|
| 55 |
+
- h100
|
| 56 |
+
- h100_sxm
|
| 57 |
+
- l40s
|
| 58 |
+
- a40
|
| 59 |
+
|
| 60 |
Chute:
|
| 61 |
timeout_seconds: 300
|
| 62 |
concurrency: 4
|