v6-fp16: update chute_config.yml
Changed file: chute_config.yml (+52 lines, −1 line)
chute_config.yml
CHANGED
|
@@ -2,10 +2,61 @@ Image:
|
|
| 2 |
from_base: parachutes/python:3.12
|
| 3 |
run_command:
|
| 4 |
- pip install --upgrade setuptools wheel
|
| 5 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
NodeSelector:
|
| 7 |
gpu_count: 1
|
| 8 |
min_vram_gb_per_gpu: 16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
Chute:
|
| 10 |
timeout_seconds: 300
|
| 11 |
concurrency: 4
|
|
|
|
| 2 |
from_base: parachutes/python:3.12
|
| 3 |
run_command:
|
| 4 |
- pip install --upgrade setuptools wheel
|
| 5 |
+
- >-
|
| 6 |
+
pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16'
|
| 7 |
+
'opencv-python-headless>=4.7' 'pillow>=9.5'
|
| 8 |
+
'huggingface_hub>=0.19.4' 'pydantic>=2.0'
|
| 9 |
+
'pyyaml>=6.0' 'aiohttp>=3.9'
|
| 10 |
+
'tensorrt' 'tensorrt-lean'
|
| 11 |
+
- >-
|
| 12 |
+
python3 -c "
|
| 13 |
+
import os, nvidia.cudnn, nvidia.cublas;
|
| 14 |
+
cudnn=os.path.join(os.path.dirname(nvidia.cudnn.__file__),'lib');
|
| 15 |
+
cublas=os.path.join(os.path.dirname(nvidia.cublas.__file__),'lib');
|
| 16 |
+
open('/etc/ld.so.conf.d/nvidia-ort.conf','w').write(cudnn+chr(10)+cublas+chr(10))
|
| 17 |
+
" && ldconfig
|
| 18 |
+
# Bake model weights into the image at build time — eliminates the Hugging Face download on cold start.
|
| 19 |
+
- >-
|
| 20 |
+
python3 -c "
|
| 21 |
+
import os; os.makedirs('/opt/model', exist_ok=True);
|
| 22 |
+
from huggingface_hub import hf_hub_download;
|
| 23 |
+
[hf_hub_download(repo_id='meaculpitt/Detect-Vehicle', filename=f, local_dir='/opt/model')
|
| 24 |
+
for f in ['weights.onnx','class_names.txt','model_type.json','main.py','miner.py']];
|
| 25 |
+
print('Model baked into image at /opt/model/')
|
| 26 |
+
"
|
| 27 |
+
# Attempt TensorRT engine pre-compilation (succeeds only if the build machine has a GPU).
|
| 28 |
+
- >-
|
| 29 |
+
python3 -c "
|
| 30 |
+
import os, ctypes, numpy as np; os.makedirs('/opt/trt_cache', exist_ok=True);
|
| 31 |
+
_TRT='/usr/local/lib/python3.12/dist-packages/tensorrt_libs';
|
| 32 |
+
[ctypes.CDLL(os.path.join(_TRT,l),mode=ctypes.RTLD_GLOBAL)
|
| 33 |
+
for l in ['libnvinfer.so.10','libnvinfer_plugin.so.10','libnvonnxparser.so.10']
|
| 34 |
+
if os.path.exists(os.path.join(_TRT,l))];
|
| 35 |
+
import onnxruntime as ort;
|
| 36 |
+
sess=ort.InferenceSession('/opt/model/weights.onnx',
|
| 37 |
+
providers=[('TensorrtExecutionProvider',
|
| 38 |
+
{'device_id':0,'trt_fp16_enable':True,'trt_engine_cache_enable':True,
|
| 39 |
+
'trt_engine_cache_path':'/opt/trt_cache','trt_max_workspace_size':2*1024**3}),
|
| 40 |
+
'CUDAExecutionProvider']);
|
| 41 |
+
sess.run(None, {sess.get_inputs()[0].name: np.zeros((1,3,1280,1280),dtype='float32')});
|
| 42 |
+
print('TRT engine baked:', os.listdir('/opt/trt_cache'))
|
| 43 |
+
" || echo 'TRT pre-warm skipped (no GPU at build time)'
|
| 44 |
+
environment:
|
| 45 |
+
MODEL_PATH: /opt/model
|
| 46 |
+
TRT_CACHE_PATH: /opt/trt_cache
|
| 47 |
+
|
| 48 |
NodeSelector:
|
| 49 |
gpu_count: 1
|
| 50 |
min_vram_gb_per_gpu: 16
|
| 51 |
+
include:
|
| 52 |
+
- 4090
|
| 53 |
+
- a100
|
| 54 |
+
- a100_sxm
|
| 55 |
+
- h100
|
| 56 |
+
- h100_sxm
|
| 57 |
+
- l40s
|
| 58 |
+
- a40
|
| 59 |
+
|
| 60 |
Chute:
|
| 61 |
timeout_seconds: 300
|
| 62 |
concurrency: 4
|