meaculpitt committed on
Commit
651f424
·
verified ·
1 Parent(s): 3b6f0fa

v6-fp16: update chute_config.yml

Browse files
Files changed (1) hide show
  1. chute_config.yml +52 -1
chute_config.yml CHANGED
@@ -2,10 +2,61 @@ Image:
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
- - pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'opencv-python-headless>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  NodeSelector:
7
  gpu_count: 1
8
  min_vram_gb_per_gpu: 16
 
 
 
 
 
 
 
 
 
9
  Chute:
10
  timeout_seconds: 300
11
  concurrency: 4
 
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
+ - >-
6
+ pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16'
7
+ 'opencv-python-headless>=4.7' 'pillow>=9.5'
8
+ 'huggingface_hub>=0.19.4' 'pydantic>=2.0'
9
+ 'pyyaml>=6.0' 'aiohttp>=3.9'
10
+ 'tensorrt' 'tensorrt-lean'
11
+ - >-
12
+ python3 -c "
13
+ import os, nvidia.cudnn, nvidia.cublas;
14
+ cudnn=os.path.join(os.path.dirname(nvidia.cudnn.__file__),'lib');
15
+ cublas=os.path.join(os.path.dirname(nvidia.cublas.__file__),'lib');
16
+ open('/etc/ld.so.conf.d/nvidia-ort.conf','w').write(cudnn+chr(10)+cublas+chr(10))
17
+ " && ldconfig
18
+ # Bake model weights into image at build time — eliminates HF download on cold start.
19
+ - >-
20
+ python3 -c "
21
+ import os; os.makedirs('/opt/model', exist_ok=True);
22
+ from huggingface_hub import hf_hub_download;
23
+ [hf_hub_download(repo_id='meaculpitt/Detect-Vehicle', filename=f, local_dir='/opt/model')
24
+ for f in ['weights.onnx','class_names.txt','model_type.json','main.py','miner.py']];
25
+ print('Model baked into image at /opt/model/')
26
+ "
27
+ # Attempt TRT engine pre-compilation (succeeds only if builder has GPU).
28
+ - >-
29
+ python3 -c "
30
+ import os, ctypes, numpy as np; os.makedirs('/opt/trt_cache', exist_ok=True);
31
+ _TRT='/usr/local/lib/python3.12/dist-packages/tensorrt_libs';
32
+ [ctypes.CDLL(os.path.join(_TRT,l),mode=ctypes.RTLD_GLOBAL)
33
+ for l in ['libnvinfer.so.10','libnvinfer_plugin.so.10','libnvonnxparser.so.10']
34
+ if os.path.exists(os.path.join(_TRT,l))];
35
+ import onnxruntime as ort;
36
+ sess=ort.InferenceSession('/opt/model/weights.onnx',
37
+ providers=[('TensorrtExecutionProvider',
38
+ {'device_id':0,'trt_fp16_enable':True,'trt_engine_cache_enable':True,
39
+ 'trt_engine_cache_path':'/opt/trt_cache','trt_max_workspace_size':2*1024**3}),
40
+ 'CUDAExecutionProvider']);
41
+ sess.run(None, {sess.get_inputs()[0].name: np.zeros((1,3,1280,1280),dtype='float32')});
42
+ print('TRT engine baked:', os.listdir('/opt/trt_cache'))
43
+ " || echo 'TRT pre-warm skipped (no GPU at build time)'
44
+ environment:
45
+ MODEL_PATH: /opt/model
46
+ TRT_CACHE_PATH: /opt/trt_cache
47
+
48
  NodeSelector:
49
  gpu_count: 1
50
  min_vram_gb_per_gpu: 16
51
+ include:
52
+ - 4090
53
+ - a100
54
+ - a100_sxm
55
+ - h100
56
+ - h100_sxm
57
+ - l40s
58
+ - a40
59
+
60
  Chute:
61
  timeout_seconds: 300
62
  concurrency: 4