|
# Source ONNX model directory (relative to repo root); mounted into the
# TensorRT build container by the `trt` target.
ONNX_PATH := dinov2_vitl14/1

# Destination inside the Triton model repository where the generated
# TensorRT engine (model.plan) is written.
MODEL_PATH := model_repository/dinov2_vitl14/1
|
|
|
|
|
.PHONY: model
# Fetch the DINOv2 ViT-L/14 ONNX model (stored via Git LFS) from Hugging
# Face, copy the model directory into the working tree, then remove the
# clone. NOTE(review): this copies to ./dinov2_vitl14/, matching
# ONNX_PATH, not MODEL_PATH — confirm that is intentional.
model:
	git lfs install
	git clone git@hf.co:RoundtTble/dinov2_vitl14_onnx
	cp -r dinov2_vitl14_onnx/model_repository/dinov2_vitl14/ .
	rm -rf dinov2_vitl14_onnx
|
|
|
|
|
.PHONY: trt
# Convert the ONNX model to a TensorRT engine (model.plan) with trtexec
# inside the NGC PyTorch container. Dynamic batch dimension: min 1,
# opt 2, max 8, input 3x560x560, FP16 enabled. Requires an NVIDIA GPU
# and the NVIDIA container runtime.
trt:
	mkdir -p $(MODEL_PATH)
	docker run --gpus all -it \
		-v $(CURDIR)/$(ONNX_PATH):/workspace/onnx \
		-v $(CURDIR)/$(MODEL_PATH):/workspace/$(MODEL_PATH) \
		--rm nvcr.io/nvidia/pytorch:23.04-py3 \
		/bin/bash -c \
		"trtexec --onnx=onnx/model.onnx --saveEngine=$(MODEL_PATH)/model.plan \
		--minShapes=input:1x3x560x560 \
		--optShapes=input:2x3x560x560 \
		--maxShapes=input:8x3x560x560 \
		--fp16"
|
|
|
|
|
triton: |
|
docker run --rm -d \ |
|
--name dinov2_vitl14_triton \ |
|
-p 8000:8000 --gpus "device=0" -p 8001:8001 -p 8002:8002 \ |
|
-v $(PWD)/model_repository:/models \ |
|
nvcr.io/nvidia/tritonserver:23.04-py3 \ |
|
tritonserver --model-repository=/models |
|
|
|
.PHONY: perf
# Benchmark the running server with perf_analyzer over gRPC: p95 latency,
# fixed concurrency 16, one 3x560x560 input per request. Host networking
# lets 0.0.0.0:8001 reach the server started by `make triton`.
perf:
	docker run --gpus all --rm -it --net host \
		nvcr.io/nvidia/tritonserver:23.04-py3-sdk \
		perf_analyzer -m dinov2_vitl14 \
		--percentile=95 \
		-i grpc -u 0.0.0.0:8001 \
		--concurrency-range 16:16 \
		--shape input:3,560,560
|
|
|
|