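# Web-page header captured together with this Makefile. It is not Makefile
# syntax, so it is kept here as comments to let make parse the file:
#   RoundtTble's picture
#   Change model to FP16
#   b6a8249
#   raw
#   history blame
#   No virus
#   1.11 kB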
# Host directory (relative to the directory make runs in) that holds the ONNX
# model. The trt target bind-mounts it into the container at /workspace/onnx
# and reads model.onnx from it.
ONNX_PATH := dinov2_vitl14/1

# Host directory (relative to the directory make runs in) that receives the
# TensorRT engine. The trt target creates it, bind-mounts it into the container
# under /workspace, and writes model.plan into it.
MODEL_PATH := model_repository/dinov2_vitl14/1
# model: download the ONNX model files into ./dinov2_vitl14/.
#
# Sets up Git LFS, clones git@hf.co:RoundtTble/dinov2_vitl14_onnx (SSH URL, so
# an SSH key accepted by hf.co is required), copies the repository's
# model_repository/dinov2_vitl14 directory into the current directory, and then
# removes the clone.
#
# Declared phony because it is a command, not a file: a file or directory named
# "model" must not stop it from running.
.PHONY: model
model:
	git lfs install
# Drop any clone left behind by an interrupted earlier run; git clone refuses
# to clone into an existing non-empty directory.
	rm -rf dinov2_vitl14_onnx
	git clone git@hf.co:RoundtTble/dinov2_vitl14_onnx
# No trailing slash on the source: BSD/macOS cp would otherwise copy the
# directory's contents instead of the directory itself.
	cp -r dinov2_vitl14_onnx/model_repository/dinov2_vitl14 .
	rm -rf dinov2_vitl14_onnx
# trt: convert the ONNX model into an FP16 TensorRT engine with trtexec.
#
# Runs trtexec inside the nvcr.io/nvidia/pytorch:23.04-py3 image with all GPUs.
#   input : $(ONNX_PATH)/model.onnx   (mounted at /workspace/onnx)
#   output: $(MODEL_PATH)/model.plan  (mounted at /workspace/$(MODEL_PATH))
# The engine accepts a dynamic batch dimension on the tensor named "input":
# min 1, opt 2, max 8, each of shape 3x560x560.
#
# Notes:
#   * No -it: trtexec needs no terminal, and allocating a TTY makes docker fail
#     when make runs without one (CI, pipes).
#   * Container paths are absolute so the recipe does not depend on the image's
#     default working directory.
#   * Bind-mount arguments are quoted so a checkout path containing spaces works.
.PHONY: trt
trt:
# Fail early with a clear message; otherwise docker would silently create an
# empty host directory for the missing bind-mount source.
	@test -f "$(ONNX_PATH)/model.onnx" || { echo "error: $(ONNX_PATH)/model.onnx not found (fetch it first, e.g. with 'make model')" >&2; exit 1; }
	mkdir -p "$(MODEL_PATH)"
	docker run --gpus all --rm \
		-v "$(CURDIR)/$(ONNX_PATH):/workspace/onnx" \
		-v "$(CURDIR)/$(MODEL_PATH):/workspace/$(MODEL_PATH)" \
		nvcr.io/nvidia/pytorch:23.04-py3 \
		/bin/bash -c \
		"trtexec --onnx=/workspace/onnx/model.onnx \
		--saveEngine=/workspace/$(MODEL_PATH)/model.plan \
		--minShapes=input:1x3x560x560 \
		--optShapes=input:2x3x560x560 \
		--maxShapes=input:8x3x560x560 \
		--fp16"
# triton: start Triton Inference Server in the background.
#
# Launches a detached, auto-removed container named dinov2_vitl14_triton from
# nvcr.io/nvidia/tritonserver:23.04-py3, restricted to GPU device 0, serving
# ./model_repository (mounted at /models). Ports 8000, 8001 and 8002 are
# published on the host; the perf target talks gRPC to 8001 (8000 and 8002 are
# presumably Triton's default HTTP and metrics ports).
# Stop it with: docker stop dinov2_vitl14_triton
#
# The mount uses $(CURDIR), the directory make is actually running in, rather
# than $(PWD): PWD is inherited from the environment and points at the wrong
# place under `make -C dir`. The path is quoted so spaces are tolerated.
.PHONY: triton
triton:
	docker run --rm -d \
		--name dinov2_vitl14_triton \
		--gpus "device=0" \
		-p 8000:8000 -p 8001:8001 -p 8002:8002 \
		-v "$(CURDIR)/model_repository:/models" \
		nvcr.io/nvidia/tritonserver:23.04-py3 \
		tritonserver --model-repository=/models
# perf: load-test the served model with perf_analyzer.
#
# Runs perf_analyzer from the nvcr.io/nvidia/tritonserver:23.04-py3-sdk image on
# the host network, targeting model dinov2_vitl14 over gRPC at 0.0.0.0:8001
# with a fixed concurrency of 16, reporting 95th-percentile latency, and
# sending tensors named "input" of shape 3,560,560. A Triton server must
# already be listening on port 8001.
#
# No -it: perf_analyzer needs no terminal, and allocating a TTY makes docker
# fail when make runs without one (CI, pipes).
.PHONY: perf
perf:
	docker run --gpus all --rm --net host \
		nvcr.io/nvidia/tritonserver:23.04-py3-sdk \
		perf_analyzer -m dinov2_vitl14 --percentile=95 \
		-i grpc -u 0.0.0.0:8001 \
		--concurrency-range 16:16 \
		--shape input:3,560,560