# Makefile: build and serve the DINOv2 ViT-L/14 ONNX model with NVIDIA
# Triton Inference Server, going through a TensorRT engine conversion.
#
# Targets:
#   model  - fetch the ONNX model repository from Hugging Face (needs git-lfs + SSH access)
#   trt    - convert the ONNX model to a TensorRT plan with trtexec (needs NVIDIA GPU + docker)
#   triton - launch Triton in the background serving model_repository/
#   perf   - run perf_analyzer against the running Triton server

# := expands once at parse time; these are plain path constants.
ONNX_PATH := dinov2_vitl14/1
MODEL_PATH := model_repository/dinov2_vitl14/1

# All targets are commands, not files they create — without .PHONY a file
# named e.g. `model` in this directory would make the target appear up to date.
.PHONY: model trt triton perf

# Clone the model repo from Hugging Face, copy the Triton model layout into
# place, then remove the clone (the LFS checkout is only needed transiently).
model:
	git lfs install
	git clone git@hf.co:RoundtTble/dinov2_vitl14_onnx
	cp -r dinov2_vitl14_onnx/model_repository/dinov2_vitl14/ .
	rm -rf dinov2_vitl14_onnx

# Build an FP16 TensorRT engine with dynamic batch size 1-8 at 3x560x560,
# running trtexec inside the NGC PyTorch container so the TensorRT version
# matches the tritonserver:23.04 runtime below.
trt:
	mkdir -p $(MODEL_PATH)
	docker run --gpus all -it \
		-v $(CURDIR)/$(ONNX_PATH):/workspace/onnx \
		-v $(CURDIR)/$(MODEL_PATH):/workspace/$(MODEL_PATH) \
		--rm nvcr.io/nvidia/pytorch:23.04-py3 \
		/bin/bash -c \
		"trtexec --onnx=onnx/model.onnx --saveEngine=$(MODEL_PATH)/model.plan \
		--minShapes=input:1x3x560x560 \
		--optShapes=input:2x3x560x560 \
		--maxShapes=input:8x3x560x560 \
		--fp16"

# Start Triton detached on GPU 0, exposing HTTP (8000), gRPC (8001) and
# metrics (8002). $(CURDIR) (not $(PWD)) so the bind mount stays correct
# when make is invoked with -C from another directory.
triton:
	docker run --rm -d \
		--name dinov2_vitl14_triton \
		-p 8000:8000 --gpus "device=0" -p 8001:8001 -p 8002:8002 \
		-v $(CURDIR)/model_repository:/models \
		nvcr.io/nvidia/tritonserver:23.04-py3 \
		tritonserver --model-repository=/models

# Measure p95 latency/throughput over gRPC at concurrency 16 against the
# server started by `make triton` (host networking reaches port 8001).
perf:
	docker run --gpus all --rm -it --net host nvcr.io/nvidia/tritonserver:23.04-py3-sdk perf_analyzer -m dinov2_vitl14 --percentile=95 -i grpc -u 0.0.0.0:8001 --concurrency-range 16:16 --shape input:3,560,560