# Both targets are commands, not files; declare them phony so a stray file
# or directory named `triton` or `perf` cannot make them appear up to date.
.PHONY: triton perf

# Start a Triton Inference Server container in the background.
#   - detached (-d) and removed on exit (--rm), named dinov2_vitl14_triton
#   - restricted to GPU device 0
#   - publishes container ports 8000, 8001 and 8002 on the same host ports
#     (8001 is the gRPC endpoint that the `perf` target connects to)
#   - mounts ./model_repository from the directory make runs in at /models
#     and points tritonserver at it
# $(CURDIR) is used rather than $(PWD) because it is set by make itself and
# does not depend on the calling environment; the mount is quoted so a
# checkout path containing spaces still works.
triton:
	docker run --rm -d \
		--name dinov2_vitl14_triton \
		--gpus "device=0" \
		-p 8000:8000 \
		-p 8001:8001 \
		-p 8002:8002 \
		-v "$(CURDIR)/model_repository:/models" \
		nvcr.io/nvidia/tritonserver:23.04-py3 \
		tritonserver --model-repository=/models

# Run perf_analyzer from the Triton SDK image against the dinov2_vitl14 model.
#   - uses the host network so 0.0.0.0:8001 reaches the server's published
#     gRPC port
#   - reports the 95th percentile, with concurrency fixed at 16 (range 16:16)
#   - sends requests with the `input` tensor shaped 3,560,560
# The container is started with -it, so this target expects an interactive
# terminal.
perf:
	docker run --gpus all --rm -it --net host \
		nvcr.io/nvidia/tritonserver:23.04-py3-sdk \
		perf_analyzer \
			-m dinov2_vitl14 \
			--percentile=95 \
			-i grpc \
			-u 0.0.0.0:8001 \
			--concurrency-range 16:16 \
			--shape input:3,560,560