# triton: start a Triton Inference Server container in the background.
#
#   --rm -d            run detached and remove the container once it stops
#   --name             fixed container name (dinov2_vitl14_triton), so it can
#                      be addressed later, e.g. with `docker stop`
#   --gpus "device=0"  expose only GPU 0 to the container
#   -p 8000/8001/8002  publish the server ports on the host; 8001 is the gRPC
#                      endpoint the `perf` target connects to (8000 and 8002
#                      are presumably Triton's default HTTP and metrics
#                      ports -- confirm against the Triton docs)
#   -v ...:/models     mount ./model_repository (resolved relative to the
#                      directory make runs in) as the server's model repository
#
# $(CURDIR) is used rather than $(PWD) so the mount path remains correct when
# make is invoked with `-C <dir>`.
.PHONY: triton
triton:
	docker run --rm -d \
		--name dinov2_vitl14_triton \
		--gpus "device=0" \
		-p 8000:8000 -p 8001:8001 -p 8002:8002 \
		-v "$(CURDIR)/model_repository:/models" \
		nvcr.io/nvidia/tritonserver:23.04-py3 \
		tritonserver --model-repository=/models
# perf: run perf_analyzer from the Triton SDK image against the
# dinov2_vitl14 model.
#
#   --net host                  share the host network so the analyzer can
#                               reach the ports published by `triton`
#   -i grpc -u 0.0.0.0:8001     talk gRPC to port 8001
#   --percentile=95             report the 95th-percentile latency
#   --concurrency-range 16:16   fixed concurrency of 16
#   --shape input:3,560,560     shape supplied for the tensor named "input"
#
# The container is interactive (-it), so this target needs a TTY.
.PHONY: perf
perf:
	docker run --gpus all --rm -it --net host \
		nvcr.io/nvidia/tritonserver:23.04-py3-sdk \
		perf_analyzer -m dinov2_vitl14 \
		--percentile=95 \
		-i grpc -u 0.0.0.0:8001 \
		--concurrency-range 16:16 \
		--shape input:3,560,560