File size: 1,107 Bytes
c02a81c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6a8249
 
c02a81c
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Host-side directory holding the exported ONNX model (expects model.onnx inside).
# Simple (:=) assignment so the value is expanded once at parse time.
ONNX_PATH := dinov2_vitl14/1
# Triton model-repository slot that receives the compiled TensorRT engine (model.plan).
MODEL_PATH := model_repository/dinov2_vitl14/1


# Fetch the pre-exported DINOv2 ViT-L/14 ONNX model from Hugging Face.
# Requires git-lfs and SSH access to hf.co. Copies the model-repository
# layout into the working tree, then removes the temporary clone.
.PHONY: model
model:
	git lfs install
	git clone git@hf.co:RoundtTble/dinov2_vitl14_onnx
	cp -r dinov2_vitl14_onnx/model_repository/dinov2_vitl14/ .
	rm -rf dinov2_vitl14_onnx


# Compile the ONNX model into a TensorRT engine (model.plan) using trtexec
# inside the NGC PyTorch container. The engine is built with FP16 and a
# dynamic batch dimension: min 1, opt 2, max 8, at a fixed 3x560x560 input.
# NOTE(review): -it assumes an interactive terminal — drop -t if this target
# must run in CI.
.PHONY: trt
trt:
	mkdir -p $(MODEL_PATH)
	docker run --gpus all -it \
		-v $(CURDIR)/$(ONNX_PATH):/workspace/onnx \
		-v $(CURDIR)/$(MODEL_PATH):/workspace/$(MODEL_PATH) \
		--rm nvcr.io/nvidia/pytorch:23.04-py3 \
		/bin/bash -c \
		"trtexec --onnx=onnx/model.onnx --saveEngine=$(MODEL_PATH)/model.plan \
			--minShapes=input:1x3x560x560 \
			--optShapes=input:2x3x560x560 \
			--maxShapes=input:8x3x560x560 \
			--fp16"


# Start Triton Inference Server detached, serving ./model_repository on
# GPU 0 (ports: 8000 HTTP, 8001 gRPC, 8002 metrics).
# Uses $(CURDIR) (make built-in) rather than $(PWD) (environment variable,
# not guaranteed to be set or correct) — consistent with the trt target.
.PHONY: triton
triton:
	docker run --rm -d \
		--name dinov2_vitl14_triton \
		-p 8000:8000 --gpus "device=0" -p 8001:8001 -p 8002:8002 \
		-v $(CURDIR)/model_repository:/models \
		nvcr.io/nvidia/tritonserver:23.04-py3 \
		tritonserver --model-repository=/models

# Benchmark the served model with Triton's perf_analyzer over gRPC:
# p95 latency, fixed concurrency of 16, a single 3x560x560 input.
# --net host lets the SDK container reach the triton server on localhost:8001.
.PHONY: perf
perf:
	docker run --gpus all --rm -it --net host \
		nvcr.io/nvidia/tritonserver:23.04-py3-sdk \
		perf_analyzer -m dinov2_vitl14 --percentile=95 \
			-i grpc -u 0.0.0.0:8001 \
			--concurrency-range 16:16 \
			--shape input:3,560,560