File size: 1,096 Bytes
0ea9ef5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
FROM nvcr.io/nvidia/tritonserver:22.11-py3

WORKDIR /workspace

RUN apt-get update && apt-get install cmake -y

RUN pip install --upgrade pip && pip install --upgrade tensorrt

RUN git clone https://github.com/NVIDIA/TensorRT.git -b main --single-branch \
    && cd TensorRT \
    && git submodule update --init --recursive

ENV TRT_OSSPATH=/workspace/TensorRT
WORKDIR ${TRT_OSSPATH}

RUN mkdir -p build \
    && cd build \
    && cmake .. -DTRT_OUT_DIR=$PWD/out \
    && cd plugin \
    && make -j$(nproc)

ENV PLUGIN_LIBS="${TRT_OSSPATH}/build/out/libnvinfer_plugin.so"

WORKDIR /weights
RUN wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/ggml-model-q4_0.gguf
RUN wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/mmproj-model-f16.gguf

RUN python3 -m pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.45 --force-reinstall --no-cache-dir

WORKDIR /models
COPY ./models/ .

WORKDIR /workspace
CMD ["tritonserver", "--model-store=/models"]