FROM nvcr.io/nvidia/tritonserver:22.11-py3
WORKDIR /workspace
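# Install CMake, needed to configure the TensorRT OSS build below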
RUN apt-get update && apt-get install -y cmake
RUN pip install --upgrade pip && pip install --upgrade tensorrt
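# Clone TensorRT OSS (main branch) with its submodules to build the open-source plugins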
RUN git clone https://github.com/NVIDIA/TensorRT.git -b main --single-branch \
    && cd TensorRT \
    && git submodule update --init --recursive
ENV TRT_OSSPATH=/workspace/TensorRT
WORKDIR ${TRT_OSSPATH}
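# Configure with CMake, then build only the plugin target (produces libnvinfer_plugin.so)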
RUN mkdir -p build \
    && cd build \
    && cmake .. -DTRT_OUT_DIR=$PWD/out \
    && cd plugin \
    && make -j$(nproc)
ENV PLUGIN_LIBS="${TRT_OSSPATH}/build/out/libnvinfer_plugin.so"
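# NOTE (assumption, not part of the original Dockerfile): PLUGIN_LIBS is only set as an
# environment variable here; Triton does not load it automatically. A common pattern is
# to preload the freshly built plugin library when launching the server, e.g.
#   LD_PRELOAD=${PLUGIN_LIBS} tritonserver --model-store=/models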
WORKDIR /weights
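# Fetch the SpaceLLaVA GGUF weights: the q4_0-quantized language model and the f16
# multimodal projector (mmproj)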
RUN wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/ggml-model-q4_0.gguf
RUN wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/mmproj-model-f16.gguf
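# Install CUDA 11.8 builds of PyTorch, torchvision, and torchaudio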
RUN python3 -m pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
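# Build llama-cpp-python from source with cuBLAS so GGUF inference runs on the GPU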
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.45 --force-reinstall --no-cache-dir
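# Copy the Triton model repository from the build context into the image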
WORKDIR /models
COPY ./models/ .
WORKDIR /workspace
CMD ["tritonserver", "--model-store=/models"]
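# Usage sketch (assumptions, not from the original Dockerfile: the image tag is
# illustrative; 8000/8001/8002 are Triton's default HTTP/gRPC/metrics ports).
# Build the image and run Triton with GPU access:
#   docker build -t spacellava-triton .
#   docker run --gpus all --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 spacellava-triton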