# Rust builder
FROM lukemathwalker/cargo-chef:latest-rust-1.69 AS chef
WORKDIR /usr/src

FROM chef AS planner
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY router router
COPY launcher launcher
RUN cargo chef prepare --recipe-path recipe.json
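# The planner stage only emits recipe.json, a description of the workspace's
# dependency graph; `cargo chef cook` in the builder stage below uses it to
# compile dependencies in a layer that is invalidated only when the manifests
# change, not on every source edit.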

FROM chef AS builder

ARG GIT_SHA
ARG DOCKER_LABEL

RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP

COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json

COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY router router
COPY launcher launcher
RUN cargo build --release
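# The release build leaves the `text-generation-router` and
# `text-generation-launcher` binaries under /usr/src/target/release; both are
# copied into the runtime image further down.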

# Python builder
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
FROM debian:bullseye-slim AS pytorch-install

ARG PYTORCH_VERSION=2.0.0
ARG PYTHON_VERSION=3.9
ARG CUDA_VERSION=11.8
ARG MAMBA_VERSION=23.1.0-1
ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=pytorch
# Automatically set by buildx
ARG TARGETPLATFORM

ENV PATH=/opt/conda/bin:$PATH

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    ccache \
    curl \
    git && \
    rm -rf /var/lib/apt/lists/*

# Install conda
# translating Docker's TARGETPLATFORM into mamba arches
RUN case ${TARGETPLATFORM} in \
    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
    *) MAMBA_ARCH=x86_64 ;; \
    esac && \
    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
RUN chmod +x ~/mambaforge.sh && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    rm ~/mambaforge.sh

# Install pytorch
# On arm64 we exit with an error code
RUN case ${TARGETPLATFORM} in \
    "linux/arm64") exit 1 ;; \
    *) /opt/conda/bin/conda update -y conda && \
        /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \
    esac && \
    /opt/conda/bin/conda clean -ya
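# `cut -d'.' -f 1-2` keeps only the major.minor part of CUDA_VERSION
# (e.g. 11.8.0 -> 11.8) so it matches the `pytorch-cuda` conda package naming.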

# CUDA kernels builder image
FROM pytorch-install AS kernel-builder

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    ninja-build \
    && rm -rf /var/lib/apt/lists/*

RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \
    /opt/conda/bin/conda clean -ya

# Build Flash Attention CUDA kernels
FROM kernel-builder AS flash-att-builder
WORKDIR /usr/src
COPY server/Makefile-flash-att Makefile
# Build specific version of flash attention
RUN make build-flash-attention
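# The Makefile builds a specific flash-attention revision together with its
# layer_norm and rotary kernels; the resulting extension directories are copied
# into the runtime image below.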

# Build Transformers CUDA kernels
FROM kernel-builder AS transformers-builder
WORKDIR /usr/src
COPY server/Makefile-transformers Makefile
# Build specific version of transformers
RUN BUILD_EXTENSIONS="True" make build-transformers
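# BUILD_EXTENSIONS=True compiles the custom CUDA kernels shipped with the pinned
# transformers checkout ahead of time; both the source tree and the built
# extensions are reused in the base image below.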

# Text Generation Inference base image
FROM debian:bullseye-slim AS base

# Conda env
ENV PATH=/opt/conda/bin:$PATH \
    CONDA_PREFIX=/opt/conda

# Text Generation Inference base env
ENV HUGGINGFACE_HUB_CACHE=/data \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    MODEL_ID=bigscience/bloom-560m \
    QUANTIZE=false \
    NUM_SHARD=1 \
    PORT=80
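# These defaults can be overridden at `docker run` time with `-e`; a minimal
# sketch (the image tag and host port are placeholders, not defined in this file):
#   docker run --gpus all -p 8080:80 -v $PWD/data:/data \
#       -e MODEL_ID=bigscience/bloom-560m -e NUM_SHARD=1 \
#       text-generation-inference:local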

LABEL com.nvidia.volumes.needed="nvidia_driver"

WORKDIR /usr/src

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    libssl-dev \
    ca-certificates \
    make \
    && rm -rf /var/lib/apt/lists/*

# Copy conda with PyTorch installed
COPY --from=pytorch-install /opt/conda /opt/conda

# Copy build artifacts from flash attention builder
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages

# Copy build artifacts from transformers builder
COPY --from=transformers-builder /usr/src/transformers /usr/src/transformers
COPY --from=transformers-builder /usr/src/transformers/build/lib.linux-x86_64-cpython-39/transformers /usr/src/transformers/src/transformers

# Install transformers dependencies
RUN cd /usr/src/transformers && pip install -e . --no-cache-dir && pip install einops --no-cache-dir

# Install server
COPY proto proto
COPY server server
COPY server/Makefile server/Makefile
RUN cd server && \
    make gen-server && \
    pip install -r requirements.txt && \
    pip install ".[bnb, accelerate]" --no-cache-dir

# Install router
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher

# AWS Sagemaker compatible image
FROM base AS sagemaker

COPY sagemaker-entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh

# NVIDIA env vars
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64

ENTRYPOINT ["./entrypoint.sh"]

# Final image
FROM base

ENTRYPOINT ["text-generation-launcher"]
CMD ["--json-output"]