# Source: Hugging Face Space nateraw/text-generation-inference (status at capture: Runtime error)
# File: text-generation-inference / Dockerfile (6.06 kB)
# Last commit: "Update Dockerfile" by nateraw, a522ace, over 1 year ago

	# Rust builder
	# Shared base for the Rust stages: the cargo-chef image tagged for Rust 1.69.
	# Both the `planner` and `builder` stages start FROM this stage, so they
	# share the /usr/src working directory set here.
	FROM lukemathwalker/cargo-chef:latest-rust-1.69 AS chef
	WORKDIR /usr/src

	# Planner stage: runs `cargo chef prepare` over the copied sources to
	# produce recipe.json, which the builder stage copies out of this stage.
	# `AS` is upper-cased to match the casing used by the other FROM lines
	# that already use `AS`.
	FROM chef AS planner
	# Manifest and toolchain file go into the working directory in one layer
	COPY Cargo.toml rust-toolchain.toml ./
	# Source directories referenced alongside the manifest
	COPY proto proto
	COPY router router
	COPY launcher launcher
	RUN cargo chef prepare --recipe-path recipe.json

	# Builder stage: compiles the Rust binaries on top of the shared chef base.
	FROM chef AS builder

	# Build arguments accepted by this stage. No instruction visible in this
	# Dockerfile references them by name — presumably the cargo build reads
	# them from the environment; verify before removing.
	ARG GIT_SHA
	ARG DOCKER_LABEL

	# Install protoc 21.12: download the release archive, unpack the compiler
	# binary and the bundled include files under /usr/local, then delete the
	# archive within the same layer.
	RUN PROTOC_VERSION=21.12 && \
	    PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip" && \
	    curl -OL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}" && \
	    unzip -o "${PROTOC_ZIP}" -d /usr/local bin/protoc && \
	    unzip -o "${PROTOC_ZIP}" -d /usr/local 'include/*' && \
	    rm -f "${PROTOC_ZIP}"

	# Dependencies are cooked from the planner's recipe in their own layer,
	# before any of the project sources are copied in.
	COPY --from=planner /usr/src/recipe.json recipe.json
	RUN cargo chef cook --release --recipe-path recipe.json

	# Bring in the manifest, toolchain file and sources, then compile the
	# release binaries (output lands under /usr/src/target/release).
	COPY Cargo.toml rust-toolchain.toml ./
	COPY proto proto
	COPY router router
	COPY launcher launcher
	RUN cargo build --release

	# Python builder
	# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
	FROM debian:bullseye-slim AS pytorch-install

	# Versions and conda channels consumed by the install steps of this stage
	ARG PYTORCH_VERSION=2.0.0
	ARG PYTHON_VERSION=3.9
	ARG CUDA_VERSION=11.8
	ARG MAMBA_VERSION=23.1.0-1
	ARG CUDA_CHANNEL=nvidia
	ARG INSTALL_CHANNEL=pytorch
	# Automatically set by buildx
	ARG TARGETPLATFORM

	# Put the conda prefix first on PATH. Written in key=value form; the
	# space-separated `ENV key value` form is legacy syntax.
	ENV PATH=/opt/conda/bin:$PATH

	# Build tooling plus curl/git; the apt lists are removed in the same
	# layer that created them.
	RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
	        build-essential \
	        ca-certificates \
	        ccache \
	        curl \
	        git \
	    && rm -rf /var/lib/apt/lists/*

	# Install conda
	# Translate Docker's TARGETPLATFORM into the arch name used in the
	# Mambaforge release file names (anything other than linux/arm64 falls
	# back to x86_64), then download, run and delete the installer in a
	# single layer so the script does not persist in an earlier layer.
	# curl gets a single output option (-o); the extra -O that used to follow
	# it had no URL to apply to. The installer is run through bash, so no
	# execute bit is required.
	RUN case "${TARGETPLATFORM}" in \
	        "linux/arm64") MAMBA_ARCH=aarch64 ;; \
	        *) MAMBA_ARCH=x86_64 ;; \
	    esac && \
	    curl -fsSL -v -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
	    bash ~/mambaforge.sh -b -p /opt/conda && \
	    rm ~/mambaforge.sh

	# Install pytorch
	# On arm64 we exit with an error code
	# The pytorch-cuda spec is the first two dot-separated fields of
	# CUDA_VERSION (e.g. 11.8). The pipe into `cut` must be a real, unescaped
	# pipe: with `\|` the shell passes a literal "|" to echo and the whole
	# text "11.8 | cut ..." ends up inside the package spec.
	RUN case "${TARGETPLATFORM}" in \
	        "linux/arm64") exit 1 ;; \
	        *) /opt/conda/bin/conda update -y conda && \
	           /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch==${PYTORCH_VERSION}" "pytorch-cuda=$(echo "${CUDA_VERSION}" | cut -d'.' -f 1-2)" ;; \
	    esac && \
	    /opt/conda/bin/conda clean -ya

	# CUDA kernels builder image
	# Extends the pytorch-install stage with ninja and the CUDA 11.8 packages
	# from the nvidia/label/cuda-11.8.0 conda channel.
	FROM pytorch-install AS kernel-builder

	RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
	        ninja-build \
	    && rm -rf /var/lib/apt/lists/*

	# -y makes the install explicitly non-interactive, matching the other
	# conda invocations in this Dockerfile; the package cache is cleaned in
	# the same layer.
	RUN /opt/conda/bin/conda install -y -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \
	    /opt/conda/bin/conda clean -ya


	# # Build Flash Attention CUDA kernels
	# FROM kernel-builder as flash-att-builder

	# WORKDIR /usr/src

	# COPY server/Makefile-flash-att Makefile

	# # Build specific version of flash attention
	# RUN make build-flash-attention

	# Build Transformers CUDA kernels
	# `AS` is upper-cased to match the FROM lines that already use `AS`.
	FROM kernel-builder AS transformers-builder

	WORKDIR /usr/src

	# server/Makefile-transformers is placed in the working directory under
	# the default name `Makefile`, so a plain `make` picks it up.
	COPY server/Makefile-transformers Makefile

	# Build specific version of transformers
	# BUILD_EXTENSIONS is set only for this make invocation.
	RUN BUILD_EXTENSIONS="True" make build-transformers

	# Text Generation Inference base image
	# Both the `sagemaker` stage and the final image are built FROM this stage.
	FROM debian:bullseye-slim AS base

	# Conda env
	ENV PATH=/opt/conda/bin:$PATH \
	    CONDA_PREFIX=/opt/conda

	# Text Generation Inference base env
	# PORT matches the port passed explicitly in the final stage's CMD.
	ENV HUGGINGFACE_HUB_CACHE=/data \
	    HF_HUB_ENABLE_HF_TRANSFER=1 \
	    MODEL_ID=google/flan-t5-small \
	    QUANTIZE=false \
	    NUM_SHARD=1 \
	    PORT=7860

	# Python / Gradio settings
	# NOTE(review): no ENV or ARG in this stage defines HOME, so `$HOME` may
	# expand to an empty string at build time (leaving PYTHONPATH=/app) —
	# confirm the intended path.
	ENV PYTHONPATH=$HOME/app \
	    PYTHONUNBUFFERED=1 \
	    GRADIO_ALLOW_FLAGGING=never \
	    GRADIO_NUM_PORTS=1 \
	    GRADIO_SERVER_NAME=0.0.0.0 \
	    GRADIO_THEME=huggingface \
	    SYSTEM=spaces

	LABEL com.nvidia.volumes.needed="nvidia_driver"

	# Relative paths in the instructions that follow resolve against /usr/src
	WORKDIR /usr/src

	# OS packages for the base image (listed alphabetically); the apt lists
	# are deleted in the same layer.
	RUN apt-get update \
	    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
	        ca-certificates \
	        libssl-dev \
	        make \
	    && rm -rf /var/lib/apt/lists/*

	# Bring over the complete conda prefix (with PyTorch) from the pytorch-install stage
	COPY --from=pytorch-install /opt/conda /opt/conda

	# Flash attention artifacts are not copied: the builder stage for them is commented out
	# COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
	# COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
	# COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages

	# Transformers checkout from the transformers-builder stage, followed by
	# its build output copied over the package directory inside that checkout
	COPY --from=transformers-builder /usr/src/transformers /usr/src/transformers
	COPY --from=transformers-builder /usr/src/transformers/build/lib.linux-x86_64-cpython-39/transformers /usr/src/transformers/src/transformers

	# Editable install of the transformers checkout, then einops
	RUN pip install --no-cache-dir -e /usr/src/transformers \
	    && pip install --no-cache-dir einops

	# Install server
	COPY proto proto
	COPY server server
	# NOTE(review): server/Makefile should already be included by the
	# directory copy above — this explicit copy looks redundant; confirm
	# before removing.
	COPY server/Makefile server/Makefile
	# Generate the server code via the Makefile target, then install the
	# requirements and the package with its bnb/accelerate extras. Every pip
	# call uses --no-cache-dir so no pip cache is left in the layer.
	RUN cd server && \
	    make gen-server && \
	    pip install -r requirements.txt --no-cache-dir && \
	    pip install ".[bnb, accelerate]" --no-cache-dir

	# Install router
	# The binary is taken from the `builder` stage's release output directory.
	COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
	# Install launcher
	# Same source directory as the router; this is the program named in the final stage's CMD.
	COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher

	# AWS Sagemaker compatible image
	FROM base AS sagemaker

	# The entrypoint script is copied into the working directory and made executable
	COPY sagemaker-entrypoint.sh entrypoint.sh
	RUN chmod +x entrypoint.sh

	# NVIDIA env vars
	# Written in key=value form (the space-separated ENV form is legacy).
	# They are set in this stage only: the final stage is built FROM base and
	# therefore does not inherit them.
	ENV NVIDIA_VISIBLE_DEVICES=all \
	    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
	    LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64

	# The entrypoint is currently disabled, so entrypoint.sh is copied but not invoked
	# ENTRYPOINT ["./entrypoint.sh"]

	# Final image
	# DEBIAN_FRONTEND is deliberately not set with ENV here: no RUN
	# instruction follows in this stage, so it would only leak a build-time
	# setting into the runtime environment.
	FROM base
	# Alternative start-up configurations, kept for reference:
	# ENTRYPOINT ["text-generation-launcher"]
	# CMD ["--json-output"]
	# COPY app.py /usr/src/app.py
	# CMD ["python", "app.py"]
	# Exec-form CMD starting the launcher on port 7860 (the same value as
	# PORT in the base stage) with JSON output.
	CMD ["text-generation-launcher", "--port", "7860", "--json-output"]