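#!/bin/bash
# -----------------------------------------------------------------------------
# NeMo dependency installer. Usage sketch inferred from the argument handling
# at the bottom of this script; the file name in the examples is an assumption:
#
#   bash install_dep.sh                           # default "dev" mode: pip install -e ".[all]"
#   bash install_dep.sh --library all --mode install
#   bash install_dep.sh --library te,mcore --mode build
#
# Per-library behaviour can be tuned through environment variables such as
# INSTALL_DIR, WHEELS_DIR, TRTLLM_TAG and TE_TAG (see the exports below).
# -----------------------------------------------------------------------------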
set -exou pipefail

# List of all supported libraries (update this list when adding new libraries)
# This also defines the order in which they will be installed by --library "all"
ALL_LIBRARIES=(
    "trtllm"
    "te"
    "mcore"
    "vllm"
    "extra"
)

export INSTALL_OPTION=${1:-dev}
export HEAVY_DEPS=${HEAVY_DEPS:-false}
export INSTALL_DIR=${INSTALL_DIR:-"/opt"}
export CURR=$(pwd)
export WHEELS_DIR=${WHEELS_DIR:-"$INSTALL_DIR/wheels"}
export PIP=pip

export TRTLLM_REPO=${TRTLLM_REPO:-$(cat "$CURR/requirements/manifest.json" | jq -r '."vcs-dependencies"."trt-llm".repo')}
export TRTLLM_TAG=${TRTLLM_TAG:-$(cat "$CURR/requirements/manifest.json" | jq -r '."vcs-dependencies"."trt-llm".ref')}
export TRTLLM_DIR="$INSTALL_DIR/TensorRT-LLM"

export TE_REPO=${TE_REPO:-$(cat "$CURR/requirements/manifest.json" | jq -r '."vcs-dependencies"."transformer_engine".repo')}
export TE_TAG=${TE_TAG:-$(cat "$CURR/requirements/manifest.json" | jq -r '."vcs-dependencies"."transformer_engine".ref')}

export NVIDIA_PYTORCH_VERSION=${NVIDIA_PYTORCH_VERSION:-""}
export CONDA_PREFIX=${CONDA_PREFIX:-""}
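
# The jq queries above assume a manifest of roughly this shape (illustrative
# values only; the real pins live in requirements/manifest.json):
#
#   {
#     "vcs-dependencies": {
#       "trt-llm":            { "repo": "https://github.com/NVIDIA/TensorRT-LLM.git",       "ref": "<tag-or-sha>" },
#       "transformer_engine": { "repo": "https://github.com/NVIDIA/TransformerEngine.git",  "ref": "<tag-or-sha>" },
#       "megatron-lm":        { "repo": "https://github.com/NVIDIA/Megatron-LM.git",        "ref": "<tag-or-sha>" }
#     }
#   }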
trt() {
    local mode="$1"
    local WHEELS_DIR=$WHEELS_DIR/trt/
    mkdir -p $WHEELS_DIR

    # Skip TRT installation on macOS ARM
    if [[ "$(uname)" == "Darwin" ]] && [[ "$(uname -m)" == "arm64" ]]; then
        echo "Skipping TRT installation on macOS ARM"
        return
    fi

    if [ "$(id -u)" -ne 0 ]; then
        if ! command -v sudo &>/dev/null; then
            echo "Not running as root and sudo is not available, skipping TRT installation"
            return
        fi
    fi

    if [ "$(id -u)" -eq 0 ]; then
        # Already root, run directly
        curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
        apt-get install -y git-lfs
        git lfs install
        apt-get clean
    else
        # Need to gain sudo
        curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
        sudo apt-get install -y git-lfs
        git lfs install
        sudo apt-get clean
    fi

    if [ ! -d "$TRTLLM_DIR/.git" ]; then
        rm -rf "$TRTLLM_DIR"
        cd $(dirname "$TRTLLM_DIR")
        git clone ${TRTLLM_REPO}
    fi

    pushd $TRTLLM_DIR
    git checkout -f $TRTLLM_TAG
    git submodule update --init --recursive
    sed -i "/torch/d" requirements.txt
    git lfs pull
    patch -p1 < $CURR/external/patches/trt_llm.patch
    popd

    if [[ "$mode" == "install" ]]; then
        if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
            cd $TRTLLM_DIR
            set +u
            bash docker/common/install_base.sh
            bash docker/common/install_cmake.sh
            bash docker/common/install_ccache.sh
            . docker/common/install_tensorrt.sh \
                --TRT_VER="10.10.0.31" \
                --CUDA_VER="12.9" \
                --CUDNN_VER="9.9.0.52-1" \
                --NCCL_VER="2.26.5-1+cuda12.9" \
                --CUBLAS_VER="12.9.0.13-1" \
                --NVRTC_VER="12.9.41-1"
            set -u
        fi
    fi
}
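
# Note: trt() only prepares the system toolchain (git-lfs, the TensorRT-LLM
# checkout, and, in install mode inside an NVIDIA PyTorch container, the
# TensorRT/cuDNN/NCCL system packages). The Python wheel itself is built and
# installed by trtllm() below, which is why "trt" is dispatched separately and
# is not part of ALL_LIBRARIES.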
trtllm() {
    local mode="$1"
    local WHEELS_DIR=$WHEELS_DIR/trtllm/
    mkdir -p $WHEELS_DIR

    # Skip TRT-LLM installation on macOS ARM
    if [[ "$(uname)" == "Darwin" ]] && [[ "$(uname -m)" == "arm64" ]]; then
        echo "Skipping TRT-LLM installation on macOS ARM"
        return
    fi

    if [ "$(id -u)" -ne 0 ]; then
        if ! command -v sudo &>/dev/null; then
            echo "Not running as root and sudo is not available, skipping TRT-LLM installation"
            return
        fi
    fi

    if [ "$(id -u)" -eq 0 ]; then
        # Already root, run directly
        curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
        apt-get install -y git-lfs
        git lfs install
        apt-get clean
    else
        # Need to gain sudo
        curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
        sudo apt-get install -y git-lfs
        git lfs install
        sudo apt-get clean
    fi

    if [ ! -d "$TRTLLM_DIR/.git" ]; then
        rm -rf "$TRTLLM_DIR"
        cd $(dirname "$TRTLLM_DIR")
        git clone ${TRTLLM_REPO}
    fi

    pushd $TRTLLM_DIR
    git checkout -f $TRTLLM_TAG
    git submodule update --init --recursive
    sed -i "/torch/d" requirements.txt
    git lfs pull
    patch -p1 < $CURR/external/patches/trt_llm.patch
    popd

    build() {
        if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
            # CONDA_PREFIX causes an error in trt-llm's build script
            unset CONDA_PREFIX
            cd $TRTLLM_DIR

            TORCH_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" python3 ./scripts/build_wheel.py --job_count $(nproc) --clean --trt_root /usr/local/tensorrt --dist_dir $WHEELS_DIR --python_bindings --benchmarks
        fi
    }

    if [[ "$mode" == "build" ]]; then
        build
    else
        if [ -d "$WHEELS_DIR" ] && [ -z "$(ls -A "$WHEELS_DIR")" ]; then
            build
        fi

        pip install --no-cache-dir $WHEELS_DIR/tensorrt_llm*.whl --extra-index-url https://pypi.nvidia.com || true
    fi
}
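
# trtllm(), te(), mcore() and vllm() all follow the same build-vs-install
# pattern: "build" populates a per-library wheel cache under $WHEELS_DIR, and
# "install" builds only if that cache is empty, then installs whatever wheels
# it finds. An illustrative layout (directory names come from the functions,
# wheel file names are examples only):
#
#   $WHEELS_DIR/
#     trtllm/   tensorrt_llm-*.whl
#     te/       transformer_engine-*.whl
#     mcore/    megatron_core-*.whl  mamba_ssm-*.whl  causal_conv1d-*.whl
#     vllm/     vllm-*.whl  (plus its pinned dependencies)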
te() {
    local mode="$1"
    local WHEELS_DIR=$WHEELS_DIR/te/
    mkdir -p $WHEELS_DIR

    TE_DIR="$INSTALL_DIR/TransformerEngine"
    if [ ! -d "$TE_DIR/.git" ]; then
        rm -rf "$TE_DIR"
        cd $(dirname "$TE_DIR")
        git clone ${TE_REPO}
    fi

    pushd $TE_DIR
    git checkout -f $TE_TAG
    popd

    build() {
        if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
            cd $TE_DIR
            git submodule init
            git submodule update
            pip install nvidia-mathdx==25.1.1
            pip wheel --wheel-dir $WHEELS_DIR/ --no-build-isolation $TE_DIR
        fi
    }

    if [[ "$mode" == "build" ]]; then
        build
    else
        if [ -d "$WHEELS_DIR" ] && [ -z "$(ls -A "$WHEELS_DIR")" ]; then
            build
        fi

        pip install --no-cache-dir $WHEELS_DIR/transformer_engine*.whl && patch -p1 --force $(python -c "import triton; print(triton.__path__[0])")/runtime/autotuner.py $CURR/external/patches/triton-lang_triton_6570_lazy_init.patch || true
    fi
}
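
# The install branch of te() is best-effort (note the trailing "|| true"): it
# installs the TransformerEngine wheel and then patches Triton's autotuner with
# the lazy-init fix shipped in external/patches/, locating the installed triton
# package via "python -c 'import triton; ...'".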
mcore() {
    local mode="$1"
    local WHEELS_DIR=$WHEELS_DIR/mcore/
    mkdir -p $WHEELS_DIR

    export CAUSAL_CONV1D_FORCE_BUILD=TRUE
    export CAUSAL_CONV_TAG=v1.5.3
    CAUSAL_CONV1D_DIR="$INSTALL_DIR/causal-conv1d"
    if [ ! -d "$CAUSAL_CONV1D_DIR/.git" ]; then
        rm -rf "$CAUSAL_CONV1D_DIR"
        mkdir -p $(dirname "$CAUSAL_CONV1D_DIR")
        cd $(dirname "$CAUSAL_CONV1D_DIR")
        git clone https://github.com/Dao-AILab/$(basename $CAUSAL_CONV1D_DIR).git
    fi

    pushd $CAUSAL_CONV1D_DIR
    git checkout -f $CAUSAL_CONV_TAG
    popd

    export MAMBA_FORCE_BUILD=TRUE
    export MAMBA_TAG=6b32be06d026e170b3fdaf3ae6282c5a6ff57b06
    MAMBA_DIR="$INSTALL_DIR/mamba"
    if [ ! -d "$MAMBA_DIR/.git" ]; then
        rm -rf "$MAMBA_DIR"
        cd $(dirname "$MAMBA_DIR")
        git clone https://github.com/state-spaces/$(basename $MAMBA_DIR).git
    fi

    pushd $MAMBA_DIR
    git checkout -f $MAMBA_TAG
    perl -ni -e 'print unless /triton/' setup.py
    perl -ni -e 'print unless /triton/' pyproject.toml
    popd

    MLM_REPO=${MLM_REPO:-$(cat "$CURR/requirements/manifest.json" | jq -r '."vcs-dependencies"."megatron-lm".repo')}
    MLM_TAG=${MLM_TAG:-$(cat "$CURR/requirements/manifest.json" | jq -r '."vcs-dependencies"."megatron-lm".ref')}
    MLM_DIR="$INSTALL_DIR/Megatron-LM"
    if [ ! -d "$MLM_DIR/.git" ]; then
        rm -rf "$MLM_DIR"
        mkdir -p $(dirname "$MLM_DIR")
        cd $(dirname "$MLM_DIR")
        git clone ${MLM_REPO}
    fi

    pushd $MLM_DIR
    git checkout -f $MLM_TAG
    perl -ni -e 'print unless /triton==3.1.0/' requirements/pytorch_24.10/requirements.txt
    perl -ni -e 'print unless /nvidia-resiliency-ext/' requirements/pytorch_24.10/requirements.txt
    popd

    build() {
        if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
            pip wheel --no-deps --no-cache-dir --no-build-isolation --wheel-dir $WHEELS_DIR $MAMBA_DIR
            pip wheel --no-deps --no-cache-dir --no-build-isolation --wheel-dir $WHEELS_DIR $CAUSAL_CONV1D_DIR
        fi

        pip wheel --no-deps --wheel-dir $WHEELS_DIR $MLM_DIR
    }

    if [[ "$mode" == "build" ]]; then
        build
    else
        if [ -d "$WHEELS_DIR" ] && [ -z "$(ls -A "$WHEELS_DIR")" ]; then
            build
        fi

        pip install --no-cache-dir $WHEELS_DIR/*.whl "nvidia-pytriton ; platform_machine == 'x86_64'" || true
        pip install --no-cache-dir -e $MLM_DIR
    fi
}
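
# mcore() builds causal-conv1d and mamba from source (pinned via
# CAUSAL_CONV_TAG / MAMBA_TAG) before wheeling Megatron-LM. The perl edits
# above strip triton pins from those projects so the container's pytorch-triton
# build is kept instead of pulling a separate triton from PyPI.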
vllm() {
    local mode="$1"
    local WHEELS_DIR=$WHEELS_DIR/vllm/
    mkdir -p $WHEELS_DIR

    VLLM_DIR="$INSTALL_DIR/vllm"

    build() {
        if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
            ${PIP} install --no-cache-dir virtualenv
            virtualenv $INSTALL_DIR/venv
            $INSTALL_DIR/venv/bin/pip install --no-cache-dir setuptools coverage
            $INSTALL_DIR/venv/bin/pip wheel --no-cache-dir --no-build-isolation \
                --wheel-dir $WHEELS_DIR/ \
                -r $CURR/requirements/requirements_vllm.txt
        fi
    }

    if [[ "$mode" == "build" ]]; then
        build
    else
        if [ -d "$WHEELS_DIR" ] && [ -z "$(ls -A "$WHEELS_DIR")" ]; then
            build
        fi

        ${PIP} install --no-cache-dir virtualenv
        virtualenv $INSTALL_DIR/venv
        $INSTALL_DIR/venv/bin/pip install --no-cache-dir coverage
        $INSTALL_DIR/venv/bin/pip install --no-cache-dir --no-build-isolation $WHEELS_DIR/*.whl || true
    fi
}
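
# vLLM is kept out of the main Python environment: both build and install go
# through a separate virtualenv at $INSTALL_DIR/venv, which keeps its pins from
# clashing with the NeMo/TRT-LLM/TE stack installed above. Use
# $INSTALL_DIR/venv/bin/python when you need that environment.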
extra() {
    local mode="$1"

    DEPS=(
        "llama-index==0.10.43"    # incompatible with nvidia-pytriton
        "nemo_run"
        "nvidia-modelopt==0.37.0" # We want a specific version of nvidia-modelopt
    )

    if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
        DEPS+=(
            "git+https://github.com/NVIDIA/nvidia-resiliency-ext.git@b6eb61dbf9fe272b1a943b1b0d9efdde99df0737 ; platform_machine == 'x86_64'" # Compiling NvRX requires CUDA
        )
    fi

    if [[ "$mode" == "install" ]]; then
        pip install --force-reinstall --no-deps --no-cache-dir "${DEPS[@]}"
        pip install --no-cache-dir "${DEPS[@]}"

        # needs no-deps to avoid installing triton on top of pytorch-triton.
        pip install --no-deps --no-cache-dir "liger-kernel==0.5.8; (platform_machine == 'x86_64' and platform_system != 'Darwin')"
        pip install --no-deps "cut-cross-entropy @ git+https://github.com/apple/ml-cross-entropy.git@87a86aba72cfd2f0d8abecaf81c13c4528ea07d8; (platform_machine == 'x86_64' and platform_system != 'Darwin')"
    fi
}
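
# extra() only acts in install mode; there is nothing to pre-build. A single
# library can be (re)run on its own through the dispatcher at the bottom of
# this script, e.g. (script name is an assumption):
#
#   bash install_dep.sh --library extra --mode install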
echo 'Uninstalling legacy NeMo packages'
# Some of these packages are uninstalled for legacy purposes
${PIP} uninstall -y nemo_toolkit sacrebleu nemo_asr nemo_nlp nemo_tts

echo 'Upgrading tools'
${PIP} install -U --no-cache-dir "setuptools==76.0.0" pybind11 wheel ${PIP}

if [ "${NVIDIA_PYTORCH_VERSION}" != "" ]; then
    echo "Installing NeMo in NVIDIA PyTorch container: ${NVIDIA_PYTORCH_VERSION}"
    echo "Will not install numba"
else
    if [ "${CONDA_PREFIX}" != "" ]; then
        echo 'Installing numba'
        conda install -y -c conda-forge numba
    else
        pip install --no-cache-dir --no-deps torch cython
    fi
fi
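
# Environment detection above: inside an NVIDIA PyTorch container
# (NVIDIA_PYTORCH_VERSION set) numba is skipped; in a conda environment
# (CONDA_PREFIX set) numba comes from conda-forge; otherwise torch and cython
# are installed straight from PyPI.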
echo 'Installing NeMo dependencies'
cd $CURR

if [[ "$INSTALL_OPTION" == "dev" ]]; then
    echo "Running in dev mode"
    ${PIP} install --editable ".[all]"

else
    # --------------------------
    # Argument Parsing & Validation
    # --------------------------

    # Parse command-line arguments (initialize so the checks below work under set -u)
    LIBRARY_ARG=""
    MODE=""
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --library)
                LIBRARY_ARG="$2"
                shift 2
                ;;
            --mode)
                MODE="$2"
                shift 2
                ;;
            *)
                echo "Unknown option: $1"
                exit 1
                ;;
        esac
    done

    # Validate required arguments
    if [[ -z "$LIBRARY_ARG" ]]; then
        echo "Error: --library argument is required"
        exit 1
    fi

    if [[ -z "$MODE" ]]; then
        echo "Error: --mode argument is required"
        exit 1
    fi

    # Validate mode
    if [[ "$MODE" != "build" && "$MODE" != "install" ]]; then
        echo "Error: Invalid mode. Must be 'build' or 'install'"
        exit 1
    fi

    # Process library argument
    declare -a LIBRARIES
    if [[ "$LIBRARY_ARG" == "all" ]]; then
        LIBRARIES=("${ALL_LIBRARIES[@]}")
    else
        IFS=',' read -ra TEMP_ARRAY <<<"$LIBRARY_ARG"
        for lib in "${TEMP_ARRAY[@]}"; do
            trimmed_lib=$(echo "$lib" | xargs)
            if [[ -n "$trimmed_lib" ]]; then
                LIBRARIES+=("$trimmed_lib")
            fi
        done
    fi

    # Validate libraries array
    if [[ ${#LIBRARIES[@]} -eq 0 ]]; then
        echo "Error: No valid libraries specified"
        exit 1
    fi

    # Validate that each library is supported
    for lib in "${LIBRARIES[@]}"; do
        # "trt" is a valid option but not in ALL_LIBRARIES:
        # it does not get installed at the same time as the rest.
        if [[ "$lib" == "trt" ]]; then
            continue
        fi
        if [[ ! " ${ALL_LIBRARIES[*]} " =~ " ${lib} " ]]; then
            echo "Error: Unsupported library '$lib'"
            exit 1
        fi
    done

    # --------------------------
    # Execution Logic
    # --------------------------

    # Run operations for each library
    for library in "${LIBRARIES[@]}"; do
        echo "Processing $library ($MODE)..."
        "$library" "$MODE"

        # Check if the function succeeded
        if [[ $? -ne 0 ]]; then
            echo "Error: Operation failed for $library"
            exit 1
        fi
    done

    echo "All operations completed successfully"
    exit 0
fi

echo 'All done!'