#!/usr/bin/env bash
#
# Container entrypoint.
#
# Makes sure the Apex and flash-attn CUDA extensions are importable, trying in
# order: already installed -> prebuilt wheel from the Hugging Face Hub repo
# named by SELF_HF_REPO_ID -> local build. Optionally uploads the wheels found
# in /app/wheels (HF_UPLOAD_WHEELS=1) and finally runs ./run.sh.

# Strict mode: abort on unhandled errors, on unset variables and on a failure
# in any stage of a pipeline.
set -euo pipefail

echo "🚀 Start (wheels + app)"

# ---------------------------------------------------------------------------
# Environment. Each `${VAR:-default}` keeps a value supplied by the caller and
# only falls back to the default when the variable is unset or empty.
# ---------------------------------------------------------------------------

# Hugging Face repo that is searched for prebuilt wheels (and used as the
# upload target when HF_UPLOAD_WHEELS=1).
export SELF_HF_REPO_ID="${SELF_HF_REPO_ID:-XCarleX/Apex-l40s}"

# Cache locations; the hub and torch caches default to sub-directories of
# HF_HOME.
export HF_HOME="${HF_HOME:-/app/model_cache}"
export HF_HUB_CACHE="${HF_HUB_CACHE:-$HF_HOME/hub}"
export TORCH_HOME="${TORCH_HOME:-$HF_HOME/torch}"

# CUDA architecture list handed to the extension builds.
# NOTE(review): 8.9 presumably targets the L40S named in the default repo id —
# confirm before running on other GPUs.
export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-8.9}"

# Enable hf_transfer downloads unless the caller opted out.
export HF_HUB_ENABLE_HF_TRANSFER="${HF_HUB_ENABLE_HF_TRANSFER:-1}"

# Put the per-user bin directory first on PATH.
export PATH="$HOME/.local/bin:$PATH"

# Create the working directories used by the rest of the script.
# NOTE(review): "/app/whells" looks like a misspelling of "/app/wheels"; it is
# kept because the final chmod of this script references the same path —
# confirm whether anything else depends on it before removing.
mkdir -p /app/wheels /app/cuda_cache "$HF_HOME" "$TORCH_HOME" /app/wheels/src /app/whells

# World-writable wheel directories; best-effort, a chmod failure is ignored.
chmod -R 777 /app/wheels /app/whells || true

# Point the CUDA cache at the directory created above.
export CUDA_CACHE_PATH="/app/cuda_cache"

# When the image ships the NGC container license at the filesystem root, copy
# it next to the wheels (the upload step includes it). Best-effort: a failed
# copy is ignored.
if [[ -f "/NGC-DL-CONTAINER-LICENSE" ]]; then
  cp -f /NGC-DL-CONTAINER-LICENSE /app/wheels/NGC-DL-CONTAINER-LICENSE || true
fi

# Python packaging/build tooling needed to build wheels locally. Not guarded:
# under `set -e` a failure here stops the script.
python -m pip install -q -U pip build setuptools wheel hatchling hatch-vcs scikit-build-core cmake ninja packaging

# Hub client; prefer the hf_transfer extra and fall back to the plain package
# when that install fails.
python -m pip install -q -U "huggingface_hub[hf_transfer]" \
  || python -m pip install -q -U huggingface_hub

# Detect the wheel tags of the running interpreter:
#   PY_TAG - "cp<major><minor>" of the Python in use (e.g. cp310)
#   CU_TAG - "cu<digits>" from torch.version.cuda ("cu0" when torch reports
#            no CUDA version)
# The probe's exit status is checked explicitly: a command substitution placed
# inside a here-document hides a failure (e.g. torch missing) and would leave
# both tags silently empty. Detection stays best-effort — on failure a warning
# is printed and both tags are empty.
if ! _detected_tags="$(
  python - <<'PY'
import sys, torch
py_tag=f"cp{sys.version_info.major}{sys.version_info.minor}"
cu_tag="cu"+(torch.version.cuda or "0").replace(".","")
print(py_tag, cu_tag)
PY
)"; then
  echo "[env] AVISO: falha ao detectar tags Python/CUDA (python/torch indisponível?)" >&2
  _detected_tags=""
fi

# Only the first output line is parsed; both variables are always defined
# afterwards, which keeps `set -u` happy further down.
read -r PY_TAG CU_TAG <<<"${_detected_tags}"
unset _detected_tags

echo "[env] Python tag=${PY_TAG} | CUDA tag=${CU_TAG}"

#######################################
# Check whether Apex and its fused layer-norm CUDA extension can be imported.
# Arguments: none
# Outputs:   one status line on stdout; the Python probe's own output is
#            discarded
# Returns:   0 when the imports succeed, 1 otherwise
#######################################
check_apex() {
  # Test the probe directly instead of inspecting $? afterwards: that keeps
  # the function safe under `set -e` and avoids the `a && b || c` pitfall.
  if python - <<'PY' >/dev/null 2>&1
from importlib import import_module
from apex.normalization import FusedLayerNorm
import_module("fused_layer_norm_cuda")
PY
  then
    echo "[apex] import OK"
    return 0
  fi

  echo "[apex] import falhou (vai tentar Hub/Build)"
  return 1
}

#######################################
# Check whether the flash_attn package can be imported.
# Arguments: none
# Outputs:   one status line on stdout; the Python probe's own output is
#            discarded
# Returns:   0 when the import succeeds, 1 otherwise
#######################################
check_flashattn() {
  # Test the probe directly instead of inspecting $? afterwards: that keeps
  # the function safe under `set -e` and avoids the `a && b || c` pitfall.
  if python - <<'PY' >/dev/null 2>&1
import flash_attn
PY
  then
    echo "[flash_attn] import OK"
    return 0
  fi

  echo "[flash_attn] import falhou (vai tentar Hub/Build)"
  return 1
}

#######################################
# Look for a prebuilt wheel of a package in the Hub repo and download it into
# /app/wheels. Despite the name, nothing is installed here; the caller runs
# pip on the returned path.
# Globals:   SELF_HF_REPO_ID, PY_TAG, CU_TAG (read); HF_TOKEN (read by Python)
# Arguments: $1 - package name; wheel file names must start with "<name>-"
# Outputs:   stdout - nothing when no matching wheel exists; otherwise the
#            repo file name followed by the local path (last line)
#            stderr - one status line (Python's stderr is discarded)
# Returns:   0 on success or "nothing found", 1 when the Python helper fails
#######################################
install_from_hf() {
  # Status goes to stderr: callers capture stdout and treat any output as
  # "a wheel was downloaded".
  echo "[hub] Verificando wheel de $1 no repositório ${SELF_HF_REPO_ID}" >&2

  # `return` rather than `exit`, so a direct (non-subshell) call cannot
  # terminate the whole script.
  python - "$1" "$PY_TAG" "$CU_TAG" <<'PY' 2>/dev/null || return 1
import os, sys
from huggingface_hub import HfApi, hf_hub_download

pkg, py_tag, cu_tag = sys.argv[1], sys.argv[2], sys.argv[3]
repo = os.environ.get("SELF_HF_REPO_ID", "XCarleX/Apex-l40s")

# token=None makes HfApi fall back to the locally stored login, so no
# explicit HfFolder lookup is needed.
api = HfApi(token=os.getenv("HF_TOKEN") or None)
try:
    files = api.list_repo_files(repo_id=repo, repo_type="model")
except Exception:
    # Repo unreachable or missing: report "nothing found", not an error.
    raise SystemExit(0)

# Wheels of this package whose path mentions the Python tag.
cands = [
    f for f in files
    if f.endswith(".whl")
    and f.rsplit("/", 1)[-1].startswith(pkg + "-")
    and py_tag in f
]
# Prefer wheels that also mention the CUDA tag; otherwise accept any candidate.
pref = [f for f in cands if cu_tag and cu_tag in f] or cands
if not pref:
    raise SystemExit(0)

# Lexicographically greatest file name wins.
target = sorted(pref, reverse=True)[0]
print(target)
path = hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels")
print(path)
PY
}

#######################################
# Build an Apex wheel from source into /app/wheels and install it.
# Globals:   exports APEX_CPP_EXT, APEX_CUDA_EXT, APEX_ALL_CONTRIB_EXT
# Arguments: none
# Outputs:   stdout - exactly one line: the path of the newest
#            /app/wheels/apex-*.whl, or an empty line when none exists
#            stderr - progress messages and tool output
# Returns:   0 unless the initial `git clone` fails
#######################################
build_apex() {
  local src="/app/wheels/src/apex"
  local wheel=""
  local candidate

  # Everything in this group is logging or tool output. It is sent to stderr
  # so that stdout carries only the wheel path callers capture with $(...).
  {
    echo "[build] Preparando fonte Apex em ${src}"
    if [[ -d "${src}/.git" ]]; then
      # Existing checkout: refresh refs and drop local changes/build products.
      # NOTE(review): `reset --hard HEAD` does not move to the fetched commit;
      # confirm whether updating the checkout was intended.
      git -C "$src" fetch --all -p || true
      git -C "$src" reset --hard HEAD || true
      git -C "$src" clean -fdx || true
    else
      rm -rf "${src:?}"
      git clone --depth 1 https://github.com/NVIDIA/apex "$src"
    fi

    echo "[build] Compilando Apex -> wheel"
    export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0
    python -m pip wheel --no-build-isolation --no-deps "$src" -w /app/wheels -q || true

    # Newest matching wheel by modification time (glob instead of parsing ls).
    for candidate in /app/wheels/apex-*.whl; do
      [[ -e "$candidate" ]] || continue
      if [[ -z "$wheel" || "$candidate" -nt "$wheel" ]]; then
        wheel="$candidate"
      fi
    done

    if [[ -n "$wheel" ]]; then
      python -m pip install -U --no-deps "$wheel" -q || true
      echo "[build] Apex instalado da wheel recém-compilada: ${wheel}"
    else
      echo "[build] Nenhuma wheel Apex gerada; instalando do source (pode falhar)"
      python -m pip install -q --no-build-isolation "$src" || true
    fi
  } >&2

  printf '%s\n' "$wheel"
}

# flash-attn release to build; override through the environment.
FLASH_ATTN_VERSION="${FLASH_ATTN_VERSION:-2.7.4.post1}"

#######################################
# Build a flash-attn wheel for FLASH_ATTN_VERSION into /app/wheels and
# install it.
# Globals:   FLASH_ATTN_VERSION (read)
# Arguments: none
# Outputs:   stdout - exactly one line: the path of the newest
#            /app/wheels/flash_attn-<version>-*.whl, or an empty line
#            stderr - progress messages and tool output
# Returns:   0 (build and install failures are tolerated)
#######################################
build_flashattn() {
  local wheel=""
  local candidate

  # Logging and tool output go to stderr so that stdout carries only the
  # wheel path callers capture with $(...).
  {
    echo "[build] Compilando flash-attn==${FLASH_ATTN_VERSION} -> wheel"
    python -m pip wheel --no-build-isolation --no-deps --no-binary :all: \
      "flash-attn==${FLASH_ATTN_VERSION}" -w /app/wheels -q || true

    # Newest matching wheel by modification time; the version is quoted so it
    # is never treated as a glob pattern.
    for candidate in "/app/wheels/flash_attn-${FLASH_ATTN_VERSION}-"*.whl; do
      [[ -e "$candidate" ]] || continue
      if [[ -z "$wheel" || "$candidate" -nt "$wheel" ]]; then
        wheel="$candidate"
      fi
    done

    if [[ -n "$wheel" ]]; then
      python -m pip install -U --no-deps "$wheel" -q || true
      echo "[build] flash-attn instalado da wheel recém-compilada: ${wheel}"
    else
      echo "[build] Nenhuma wheel flash-attn gerada; instalação não aplicada"
    fi
  } >&2

  printf '%s\n' "$wheel"
}

#######################################
# Make a package importable, trying in order: already installed -> wheel from
# the Hub -> local build. When everything fails, one more build is started in
# the background and the function gives up.
# Globals:   SELF_HF_REPO_ID (read)
# Arguments: $1 - package name (log messages, Hub lookup, log file name)
#            $2 - check command: returns 0 when the package imports
#            $3 - build command: builds/installs; last stdout line is the
#                 wheel path (may be empty)
# Outputs:   progress messages on stdout
# Returns:   0 when the check passes at any stage, 1 otherwise
#######################################
ensure_pkg() {
  local pkg="$1"
  local check_fn="$2"
  local build_fn="$3"
  local wheel_path=""
  local hub_out=""
  local build_out=""

  echo "[flow] === ${pkg} ==="
  # check_fn/build_fn are expanded unquoted on purpose so a caller may pass a
  # command together with arguments.
  if ${check_fn}; then
    echo "[flow] ${pkg}: já instalado (import OK)"
    return 0
  fi

  echo "[flow] ${pkg}: procurando wheel no Hub (${SELF_HF_REPO_ID})"
  hub_out="$(install_from_hf "$pkg" || true)"
  # The wheel path is the last output line. It must name an existing file:
  # status or log text on stdout must never be handed to pip as a path.
  wheel_path="$(printf '%s\n' "$hub_out" | tail -n1)"
  if [[ -n "$wheel_path" && -f "$wheel_path" ]]; then
    echo "[hub] Baixado: ${wheel_path}"
    python -m pip install -U --no-deps "${wheel_path}" -q || true
    if ${check_fn}; then
      echo "[flow] ${pkg}: sucesso via Hub (${wheel_path})"
      return 0
    fi
    echo "[flow] ${pkg}: import falhou após instalar wheel do Hub; seguirá para build"
  else
    echo "[hub] Nenhuma wheel compatível encontrada para ${pkg}"
  fi

  echo "[flow] ${pkg}: compilando (fallback)"
  # A failing build is tolerated here; the check below decides the outcome.
  build_out="$(${build_fn})" || true
  wheel_path="$(printf '%s\n' "$build_out" | tail -n1)"
  if ${check_fn}; then
    echo "[flow] ${pkg}: sucesso após compilação ${wheel_path:-'(instalação direta)'}"
    return 0
  fi

  echo "[flow] ${pkg}: falha após tentativa de build; agendando recompilação em segundo plano e seguindo adiante"
  # Run the retry in a background subshell: it inherits this script's
  # functions and variables, which a fresh `bash -c "<function name>"` process
  # would not. SIGHUP is ignored so the job survives a hangup, like nohup.
  (
    trap '' HUP
    ${build_fn} || true
  ) >"/app/wheels/build_${pkg}.log" 2>&1 &
  disown || true
  return 1
}

# Best-effort: a package that still cannot be imported does not stop the
# script (`|| true`); the app is launched regardless.
ensure_pkg "apex" check_apex build_apex || true
ensure_pkg "flash_attn" check_flashattn build_flashattn || true

# Optional upload of the wheels (and the NGC license, if present) from
# /app/wheels to the Hub repo. Enabled only with HF_UPLOAD_WHEELS=1.
# The upload is best-effort: a missing token or a failed upload prints a
# warning but must not keep the app from starting under `set -e`.
if [[ "${HF_UPLOAD_WHEELS:-0}" == "1" ]]; then
  if ! python - <<'PY'
import os
from huggingface_hub import HfApi

try:
    from huggingface_hub import get_token
except ImportError:
    # Releases without a top-level get_token still expose HfFolder.
    from huggingface_hub import HfFolder
    get_token = HfFolder.get_token

repo = os.environ.get("SELF_HF_REPO_ID", "XCarleX/Apex-l40s")
token = os.getenv("HF_TOKEN") or get_token()
if not token:
    raise SystemExit("HF_TOKEN ausente; upload desabilitado")

api = HfApi(token=token)
api.upload_folder(
    folder_path="/app/wheels",
    repo_id=repo,
    repo_type="model",
    # Only wheels and the license are uploaded; sources and logs are excluded.
    allow_patterns=["*.whl", "NGC-DL-CONTAINER-LICENSE"],
    ignore_patterns=["**/src/**", "**/*.log", "**/logs/**", ".git/**"],
)
print("Upload concluído (wheels + licença).")
PY
  then
    echo "⚠️ Upload das wheels falhou ou foi ignorado; seguindo adiante" >&2
  fi
else
  echo "ℹ️ Upload desabilitado (defina HF_UPLOAD_WHEELS=1)"
fi

# Re-apply world-writable permissions so files created during this run
# (downloaded or built wheels, logs) are covered too. Best-effort.
# NOTE(review): "/app/whells" is kept as spelled in the directory-creation
# step earlier in this script.
chmod -R 777 /app/wheels /app/whells || true

# Hand over to the application launcher, resolved relative to the current
# working directory. Under `set -e` a failing chmod +x stops the script.
chmod +x ./run.sh
./run.sh