Test

Paused

App Files Files Community

eeuuia committed on Oct 15

Commit

e3108f3

verified ·

1 Parent(s): bc01875

Update builder.sh

Browse files

Files changed (1) hide show

builder.sh +224 -184

builder.sh CHANGED Viewed

@@ -1,13 +1,14 @@
 #!/usr/bin/env bash
 set -euo pipefail
-echo "🚀 Builder (FlashAttn [ROCm/CUDA] + Apex + Q8) — runtime com GPU visível"
 # ===== Config e diretórios =====
 mkdir -p /app/wheels /app/cuda_cache /app/wheels/src
 chmod -R 777 /app/wheels || true
-export CUDA_CACHE_PATH="/app/cuda_cache" # Para CUDA
-# Para ROCm, o cache é gerenciado automaticamente (ex: ~/.cache/miopenkerneldb)
 # Preserva licença NGC (se existir)
 if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
@@ -17,7 +18,7 @@ fi
 # ===== Dependências mínimas =====
 python -m pip install -v -U pip build setuptools wheel hatchling hatch-vcs scikit-build-core cmake ninja packaging "huggingface_hub[hf_transfer]" || true
-# ===== Tags de ambiente (Python/Plataforma/Torch) =====
 PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
 TORCH_VER="$(python - <<'PY'
 try:
@@ -28,38 +29,38 @@ except Exception:
     print("unknown")
 PY
 )"
-PLATFORM_TAG="$(python - <<'PY'
 try:
     import torch
-    if getattr(torch.version, "cuda", None):
-        print("cu" + torch.version.cuda.replace(".", ""))
-    elif getattr(torch.version, "hip", None):
-        rocm_ver = torch.version.hip.split('.')[0:2]
-        print("rocm" + "".join(rocm_ver))
-    else:
-        print("cpu")
 except Exception:
-    print("cpu")
 PY
 )"
-echo "[env] PY_TAG=${PY_TAG} TORCH_VER=${TORCH_VER} PLATFORM_TAG=${PLATFORM_TAG}"
 # ============================================================================
 #                               CHECKERS
 # ============================================================================
-# Checa a instalação completa do Flash Attention (funciona para CUDA e ROCm)
-check_flash_attn () {
 python - <<'PY'
 import importlib
-try:
-    importlib.import_module("flash_attn")
-    # Tenta importar uma função chave para ter mais certeza
-    from flash_attn import flash_attn_func
-    ok = True
-except Exception as e:
-    print(f"Check failed: {e}")
-    ok = False
 raise SystemExit(0 if ok else 1)
 PY
 }
@@ -70,8 +71,7 @@ try:
     from apex.normalization import FusedLayerNorm
     import importlib; importlib.import_module("fused_layer_norm_cuda")
     ok = True
-except Exception as e:
-    print(f"Check failed: {e}")
     ok = False
 raise SystemExit(0 if ok else 1)
 PY
@@ -89,43 +89,70 @@ PY
 #                         DOWNLOAD DO HUB (GENÉRICO)
 # ============================================================================
-# Instala uma wheel do HF por prefixo (ex.: apex-, flash-attn-)
 install_from_hf_by_prefix () {
   local PREFIX="$1"
-  echo "[hub] Procurando wheels '${PREFIX}*.whl' em ${SELF_HF_REPO_ID} com tags ${PY_TAG}/${PLATFORM_TAG}"
-  # O script python interno fará o download e imprimirá o caminho local do arquivo
-  python - "$PREFIX" "$PY_TAG" "$PLATFORM_TAG" <<'PY' || return 1
 import os, sys
 from huggingface_hub import HfApi, hf_hub_download, HfFolder
-prefix, py_tag, platform_tag = sys.argv[1], sys.argv[2], sys.argv[3]
-repo = os.environ.get("SELF_HF_REPO_ID","eeuuia/Tmp")
 api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
 try:
     files = api.list_repo_files(repo_id=repo, repo_type="model")
-except Exception as e:
-    print(f"Não foi possível listar arquivos do repo: {e}", file=sys.stderr)
-    raise SystemExit(1)
 def match(name: str) -> bool:
-    filename = name.rsplit("/",1)[-1]
-    # Aceita 'flash-attn-' ou 'flash_attn-'
-    normalized_prefix = prefix.replace('-', '_')
-    return name.endswith(".whl") and \
-           (filename.startswith(prefix) or filename.startswith(normalized_prefix)) and \
-           (py_tag in name)
 cands = [f for f in files if match(f)]
-# Prioriza wheels com a tag da plataforma (cu121, rocm57, etc.)
-pref = [f for f in cands if platform_tag and platform_tag in f] or cands
 if not pref:
-    print(f"Nenhuma wheel compatível encontrada para {prefix}", file=sys.stderr)
-    raise SystemExit(1)
 target = sorted(pref, reverse=True)[0]
-print(f"Wheel candidata encontrada no Hub: {target}", file=sys.stderr)
 path = hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels")
-print(path) # Saída principal usada pelo script bash
 PY
 }
@@ -133,80 +160,146 @@ PY
 #                                BUILDERS
 # ============================================================================
-# Compila Flash Attention para ROCm (AMD GPUs)
-build_flash_attn_rocm () {
-  local SRC="/app/wheels/src/flash-attention-rocm"
-  echo "[build] Preparando fonte FlashAttention (ROCm) em ${SRC}"
-  # Clona o repositório específico da ROCm, branch main_perf
   if [ -d "$SRC/.git" ]; then
-    ( cd "$SRC"; git fetch --all -p; git reset --hard origin/main_perf; git clean -fdx; )
   else
     rm -rf "$SRC"
-    git clone --depth 1 --branch main_perf https://github.com/ROCm/flash-attention.git "$SRC"
   fi
-  echo "[build] Compilando FlashAttention (ROCm) -> wheel"
-  # Variável de ambiente CRÍTICA para habilitar o backend Triton no ROCm
-  export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
-  # Gera a wheel usando setup.py bdist_wheel, que é confiável para este repo
-  ( cd "$SRC"; python setup.py bdist_wheel -d /app/wheels; )
-  local W="$(ls -t /app/wheels/flash_attn-*.whl 2>/dev/null | head -n1 || true)"
   if [ -n "${W}" ]; then
-    python -m pip install -v -U --no-deps "${W}"
-    echo "[build] FlashAttention (ROCm) instalado da wheel recém-compilada: ${W}"
   else
-    echo "[build] ERRO: Nenhuma wheel FlashAttention (ROCm) foi gerada."
-    return 1
   fi
 }
-# Compila Apex (NVIDIA GPUs)
 build_apex () {
   local SRC="/app/wheels/src/apex"
   echo "[build] Preparando fonte Apex em ${SRC}"
   if [ -d "$SRC/.git" ]; then
-    ( cd "$SRC"; git fetch --all -p; git reset --hard HEAD; git clean -fdx; )
   else
     rm -rf "$SRC"
     git clone --depth 1 https://github.com/NVIDIA/apex "$SRC"
   fi
   echo "[build] Compilando Apex -> wheel"
-  export APEX_CPP_EXT=1 APEX_CUDA_EXT=1
-  python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w /app/wheels
   local W="$(ls -t /app/wheels/apex-*.whl 2>/dev/null | head -n1 || true)"
   if [ -n "${W}" ]; then
-    python -m pip install -v -U --no-deps "${W}"
     echo "[build] Apex instalado da wheel recém-compilada: ${W}"
   else
-    echo "[build] ERRO: Nenhuma wheel do Apex foi gerada."
-    return 1
   fi
 }
-# Compila Q8 Kernels
 Q8_REPO="${Q8_REPO:-https://github.com/Lightricks/LTX-Video-Q8-Kernels}"
 Q8_COMMIT="${Q8_COMMIT:-f3066edea210082799ca5a2bbf9ef0321c5dd8fc}"
 build_q8 () {
   local SRC="/app/wheels/src/q8_kernels"
   rm -rf "$SRC"
   git clone --filter=blob:none "$Q8_REPO" "$SRC"
-  ( cd "$SRC"; git checkout "$Q8_COMMIT"; git submodule update --init --recursive; )
   echo "[build] Compilando Q8 Kernels -> wheel"
-  python -m pip wheel -v --no-build-isolation "$SRC" -w /app/wheels
   local W="$(ls -t /app/wheels/q8_kernels-*.whl 2>/dev/null | head -n1 || true)"
   if [ -n "${W}" ]; then
-    python -m pip install -v -U --no-deps "${W}"
     echo "[build] Q8 instalado da wheel recém-compilada: ${W}"
   else
-    echo "[build] ERRO: Nenhuma wheel q8_kernels foi gerada."
-    return 1
   fi
 }
@@ -214,115 +307,62 @@ build_q8 () {
 #                               EXECUÇÃO
 # ============================================================================
-# --- FLASH ATTENTION (Condicional para ROCm) ---
-if [[ "${PLATFORM_TAG}" == rocm* ]]; then
-  echo "[flow] === Flash Attention (ROCm) ==="
-  if check_flash_attn; then
-    echo "[flow] Flash Attention (ROCm) já parece estar instalado. Pulando."
-  else
-    HF_WHEEL_PATH="$(install_from_hf_by_prefix "flash-attn-" || true)"
-    if [ -n "${HF_WHEEL_PATH:-}" ]; then
-      echo "[hub] Wheel encontrada: ${HF_WHEEL_PATH}"
-      python -m pip install -v -U --no-deps "${HF_WHEEL_PATH}"
-      if check_flash_attn; then
-        echo "[flow] Flash Attention (ROCm): OK via wheel do Hub."
-      else
-        echo "[flow] Wheel do Hub falhou na verificação. Compilando do zero..."
-        build_flash_attn_rocm
-      fi
-    else
-      echo "[hub] Nenhuma wheel compatível encontrada. Compilando do zero..."
-      build_flash_attn_rocm
-    fi
-    # Verificação final
-    if ! check_flash_attn; then echo "[flow] ERRO: Falha ao instalar Flash Attention (ROCm)." >&2; fi
-  fi
-# Adicionar aqui a lógica para CUDA se for necessário compilar o Flash Attention completo
-# else
-#   echo "[flow] Pulando Flash Attention (não é ambiente ROCm)."
-fi
-# --- APEX (Condicional para CUDA) ---
-if [[ "${PLATFORM_TAG}" == cu* ]]; then
-  echo "[flow] === Apex (CUDA) ==="
-  if check_apex; then
-    echo "[flow] Apex já parece estar instalado. Pulando."
-  else
-    HF_WHEEL_PATH="$(install_from_hf_by_prefix "apex-" || true)"
-    if [ -n "${HF_WHEEL_PATH:-}" ]; then
-      echo "[hub] Wheel encontrada: ${HF_WHEEL_PATH}"
-      python -m pip install -v -U --no-deps "${HF_WHEEL_PATH}"
-      if check_apex; then
-        echo "[flow] Apex: OK via wheel do Hub."
-      else
-        echo "[flow] Wheel do Hub falhou na verificação. Compilando do zero..."
-        build_apex
-      fi
-    else
-      echo "[hub] Nenhuma wheel compatível encontrada. Compilando do zero..."
-      build_apex
-    fi
-    if ! check_apex; then echo "[flow] ERRO: Falha ao instalar Apex." >&2; fi
-  fi
-else
-    echo "[flow] Pulando Apex (não é ambiente CUDA)."
-fi
-# --- Q8 KERNELS (Independente de plataforma, mas requer GPU) ---
-echo "[flow] === q8_kernels ==="
-if check_q8; then
-  echo "[flow] q8_kernels já parece estar instalado. Pulando."
-else
-  HF_WHEEL_PATH="$(install_from_hf_by_prefix "q8_kernels-" || true)"
-  if [ -n "${HF_WHEEL_PATH:-}" ]; then
-    echo "[hub] Wheel encontrada: ${HF_WHEEL_PATH}"
-    python -m pip install -v -U --no-deps "${HF_WHEEL_PATH}"
-    if check_q8; then
-      echo "[flow] q8_kernels: OK via wheel do Hub."
-    else
-      echo "[flow] Wheel do Hub falhou na verificação. Compilando do zero..."
-      build_q8
-    fi
-  else
-    echo "[hub] Nenhuma wheel compatível encontrada. Compilando do zero..."
-    build_q8
-  fi
-  if ! check_q8; then echo "[flow] ERRO: Falha ao instalar q8_kernels." >&2; fi
-fi
-# --- UPLOAD DE WHEELS PARA O HUB ---
-echo "[upload] Sincronizando wheels geradas para o Hugging Face Hub..."
 python - <<'PY'
 import os
 from huggingface_hub import HfApi, HfFolder
-repo = os.environ.get("SELF_HF_REPO_ID","eeuuia/Tmp")
 token = os.getenv("HF_TOKEN") or HfFolder.get_token()
 if not token:
-    print("HF_TOKEN não encontrado. Upload desabilitado.")
-    raise SystemExit(0)
-wheels_dir = "/app/wheels"
-if not any(f.endswith('.whl') for f in os.listdir(wheels_dir)):
-    print("Nenhuma wheel nova para fazer upload.")
-    raise SystemExit(0)
 api = HfApi(token=token)
-try:
-    api.upload_folder(
-        folder_path=wheels_dir,
-        repo_id=repo,
-        repo_type="model",
-        allow_patterns=["*.whl", "NGC-DL-CONTAINER-LICENSE"],
-        ignore_patterns=["src/**", "*.log"],
-    )
-    print("Upload de wheels concluído.")
-except Exception as e:
-    print(f"Falha no upload: {e}")
 PY
 chmod -R 777 /app/wheels || true
-echo "✅ Builder finalizado."

 #!/usr/bin/env bash
 set -euo pipefail
+echo "🚀 Builder (FlashAttn LayerNorm extra + Apex + Q8) — runtime com GPU visível"
 # ===== Config e diretórios =====
 mkdir -p /app/wheels /app/cuda_cache /app/wheels/src
 chmod -R 777 /app/wheels || true
+export CUDA_CACHE_PATH="/app/cuda_cache"
 # Preserva licença NGC (se existir)
 if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
 # ===== Dependências mínimas =====
 python -m pip install -v -U pip build setuptools wheel hatchling hatch-vcs scikit-build-core cmake ninja packaging "huggingface_hub[hf_transfer]" || true
+# ===== Tags de ambiente (Python/CUDA/Torch) =====
 PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
 TORCH_VER="$(python - <<'PY'
 try:
     print("unknown")
 PY
 )"
+# CU_TAG: "cu" followed by torch.version.cuda with the dots removed; empty when
+# torch cannot be imported or torch.version.cuda is not set.
+CU_TAG="$(python - <<'PY'
 try:
     import torch
+    cu = getattr(torch.version, "cuda", None)
+    print("cu"+cu.replace(".","")) if cu else print("")
 except Exception:
+    print("")
 PY
 )"
+# Log the detected environment tags.
+echo "[env] PY_TAG=${PY_TAG} TORCH_VER=${TORCH_VER} CU_TAG=${CU_TAG}"
 # ============================================================================
 #                               CHECKERS
 # ============================================================================
+# Checks specifically for the native module required by layer_norm (without
+# checking 'flash-attn' as a whole).
+# Tries each candidate module name in order and stops at the first one that
+# imports.
+# Returns: 0 when at least one candidate imports, 1 when none does.
+check_flash_layer_norm_bin () {
 python - <<'PY'
 import importlib
+ok = False
+# extensões conhecidas produzidas por csrc/layer_norm
+for name in [
+    "dropout_layer_norm",                 # nome do módulo nativo
+    "flash_attn.ops.layer_norm",          # wrapper python que usa o nativo
+    "flash_attn.ops.rms_norm",            # pode depender do mesmo backend em alguns empacotamentos
+]:
+    try:
+        importlib.import_module(name)
+        ok = True
+        break
+    except Exception:
+        pass
 raise SystemExit(0 if ok else 1)
 PY
 }
     from apex.normalization import FusedLayerNorm
     import importlib; importlib.import_module("fused_layer_norm_cuda")
     ok = True
+except Exception:
     ok = False
 raise SystemExit(0 if ok else 1)
 PY
 #                         DOWNLOAD DO HUB (GENÉRICO)
 # ============================================================================
+# Downloads the best-matching wheel for a package prefix (e.g. apex, q8_kernels)
+# from the Hugging Face Hub repo into /app/wheels.
+# Globals:   SELF_HF_REPO_ID, HF_TOKEN (read, optional); PY_TAG, CU_TAG (read)
+# Arguments: $1 - wheel name prefix, without the trailing dash
+# Outputs:   stdout - local path of the downloaded wheel, nothing when no wheel
+#            was downloaded; diagnostics go to stderr
+# Returns:   0 in every case (the Hub lookup is best effort)
 install_from_hf_by_prefix () {
   local PREFIX="$1"
+  # Log to stderr: callers capture stdout with $(...) and treat any output as a
+  # downloaded wheel path. The repo default mirrors the one used by the Python
+  # code below, so an unset SELF_HF_REPO_ID does not trip `set -u`.
+  echo "[hub] Procurando wheels '${PREFIX}-*.whl' em ${SELF_HF_REPO_ID:-euIaxs22/Aduc-sdr} com tags ${PY_TAG}/${CU_TAG}" >&2
+  # `return`, not `exit`: outside a command substitution `exit` would terminate
+  # the whole builder.
+  python - "$PREFIX" "$PY_TAG" "$CU_TAG" <<'PY' || return 0
 import os, sys
 from huggingface_hub import HfApi, hf_hub_download, HfFolder
+prefix, py_tag, cu_tag = sys.argv[1], sys.argv[2], sys.argv[3]
+repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
 api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
 try:
     files = api.list_repo_files(repo_id=repo, repo_type="model")
+except Exception:
+    # Repo unreachable or not listable: report "no wheel" instead of failing.
+    raise SystemExit(0)
 def match(name: str) -> bool:
+    return name.endswith(".whl") and name.rsplit("/",1)[-1].startswith(prefix + "-") and (py_tag in name)
 cands = [f for f in files if match(f)]
+# Prefer wheels tagged with the active CUDA version; otherwise take any candidate.
+pref = [f for f in cands if cu_tag and cu_tag in f] or cands
 if not pref:
+    raise SystemExit(0)
+# The lexicographically greatest file name wins.
 target = sorted(pref, reverse=True)[0]
+# The repo-relative name is diagnostic only, so it goes to stderr.
+print(target, file=sys.stderr)
 path = hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels")
+# Only line written to stdout: the local path consumed by the caller.
+print(path)
+PY
+}
+# Downloads a FlashAttention layer_norm wheel from the Hugging Face Hub repo
+# into /app/wheels, accepting several naming variants of the package.
+# Globals:   SELF_HF_REPO_ID, HF_TOKEN (read, optional); PY_TAG, CU_TAG (read)
+# Outputs:   stdout - local path of the downloaded wheel, nothing when no wheel
+#            was downloaded; diagnostics go to stderr
+# Returns:   0 in every case (the Hub lookup is best effort)
+install_flash_layer_norm_from_hf () {
+  # Log to stderr: the caller captures stdout with $(...) and treats any output
+  # as a downloaded wheel path. The repo default mirrors the Python code below,
+  # so an unset SELF_HF_REPO_ID does not trip `set -u`.
+  echo "[hub] Procurando wheels FlashAttention LayerNorm em ${SELF_HF_REPO_ID:-euIaxs22/Aduc-sdr}" >&2
+  # `return`, not `exit`: outside a command substitution `exit` would terminate
+  # the whole builder.
+  python - "$PY_TAG" "$CU_TAG" <<'PY' || return 0
+import os, sys, re
+from huggingface_hub import HfApi, hf_hub_download, HfFolder
+py_tag, cu_tag = sys.argv[1], sys.argv[2]
+repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
+api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
+try:
+    files = api.list_repo_files(repo_id=repo, repo_type="model")
+except Exception:
+    # Repo unreachable or not listable: report "no wheel" instead of failing.
+    raise SystemExit(0)
+# Accepted wheel file names (matched case-insensitively against the basename).
+pats = [
+    r"^flash[_-]?attn[_-]?.*layer[_-]?norm-.*\.whl$",
+    r"^dropout[_-]?layer[_-]?norm-.*\.whl$",
+]
+def ok(fn: str) -> bool:
+    name = fn.rsplit("/",1)[-1]
+    if py_tag not in name: return False
+    return any(re.search(p, name, flags=re.I) for p in pats)
+cands = [f for f in files if ok(f)]
+# Prefer wheels tagged with the active CUDA version; otherwise take any candidate.
+pref = [f for f in cands if cu_tag and cu_tag in f] or cands
+if not pref:
+    raise SystemExit(0)
+# The lexicographically greatest file name wins.
+target = sorted(pref, reverse=True)[0]
+# The repo-relative name is diagnostic only, so it goes to stderr.
+print(target, file=sys.stderr)
+path = hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels")
+# Only line written to stdout: the local path consumed by the caller.
+print(path)
 PY
 }
 #                                BUILDERS
 # ============================================================================
+# Extra step: ALWAYS tries to install the layer_norm submodule from a Hub wheel
+# first; when no compatible wheel is available, builds csrc/layer_norm from
+# source and produces a wheel.
+# Globals:   TORCH_CUDA_ARCH_LIST, MAX_JOBS (exported)
+# Returns:   0 when check_flash_layer_norm_bin succeeds, 1 otherwise
+build_or_install_flash_layer_norm () {
+  # Keep the working variables out of the global scope.
+  local HF_OUT WHEEL_PATH SRC W
+  echo "[flow] === FlashAttn LayerNorm (passo extra) ==="
+  # 1) Try a prebuilt wheel from the Hub first (avoids recompiling)
+  HF_OUT="$(install_flash_layer_norm_from_hf || true)"
+  WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
+  # Require an existing file: the helper's output may contain log lines, and
+  # handing such a line to pip would only produce a confusing failure.
+  if [ -n "${WHEEL_PATH}" ] && [ -f "${WHEEL_PATH}" ]; then
+    echo "[hub] Baixado: ${WHEEL_PATH}"
+    python -m pip install -v -U --no-build-isolation --no-deps "${WHEEL_PATH}" || true
+    if check_flash_layer_norm_bin; then
+      echo "[flow] FlashAttn LayerNorm: OK via wheel do Hub"
+      return 0
+    fi
+    echo "[flow] Wheel do Hub não resolveu import; seguirá com build"
+  else
+    echo "[hub] Nenhuma wheel compatível encontrada para FlashAttn LayerNorm"
+  fi
+  # 2) Build the csrc/layer_norm submodule from source -> wheel
+  SRC="/app/wheels/src/flash-attn"
+  echo "[build] Preparando fonte FlashAttention (layer_norm) em ${SRC}"
   if [ -d "$SRC/.git" ]; then
+    git -C "$SRC" fetch --all -p || true
+    git -C "$SRC" reset --hard origin/main || true
+    git -C "$SRC" clean -fdx || true
   else
     rm -rf "$SRC"
+    git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
   fi
+  # Target only the active GPU's compute capability (less build time/noise).
+  # torch is imported inside the try so the fallback also covers a missing torch.
+  export TORCH_CUDA_ARCH_LIST="$(python - <<'PY'
+try:
+    import torch
+    cc = "%d.%d" % torch.cuda.get_device_capability(0)
+    print(cc)
+except Exception:
+    print("8.9")  # fallback for Ada (L40S) when building without a visible GPU
+PY
+  )"
+  echo "[build] TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}"
+  export MAX_JOBS="${MAX_JOBS:-90}"
+  # Build a reusable wheel. The source directory is passed explicitly instead of
+  # via pushd, so a missing csrc/layer_norm can never make pip build whatever
+  # the current directory happens to be.
+  python -m pip wheel -v --no-build-isolation --no-deps "$SRC/csrc/layer_norm" -w /app/wheels || true
+  # Locate the generated wheel. Only layer_norm-specific names are accepted:
+  # picking "the newest *.whl" could install an unrelated package's wheel.
+  W="$(ls -t /app/wheels/*flash*attn*layer*norm*-*.whl 2>/dev/null | head -n1 || true)"
+  if [ -z "${W}" ]; then
+    W="$(ls -t /app/wheels/*dropout*layer*norm*-*.whl 2>/dev/null | head -n1 || true)"
+  fi
   if [ -n "${W}" ]; then
+    python -m pip install -v -U --no-deps "${W}" || true
+    echo "[build] FlashAttn LayerNorm instalado da wheel: ${W}"
   else
+    echo "[build] Nenhuma wheel gerada; instalando direto do source (último recurso)"
+    python -m pip install -v --no-build-isolation "$SRC/csrc/layer_norm" || true
+  fi
+  # Final check of the native module
+  if check_flash_layer_norm_bin; then
+    echo "[flow] FlashAttn LayerNorm: import OK após build"
+    return 0
   fi
+  echo "[flow] FlashAttn LayerNorm: falhou import após build"
+  return 1
 }
+## Full FlashAttention install from GitHub
+# Clones Dao-AILab/flash-attention, builds a wheel into /app/wheels (the same
+# directory the other builders use) and installs it; falls back to installing
+# straight from Git when no wheel is available.
+echo "Instalando FlashAttention completo do GitHub"
+FA_FULL_SRC="/app/wheels/src/flash-attention-full"
+# Start from a clean tree: `git clone` refuses a non-empty target directory,
+# which would abort a re-run under `set -e`.
+rm -rf -- "${FA_FULL_SRC:?}"
+git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$FA_FULL_SRC"
+# Keep an arch list that is already exported; otherwise target 8.9 (Ada/L40S).
+# The default also keeps `set -u` from aborting on an unset variable.
+export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-8.9}"
+# A failed wheel build must not abort the script, otherwise the Git fallback
+# below could never run.
+if ! python -m pip wheel -v --no-build-isolation --no-deps "$FA_FULL_SRC" -w /app/wheels; then
+  echo "[build] Falha ao gerar wheel do FlashAttention; usando fallback via pip" >&2
+fi
+# Newest generated wheel, or empty. `|| true` is required because under
+# `pipefail` an unmatched glob makes `ls` fail the whole pipeline.
+WHEEL="$(ls -t /app/wheels/flash_attn-*.whl 2>/dev/null | head -n1 || true)"
+if [ -n "$WHEEL" ]; then
+  python -m pip install -v --no-build-isolation --no-deps "$WHEEL"
+else
+  # Fallback: let pip build and install directly from Git
+  python -m pip install -v --no-build-isolation --no-deps git+https://github.com/Dao-AILab/flash-attention
+fi
+echo "FlashAttention completo instalado com sucesso"
 build_apex () {
   local SRC="/app/wheels/src/apex"
   echo "[build] Preparando fonte Apex em ${SRC}"
   if [ -d "$SRC/.git" ]; then
+    git -C "$SRC" fetch --all -p || true
+    git -C "$SRC" reset --hard HEAD || true
+    git -C "$SRC" clean -fdx || true
   else
     rm -rf "$SRC"
     git clone --depth 1 https://github.com/NVIDIA/apex "$SRC"
   fi
   echo "[build] Compilando Apex -> wheel"
+  export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0
+  python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w /app/wheels || true
   local W="$(ls -t /app/wheels/apex-*.whl 2>/dev/null | head -n1 || true)"
   if [ -n "${W}" ]; then
+    python -m pip install -v -U --no-deps "${W}" || true
     echo "[build] Apex instalado da wheel recém-compilada: ${W}"
   else
+    echo "[build] Nenhuma wheel Apex gerada; instalando do source"
+    python -m pip install -v --no-build-isolation "$SRC" || true
   fi
 }
 Q8_REPO="${Q8_REPO:-https://github.com/Lightricks/LTX-Video-Q8-Kernels}"
 Q8_COMMIT="${Q8_COMMIT:-f3066edea210082799ca5a2bbf9ef0321c5dd8fc}"
 build_q8 () {
   local SRC="/app/wheels/src/q8_kernels"
   rm -rf "$SRC"
   git clone --filter=blob:none "$Q8_REPO" "$SRC"
+  git -C "$SRC" checkout "$Q8_COMMIT"
+  git -C "$SRC" submodule update --init --recursive
   echo "[build] Compilando Q8 Kernels -> wheel"
+  python -m pip wheel -v --no-build-isolation "$SRC" -w /app/wheels || true
   local W="$(ls -t /app/wheels/q8_kernels-*.whl 2>/dev/null | head -n1 || true)"
   if [ -n "${W}" ]; then
+    python -m pip install -v -U --no-deps "${W}" || true
     echo "[build] Q8 instalado da wheel recém-compilada: ${W}"
   else
+    echo "[build] Nenhuma wheel q8_kernels gerada; instalando do source"
+    python -m pip install -v --no-build-isolation "$SRC" || true
   fi
 }
 #                               EXECUÇÃO
 # ============================================================================
+# Passo adicional SEM depender de "flash-attn" já instalado: trata somente o layer_norm
+#build_q8 || true
+# Apex (mantido)
+# Tenta primeiro via wheel no HF e, se não houver, compila e instala em wheel
+#echo "[flow] === apex ==="
+#HF_OUT="$(install_from_hf_by_prefix "apex" || true)"
+#if [ -n "${HF_OUT:-}" ]; then
+#  WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
+#  echo "[hub] Baixado: ${WHEEL_PATH}"
+#  python -m pip install -v -U --no-build-isolation "${WHEEL_PATH}" || true
+#  if ! check_apex; then
+#    echo "[flow] apex: import falhou após wheel; compilando"
+#    #build_apex || true
+#  fi
+#else
+#  echo "[hub] Nenhuma wheel apex compatível; compilando"
+#  build_apex || true
+#fi
+ # Q8 (optional): try a prebuilt wheel from the Hub first, build from source
+ # when none is available or the installed wheel does not import.
+ echo "[flow] === q8_kernels ==="
+ HF_OUT="$(install_from_hf_by_prefix "q8_kernels" || true)"
+ WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
+ # Treat the last output line as a wheel only when it is an existing file: the
+ # helper's output may contain log lines, which would otherwise be handed to
+ # pip and make the "no wheel" branch unreachable.
+ if [ -n "${WHEEL_PATH}" ] && [ -f "${WHEEL_PATH}" ]; then
+   echo "[hub] Baixado: ${WHEEL_PATH}"
+   python -m pip install -v -U --no-build-isolation "${WHEEL_PATH}" || true
+   if ! check_q8; then
+     echo "[flow] q8_kernels: import falhou após wheel; compilando"
+     build_q8 || true
+   fi
+ else
+   echo "[hub] Nenhuma wheel q8_kernels compatível; compilando"
+   build_q8 || true
+ fi
+# Upload the wheels produced above to the HF Hub (cache across restarts).
+# Best effort: a missing token or a failed upload is reported on stderr but
+# does not abort the builder.
 python - <<'PY'
 import os
+import sys
 from huggingface_hub import HfApi, HfFolder
+repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
 token = os.getenv("HF_TOKEN") or HfFolder.get_token()
 if not token:
+    # SystemExit(<str>) exits with status 1, which would abort the script under
+    # `set -e`; a missing token only disables the upload, so exit with 0.
+    print("HF_TOKEN ausente; upload desabilitado", file=sys.stderr)
+    raise SystemExit(0)
 api = HfApi(token=token)
+try:
+    api.upload_folder(
+        folder_path="/app/wheels",
+        repo_id=repo,
+        repo_type="model",
+        allow_patterns=["*.whl","NGC-DL-CONTAINER-LICENSE"],
+        ignore_patterns=["**/src/**","**/*.log","**/logs/**",".git/**"],
+    )
+    print("Upload concluído (wheels + licença).")
+except Exception as e:
+    # The Hub copy is only a cache; the locally installed packages stay usable.
+    print(f"Falha no upload: {e}", file=sys.stderr)
 PY
 chmod -R 777 /app/wheels || true
+echo "✅ Builder finalizado."