thecollabagepatch committed on
Commit
a628b37
·
1 Parent(s): c985b41

solving for those nightlies that no longer exist lol

Browse files
Files changed (1) hide show
  1. Dockerfile +101 -116
Dockerfile CHANGED
@@ -1,23 +1,20 @@
1
- # thecollabagepatch/magenta:latest
2
  FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu22.04
3
 
4
- # CUDA libs present + on loader path
5
  RUN apt-get update && apt-get install -y --no-install-recommends \
6
  cuda-libraries-12-4 && rm -rf /var/lib/apt/lists/*
7
  ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda-12.4/lib64:/usr/local/cuda-12.4/compat:/usr/local/cuda/targets/x86_64-linux/lib:${LD_LIBRARY_PATH}
8
  RUN ln -sf /usr/local/cuda/targets/x86_64-linux/lib /usr/local/cuda/lib64 || true
9
 
10
- # Ensure the NVIDIA repo key is present (non-interactive) and install cuDNN 9.8
11
  RUN set -eux; \
12
  apt-get update && apt-get install -y --no-install-recommends gnupg ca-certificates curl; \
13
  install -d -m 0755 /usr/share/keyrings; \
14
- # Refresh the *same* keyring the base source uses (no second source file)
15
  curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub \
16
  | gpg --batch --yes --dearmor -o /usr/share/keyrings/cuda-archive-keyring.gpg; \
17
  apt-get update; \
18
- # If libcudnn is "held", unhold it so we can move to 9.8
19
  apt-mark unhold libcudnn9-cuda-12 || true; \
20
- # Install cuDNN 9.8 for CUDA 12 (correct dev package name!)
21
  apt-get install -y --no-install-recommends \
22
  'libcudnn9-cuda-12=9.8.*' \
23
  'libcudnn9-dev-cuda-12=9.8.*' \
@@ -26,135 +23,123 @@ RUN set -eux; \
26
  ldconfig; \
27
  rm -rf /var/lib/apt/lists/*
28
 
29
- # (optional) preload workaround if still needed
30
  ENV LD_PRELOAD=/usr/local/cuda/lib64/libcusparse.so.12:/usr/local/cuda/lib64/libcublas.so.12:/usr/local/cuda/lib64/libcublasLt.so.12:/usr/local/cuda/lib64/libcufft.so.11:/usr/local/cuda/lib64/libcusolver.so.11
31
-
32
- # Better allocator (less fragmentation than BFC during XLA autotune)
33
  ENV TF_GPU_ALLOCATOR=cuda_malloc_async
34
-
35
- # Let cuBLAS use TF32 fast path on Ada (L40S) for big GEMMs
36
  ENV TF_ENABLE_CUBLAS_TF32=1 NVIDIA_TF32_OVERRIDE=1
37
 
38
  ENV DEBIAN_FRONTEND=noninteractive \
39
  PYTHONUNBUFFERED=1 \
40
  PIP_NO_CACHE_DIR=1 \
41
  TF_FORCE_GPU_ALLOW_GROWTH=true \
42
- XLA_PYTHON_CLIENT_PREALLOCATE=false
 
43
 
44
- ENV JAX_PLATFORMS=""
45
 
46
- # --- OS deps ---
47
  RUN apt-get update && apt-get install -y --no-install-recommends \
48
- software-properties-common curl ca-certificates git \
 
49
  libsndfile1 ffmpeg \
50
- build-essential pkg-config \
51
  && add-apt-repository ppa:deadsnakes/ppa -y \
52
  && apt-get update && apt-get install -y --no-install-recommends \
53
- python3.11 python3.11-venv python3.11-distutils python3-pip \
54
  && rm -rf /var/lib/apt/lists/*
55
 
56
- # Make python3 => 3.11 for convenience
57
- RUN ln -sf /usr/bin/python3.11 /usr/bin/python && python -m pip install --upgrade pip
58
-
59
- # --- Python deps (pin order matters!) ---
60
- # 1) JAX CUDA pins
61
- RUN python -m pip install "jax[cuda12]==0.7.1" "jaxlib==0.7.1"
62
-
63
- # 2) Lock seqio early to avoid backtracking madness
64
- RUN python -m pip install "seqio==0.0.11"
65
-
66
- # 3) Install Magenta RT *without* deps so we control pins
67
- RUN python -m pip install --no-deps 'git+https://github.com/magenta/magenta-realtime#egg=magenta_rt[gpu]'
68
-
69
- # 4) TF nightlies (MATCH DATES!)
70
- RUN python -m pip install \
71
- "tf_nightly==2.20.0.dev20250619" \
72
- "tensorflow-text-nightly==2.20.0.dev20250316" \
73
- "tf-hub-nightly"
74
-
75
- # 5) tf2jax pinned alongside tf_nightly so pip doesn’t drag stable TF
76
- RUN python -m pip install tf2jax "tf_nightly==2.20.0.dev20250619"
77
-
78
- # 6) The rest of MRT deps + API runtime deps
79
- RUN python -m pip install \
80
- gin-config librosa resampy soundfile \
81
- google-auth google-auth-oauthlib google-auth-httplib2 \
82
- google-api-core googleapis-common-protos google-resumable-media \
83
- google-cloud-storage requests tqdm typing-extensions numpy==2.1.3 \
84
- fastapi uvicorn[standard] python-multipart pyloudnorm
85
-
86
- # 7) Exact commits for T5X/Flaxformer as in pyproject
87
- RUN python -m pip install \
88
- "t5x @ git+https://github.com/google-research/t5x.git@92c5b46" \
89
- "flaxformer @ git+https://github.com/google/flaxformer@399ea3a"
90
-
91
- # ---- FINAL: enforce TF nightlies and clean any stable TF ----
92
- RUN python - <<'PY'
93
- import sys, sysconfig, glob, os, shutil
94
- # Find a writable site dir (site-packages OR dist-packages)
95
- cands = [sysconfig.get_paths().get('purelib'), sysconfig.get_paths().get('platlib')]
96
- cands += [p for p in sys.path if p and p.endswith(('site-packages','dist-packages'))]
97
- site = next(p for p in cands if p and os.path.isdir(p))
98
-
99
- patterns = [
100
- "tensorflow", "tensorflow-*.dist-info", "tensorflow-*.egg-info",
101
- "tf-nightly-*.dist-info", "tf_nightly-*.dist-info",
102
- "tensorflow_text", "tensorflow_text-*.dist-info",
103
- "tf-hub-nightly-*.dist-info", "tf_hub_nightly-*.dist-info",
104
- "tf_keras-nightly-*.dist-info", "tf_keras_nightly-*.dist-info",
105
- "tensorboard*", "tb-nightly-*.dist-info",
106
- "keras*", # remove stray keras
107
- "tensorflow_hub*", "tensorflow_io*",
108
- ]
109
- for pat in patterns:
110
- for path in glob.glob(os.path.join(site, pat)):
111
- if os.path.isdir(path): shutil.rmtree(path, ignore_errors=True)
112
- else:
113
- try: os.remove(path)
114
- except FileNotFoundError: pass
115
-
116
- print("TF/Hub/Text cleared in:", site)
117
- PY
118
-
119
- # Reinstall pinned nightlies in ONE transaction
120
- RUN python -m pip install --no-cache-dir --force-reinstall \
121
- "tf-nightly==2.20.0.dev20250619" \
122
- "tensorflow-text-nightly==2.20.0.dev20250316" \
123
- "tf-hub-nightly"
124
-
125
- RUN python -m pip install huggingface_hub
126
-
127
- RUN python -m pip install --no-cache-dir --force-reinstall "protobuf==4.25.3"
128
-
129
- RUN python -m pip install gradio
130
-
131
- RUN python -m pip install soxr
132
-
133
- # Switch to Spaces’ preferred user
134
- # Switch to Spaces’ preferred user
135
  RUN useradd -m -u 1000 appuser
136
  WORKDIR /home/appuser/app
137
 
138
- # Copy from *build context* into image, owned by appuser
139
- COPY --chown=appuser:appuser app.py /home/appuser/app/app.py
140
-
141
- # NEW: shared utils + worker
142
- COPY --chown=appuser:appuser utils.py /home/appuser/app/utils.py
143
- COPY --chown=appuser:appuser jam_worker.py /home/appuser/app/jam_worker.py
144
-
145
- COPY --chown=appuser:appuser one_shot_generation.py /home/appuser/app/one_shot_generation.py
146
-
147
- COPY --chown=appuser:appuser model_management.py /home/appuser/app/model_management.py
148
-
149
- COPY --chown=appuser:appuser documentation.html /home/appuser/app/documentation.html
150
-
151
- COPY --chown=appuser:appuser lil_demo_540p.mp4 /home/appuser/app/lil_demo_540p.mp4
152
-
153
- COPY --chown=appuser:appuser magentaRT_rt_tester.html /home/appuser/app/magentaRT_rt_tester.html
154
-
155
- COPY --chown=appuser:appuser magenta_prompts.js /home/appuser/app/magenta_prompts.js
156
-
157
- # Create docs directory and copy documentation files
158
  COPY --chown=appuser:appuser docs/ /home/appuser/app/docs/
159
 
160
  USER appuser
 
1
+ # thecollabagepatch/magenta:latest - Duplicable x86_64 build
2
  FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu22.04
3
 
4
+ # Ensure CUDA libraries are on loader path
5
  RUN apt-get update && apt-get install -y --no-install-recommends \
6
  cuda-libraries-12-4 && rm -rf /var/lib/apt/lists/*
7
  ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda-12.4/lib64:/usr/local/cuda-12.4/compat:/usr/local/cuda/targets/x86_64-linux/lib:${LD_LIBRARY_PATH}
8
  RUN ln -sf /usr/local/cuda/targets/x86_64-linux/lib /usr/local/cuda/lib64 || true
9
 
10
+ # Install cuDNN 9.8 for better compatibility
11
  RUN set -eux; \
12
  apt-get update && apt-get install -y --no-install-recommends gnupg ca-certificates curl; \
13
  install -d -m 0755 /usr/share/keyrings; \
 
14
  curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub \
15
  | gpg --batch --yes --dearmor -o /usr/share/keyrings/cuda-archive-keyring.gpg; \
16
  apt-get update; \
 
17
  apt-mark unhold libcudnn9-cuda-12 || true; \
 
18
  apt-get install -y --no-install-recommends \
19
  'libcudnn9-cuda-12=9.8.*' \
20
  'libcudnn9-dev-cuda-12=9.8.*' \
 
23
  ldconfig; \
24
  rm -rf /var/lib/apt/lists/*
25
 
26
+ # Performance optimizations for L40S/Ada
27
  ENV LD_PRELOAD=/usr/local/cuda/lib64/libcusparse.so.12:/usr/local/cuda/lib64/libcublas.so.12:/usr/local/cuda/lib64/libcublasLt.so.12:/usr/local/cuda/lib64/libcufft.so.11:/usr/local/cuda/lib64/libcusolver.so.11
 
 
28
  ENV TF_GPU_ALLOCATOR=cuda_malloc_async
 
 
29
  ENV TF_ENABLE_CUBLAS_TF32=1 NVIDIA_TF32_OVERRIDE=1
30
 
31
  ENV DEBIAN_FRONTEND=noninteractive \
32
  PYTHONUNBUFFERED=1 \
33
  PIP_NO_CACHE_DIR=1 \
34
  TF_FORCE_GPU_ALLOW_GROWTH=true \
35
+ XLA_PYTHON_CLIENT_PREALLOCATE=false \
36
+ JAX_PLATFORMS=""
37
 
38
+ # Run all following RUN steps under bash; pipefail makes a pipeline fail when any stage fails, not just the last one
+ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
39
 
40
+ # Install system dependencies
41
  RUN apt-get update && apt-get install -y --no-install-recommends \
42
+ software-properties-common curl ca-certificates \
43
+ build-essential pkg-config git \
44
  libsndfile1 ffmpeg \
 
45
  && add-apt-repository ppa:deadsnakes/ppa -y \
46
  && apt-get update && apt-get install -y --no-install-recommends \
47
+ python3.11 python3.11-dev python3.11-venv python3-pip \
48
  && rm -rf /var/lib/apt/lists/*
49
 
50
+ # Make python3.11 default and install uv for faster package installs
51
+ RUN ln -sf /usr/bin/python3.11 /usr/bin/python && \
52
+ ln -sf /usr/bin/python3.11 /usr/bin/python3 && \
53
+ python -m pip install --upgrade pip && \
54
+ python -m pip install uv
55
+
56
+ # CRITICAL: Install TensorFlow FIRST to block tensorflow-cpu
57
+ # Using generic tf-nightly (no specific date) for duplicability
58
+ RUN uv pip install --system tf-nightly
59
+
60
+ # Install JAX with CUDA support (pinned for stability)
61
+ RUN uv pip install --system "jax[cuda12]==0.4.35" "jaxlib==0.4.35"
62
+
63
+ # Install base dependencies
64
+ RUN uv pip install --system \
65
+ absl-py chex gin-config numpy requests tqdm typing-extensions \
66
+ google-cloud-storage librosa resampy soundfile sentencepiece
67
+
68
+ # Fetch t5x at its pinned commit and install it in editable mode.
69
+ # --no-deps keeps t5x's own requirements (tensorflow-cpu) out of the image.
70
+ RUN git clone https://github.com/google-research/t5x.git /t5x && \
71
+ git -C /t5x checkout 92c5b467a5964d06c351c7eae4aa4bcd341c7ded && \
72
+ uv pip install --system --no-deps -e /t5x
73
+
74
+ # Fetch flaxformer at commit 399ea3a and install it in editable mode.
75
+ # --no-deps: flaxformer's own requirements are not resolved in this step.
76
+ RUN git clone https://github.com/google/flaxformer.git /flaxformer && \
77
+ git -C /flaxformer checkout 399ea3a && \
78
+ uv pip install --system --no-deps -e /flaxformer
79
+
80
+ # Install seqio from source (editable, --no-deps), then PATCH OUT tensorflow_text.
81
+ # CRITICAL FIX: delete the tensorflow_text import line (noted as not needed by Magenta RT).
82
+ RUN git clone https://github.com/google/seqio.git /seqio && \
83
+ uv pip install --system --no-deps -e /seqio && \
84
+ sed -i '/import tensorflow_text as tf_text/d' \
85
+ /seqio/seqio/vocabularies.py
86
+
87
+ # Install airio (a t5x dependency) from source in editable mode.
88
+ # --no-deps: airio's own requirements are not resolved in this step.
89
+ RUN git clone https://github.com/google/airio.git /airio && \
90
+ uv pip install --system --no-deps -e /airio
91
+
92
+ # Install clu (CommonLoopUtils) from source in editable mode.
93
+ # --no-deps: clu's own requirements are not resolved in this step.
94
+ RUN git clone https://github.com/google/CommonLoopUtils.git /clu && \
95
+ uv pip install --system --no-deps -e /clu
96
+
97
+ # Now install all remaining dependencies these packages need
98
+ RUN uv pip install --system \
99
+ flax optax orbax-checkpoint \
100
+ fiddle cached_property tf2jax \
101
+ aqtp etils jestimator \
102
+ tensorflow-datasets tfds-nightly \
103
+ apache-beam pyyaml rouge-score sacrebleu scipy \
104
+ grain-nightly editdistance pyglove
105
+
106
+ # Patch jestimator for newer JAX (PartitionSpec moved to jax.sharding); tries both dist-packages locations, best-effort with a warning if neither file exists
107
+ RUN sed -i 's|from jax.experimental.pjit import PartitionSpec|from jax.sharding import PartitionSpec|g' \
108
+ /usr/local/lib/python3.11/dist-packages/jestimator/amos_helper.py || \
109
+ sed -i 's|from jax.experimental.pjit import PartitionSpec|from jax.sharding import PartitionSpec|g' \
110
+ /usr/lib/python3.11/dist-packages/jestimator/amos_helper.py || echo "WARNING: jestimator/amos_helper.py not found; PartitionSpec patch was NOT applied" >&2
111
+
112
+ # Install magenta-realtime from source in editable mode.
113
+ # --no-deps: its requirements are supplied by the other install steps in this file.
114
+ RUN git clone https://github.com/magenta/magenta-realtime.git /magenta-realtime-src && \
115
+ uv pip install --system --no-deps -e /magenta-realtime-src
116
+
117
+ # API and audio processing dependencies (the extras spec is quoted so bash never treats "[standard]" as a glob)
118
+ RUN uv pip install --system \
119
+ fastapi "uvicorn[standard]" python-multipart \
120
+ pyloudnorm gradio soxr huggingface_hub
121
+
122
+ # Ensure compatible protobuf version
123
+ RUN uv pip install --system --force-reinstall "protobuf>=5.27.0"
124
+
125
+ # Create HuggingFace Space user
 
 
 
126
  RUN useradd -m -u 1000 appuser
127
  WORKDIR /home/appuser/app
128
 
129
+ # Set cache directory: create it while still root and hand /home/appuser/.cache to appuser so the non-root runtime user can write to it
130
+ ENV MAGENTA_RT_CACHE_DIR=/home/appuser/.cache/magenta_rt
131
+ RUN mkdir -p "$MAGENTA_RT_CACHE_DIR" && chown -R appuser:appuser /home/appuser/.cache
132
+
133
+ # Copy application files with proper ownership
134
+ COPY --chown=appuser:appuser app.py /home/appuser/app/
135
+ COPY --chown=appuser:appuser utils.py /home/appuser/app/
136
+ COPY --chown=appuser:appuser jam_worker.py /home/appuser/app/
137
+ COPY --chown=appuser:appuser one_shot_generation.py /home/appuser/app/
138
+ COPY --chown=appuser:appuser model_management.py /home/appuser/app/
139
+ COPY --chown=appuser:appuser documentation.html /home/appuser/app/
140
+ COPY --chown=appuser:appuser lil_demo_540p.mp4 /home/appuser/app/
141
+ COPY --chown=appuser:appuser magentaRT_rt_tester.html /home/appuser/app/
142
+ COPY --chown=appuser:appuser magenta_prompts.js /home/appuser/app/
 
 
 
 
 
 
143
  COPY --chown=appuser:appuser docs/ /home/appuser/app/docs/
144
 
145
  USER appuser