Agnostic cloud gpu docker image and Jupyter lab (#1097)
Browse files
.github/workflows/main.yml
CHANGED
@@ -113,7 +113,7 @@ jobs:
|
|
113 |
id: metadata
|
114 |
uses: docker/metadata-action@v5
|
115 |
with:
|
116 |
-
images: winglian/axolotl-runpod
|
117 |
- name: Login to Docker Hub
|
118 |
uses: docker/login-action@v3
|
119 |
with:
|
@@ -128,9 +128,11 @@ jobs:
|
|
128 |
build-args: |
|
129 |
BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
130 |
CUDA=${{ matrix.cuda }}
|
131 |
-
file: ./docker/Dockerfile-runpod
|
132 |
push: ${{ github.event_name != 'pull_request' }}
|
133 |
tags: |
|
134 |
${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
|
|
135 |
${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
|
|
|
136 |
labels: ${{ steps.metadata.outputs.labels }}
|
|
|
113 |
id: metadata
|
114 |
uses: docker/metadata-action@v5
|
115 |
with:
|
116 |
+
images: winglian/axolotl-cloud
|
117 |
- name: Login to Docker Hub
|
118 |
uses: docker/login-action@v3
|
119 |
with:
|
|
|
128 |
build-args: |
|
129 |
BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
130 |
CUDA=${{ matrix.cuda }}
|
131 |
+
file: ./docker/Dockerfile-cloud
|
132 |
push: ${{ github.event_name != 'pull_request' }}
|
133 |
tags: |
|
134 |
${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
135 |
+
winglian/axolotl-runpod:main-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
136 |
${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
|
137 |
+
${{ (matrix.is_latest) && format('{0}-latest', 'winglian/axolotl-runpod:main') || '' }}
|
138 |
labels: ${{ steps.metadata.outputs.labels }}
|
README.md
CHANGED
@@ -25,7 +25,7 @@ Features:
|
|
25 |
- [Installation](#installation)
|
26 |
- [Docker](#docker)
|
27 |
- [Conda/Pip venv](#condapip-venv)
|
28 |
-
- [
|
29 |
- [LambdaLabs](#lambdalabs)
|
30 |
- [Windows](#windows)
|
31 |
- [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
|
@@ -172,9 +172,11 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
|
|
172 |
```
|
173 |
Get the token at huggingface.co/settings/tokens
|
174 |
|
175 |
-
####
|
176 |
|
177 |
-
|
|
|
|
|
178 |
|
179 |
#### LambdaLabs
|
180 |
<details>
|
|
|
25 |
- [Installation](#installation)
|
26 |
- [Docker](#docker)
|
27 |
- [Conda/Pip venv](#condapip-venv)
|
28 |
+
- [Cloud GPU](#cloud-gpu) - Runpod, Latitude
|
29 |
- [LambdaLabs](#lambdalabs)
|
30 |
- [Windows](#windows)
|
31 |
- [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
|
|
|
172 |
```
|
173 |
Get the token at huggingface.co/settings/tokens
|
174 |
|
175 |
+
#### Cloud GPU
|
176 |
|
177 |
+
For cloud GPU providers that support docker images, use [`winglian/axolotl-cloud:main-latest`](https://hub.docker.com/r/winglian/axolotl-cloud/tags)
|
178 |
+
|
179 |
+
- on RunPod use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
|
180 |
|
181 |
#### LambdaLabs
|
182 |
<details>
|
docker/{Dockerfile-runpod → Dockerfile-cloud}
RENAMED
@@ -7,14 +7,16 @@ ENV TRANSFORMERS_CACHE="/workspace/data/huggingface-cache/hub"
|
|
7 |
ENV HF_HOME="/workspace/data/huggingface-cache/hub"
|
8 |
ENV HF_HUB_ENABLE_HF_TRANSFER="1"
|
9 |
|
10 |
-
COPY scripts/runpod-entrypoint.sh /root/runpod-entrypoint.sh
|
11 |
|
|
|
|
|
12 |
RUN apt install --yes --no-install-recommends openssh-server tmux && \
|
13 |
mkdir -p ~/.ssh && \
|
14 |
chmod 700 ~/.ssh && \
|
15 |
printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
|
16 |
-
chmod +x /workspace/axolotl/scripts/runpod-entrypoint.sh && \
|
17 |
-
chmod +x /root/runpod-entrypoint.sh
|
18 |
|
19 |
-
ENTRYPOINT ["/root/runpod-entrypoint.sh"]
|
20 |
CMD ["sleep", "infinity"]
|
|
|
7 |
ENV HF_HOME="/workspace/data/huggingface-cache/hub"
|
8 |
ENV HF_HUB_ENABLE_HF_TRANSFER="1"
|
9 |
|
10 |
+
COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
|
11 |
|
12 |
+
RUN pip install jupyterlab notebook && \
|
13 |
+
jupyter lab clean
|
14 |
RUN apt install --yes --no-install-recommends openssh-server tmux && \
|
15 |
mkdir -p ~/.ssh && \
|
16 |
chmod 700 ~/.ssh && \
|
17 |
printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
|
18 |
+
chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
|
19 |
+
chmod +x /root/cloud-entrypoint.sh
|
20 |
|
21 |
+
ENTRYPOINT ["/root/cloud-entrypoint.sh"]
|
22 |
CMD ["sleep", "infinity"]
|
scripts/{runpod-entrypoint.sh → cloud-entrypoint.sh}
RENAMED
@@ -17,5 +17,16 @@ else
|
|
17 |
echo "No PUBLIC_KEY ENV variable provided, not starting openSSH daemon"
|
18 |
fi
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
# Execute the passed arguments (CMD)
|
21 |
exec "$@"
|
|
|
17 |
echo "No PUBLIC_KEY ENV variable provided, not starting openSSH daemon"
|
18 |
fi
|
19 |
|
20 |
+
# Check if JUPYTER_PASSWORD is set and not empty
|
21 |
+
if [ -n "$JUPYTER_PASSWORD" ]; then
|
22 |
+
# Set JUPYTER_TOKEN to the value of JUPYTER_PASSWORD
|
23 |
+
export JUPYTER_TOKEN="$JUPYTER_PASSWORD"
|
24 |
+
fi
|
25 |
+
|
26 |
+
if [ "$JUPYTER_DISABLE" != "1" ]; then
|
27 |
+
# Run Jupyter Lab in the background
|
28 |
+
jupyter lab --allow-root --ip 0.0.0.0 &
|
29 |
+
fi
|
30 |
+
|
31 |
# Execute the passed arguments (CMD)
|
32 |
exec "$@"
|