Spaces:
Running
Running
update to py3.12, use uv and add hf-mount
Browse files
- Dockerfile +7 -71
- README.md +1 -1
- data/hf-mount.ipynb +49 -0
- data/spark.ipynb +3 -0
- on_startup.sh +4 -0
Dockerfile
CHANGED
|
@@ -1,69 +1,11 @@
|
|
| 1 |
-
FROM
|
| 2 |
-
|
| 3 |
-
ENV DEBIAN_FRONTEND=noninteractive \
|
| 4 |
-
TZ=Europe/Paris
|
| 5 |
-
|
| 6 |
-
# Remove any third-party apt sources to avoid issues with expiring keys.
|
| 7 |
-
# Install some basic utilities
|
| 8 |
-
RUN rm -f /etc/apt/sources.list.d/*.list && \
|
| 9 |
-
apt-get update && apt-get install -y --no-install-recommends \
|
| 10 |
-
curl \
|
| 11 |
-
ca-certificates \
|
| 12 |
-
sudo \
|
| 13 |
-
git \
|
| 14 |
-
wget \
|
| 15 |
-
procps \
|
| 16 |
-
git-lfs \
|
| 17 |
-
zip \
|
| 18 |
-
unzip \
|
| 19 |
-
htop \
|
| 20 |
-
vim \
|
| 21 |
-
nano \
|
| 22 |
-
bzip2 \
|
| 23 |
-
libx11-6 \
|
| 24 |
-
build-essential \
|
| 25 |
-
libsndfile-dev \
|
| 26 |
-
software-properties-common \
|
| 27 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 28 |
-
|
| 29 |
-
RUN add-apt-repository ppa:flexiondotorg/nvtop && \
|
| 30 |
-
apt-get upgrade -y && \
|
| 31 |
-
apt-get install -y --no-install-recommends nvtop
|
| 32 |
-
|
| 33 |
-
RUN curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
|
| 34 |
-
apt-get install -y nodejs && \
|
| 35 |
-
npm install -g configurable-http-proxy
|
| 36 |
|
| 37 |
# Create a working directory
|
| 38 |
WORKDIR /app
|
| 39 |
|
| 40 |
-
# Create a non-root user and switch to it
|
| 41 |
-
RUN adduser --disabled-password --gecos '' --shell /bin/bash user \
|
| 42 |
-
&& chown -R user:user /app
|
| 43 |
-
RUN echo "user ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-user
|
| 44 |
-
USER user
|
| 45 |
-
|
| 46 |
-
# All users can use /home/user as their home directory
|
| 47 |
ENV HOME=/home/user
|
| 48 |
-
RUN chmod -R 777 $HOME
|
| 49 |
-
|
| 50 |
-
# Set up the Conda environment
|
| 51 |
-
ENV CONDA_AUTO_UPDATE_CONDA=false \
|
| 52 |
-
PATH=$HOME/miniconda/bin:$PATH
|
| 53 |
-
RUN curl -sLo ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh \
|
| 54 |
-
&& chmod +x ~/miniconda.sh \
|
| 55 |
-
&& ~/miniconda.sh -b -p ~/miniconda \
|
| 56 |
-
&& rm ~/miniconda.sh \
|
| 57 |
-
&& conda clean -ya
|
| 58 |
-
|
| 59 |
WORKDIR $HOME/app
|
| 60 |
|
| 61 |
-
#######################################
|
| 62 |
-
# Start root user section
|
| 63 |
-
#######################################
|
| 64 |
-
|
| 65 |
-
USER root
|
| 66 |
-
|
| 67 |
# User Debian packages
|
| 68 |
## Security warning : Potential user code executed as root (build time)
|
| 69 |
RUN --mount=target=/root/packages.txt,source=packages.txt \
|
|
@@ -74,17 +16,11 @@ RUN --mount=target=/root/packages.txt,source=packages.txt \
|
|
| 74 |
RUN --mount=target=/root/on_startup.sh,source=on_startup.sh,readwrite \
|
| 75 |
bash /root/on_startup.sh
|
| 76 |
|
| 77 |
-
RUN mkdir /data
|
| 78 |
-
|
| 79 |
-
#######################################
|
| 80 |
-
# End root user section
|
| 81 |
-
#######################################
|
| 82 |
-
|
| 83 |
-
USER user
|
| 84 |
|
| 85 |
# Jupyterlab
|
| 86 |
-
RUN pip install jupyterlab==3.6.1 jupyter-server==2.3.0 tornado==6.2 ipywidgets plotly
|
| 87 |
-
RUN pip install catppuccin-jupyterlab
|
| 88 |
RUN mkdir -p $HOME/.jupyter/lab/user-settings/@jupyterlab/apputils-extension
|
| 89 |
RUN mkdir -p $HOME/.jupyter/lab/user-settings/catppuccin_jupyterlab
|
| 90 |
RUN echo '{"theme": "Catppuccin Frappé"}' > $HOME/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings
|
|
@@ -92,14 +28,14 @@ RUN echo '{"brandColor": "peach", "accentColor": "green"}' > $HOME/.jupyter/lab/
|
|
| 92 |
|
| 93 |
# User Python packages
|
| 94 |
RUN --mount=target=requirements.txt,source=requirements.txt \
|
| 95 |
-
pip install --no-cache-dir --upgrade -r requirements.txt
|
| 96 |
|
| 97 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
| 98 |
-
COPY
|
| 99 |
|
| 100 |
RUN chmod +x start_server.sh
|
| 101 |
|
| 102 |
-
COPY
|
| 103 |
|
| 104 |
ENV PYTHONUNBUFFERED=1 \
|
| 105 |
GRADIO_ALLOW_FLAGGING=never \
|
|
|
|
| 1 |
+
FROM astral/uv:python3.12-bookworm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
# Create a working directory
|
| 4 |
WORKDIR /app
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
ENV HOME=/home/user
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
WORKDIR $HOME/app
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
# User Debian packages
|
| 10 |
## Security warning : Potential user code executed as root (build time)
|
| 11 |
RUN --mount=target=/root/packages.txt,source=packages.txt \
|
|
|
|
| 16 |
RUN --mount=target=/root/on_startup.sh,source=on_startup.sh,readwrite \
|
| 17 |
bash /root/on_startup.sh
|
| 18 |
|
| 19 |
+
RUN mkdir /data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# Jupyterlab
|
| 22 |
+
RUN uv pip install --system jupyterlab==3.6.1 jupyter-server==2.3.0 tornado==6.2 ipywidgets plotly
|
| 23 |
+
RUN uv pip install --system catppuccin-jupyterlab
|
| 24 |
RUN mkdir -p $HOME/.jupyter/lab/user-settings/@jupyterlab/apputils-extension
|
| 25 |
RUN mkdir -p $HOME/.jupyter/lab/user-settings/catppuccin_jupyterlab
|
| 26 |
RUN echo '{"theme": "Catppuccin Frappé"}' > $HOME/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings
|
|
|
|
| 28 |
|
| 29 |
# User Python packages
|
| 30 |
RUN --mount=target=requirements.txt,source=requirements.txt \
|
| 31 |
+
uv pip install --system --no-cache-dir --upgrade -r requirements.txt
|
| 32 |
|
| 33 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
| 34 |
+
COPY . $HOME/app
|
| 35 |
|
| 36 |
RUN chmod +x start_server.sh
|
| 37 |
|
| 38 |
+
COPY login.html /usr/local/lib/python3.12/site-packages/jupyter_server/templates/login.html
|
| 39 |
|
| 40 |
ENV PYTHONUNBUFFERED=1 \
|
| 41 |
GRADIO_ALLOW_FLAGGING=never \
|
README.md
CHANGED
|
@@ -10,7 +10,7 @@ tags:
|
|
| 10 |
- jupyterlab
|
| 11 |
- spark
|
| 12 |
- datasets
|
| 13 |
-
|
| 14 |
---
|
| 15 |
|
| 16 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 10 |
- jupyterlab
|
| 11 |
- spark
|
| 12 |
- datasets
|
| 13 |
+
rootless: true
|
| 14 |
---
|
| 15 |
|
| 16 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
data/hf-mount.ipynb
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "1d612ca3",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"# Mount Hugging Face Buckets and repos as local filesystems\n",
|
| 9 |
+
"\n",
|
| 10 |
+
"<img width=\"640\" alt=\"image\" src=\"https://github.com/user-attachments/assets/d68eac8c-4e28-4d2d-93b2-b049da846397\" />\n",
|
| 11 |
+
"\n",
|
| 12 |
+
"Mount Hugging Face Buckets and repos as local filesystems using [hf-mount](https://github.com/huggingface/hf-mount).\n",
|
| 13 |
+
"\n",
|
| 14 |
+
"No download, no copy, no waiting."
|
| 15 |
+
]
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"cell_type": "code",
|
| 19 |
+
"execution_count": null,
|
| 20 |
+
"id": "196bc954",
|
| 21 |
+
"metadata": {},
|
| 22 |
+
"outputs": [],
|
| 23 |
+
"source": [
|
| 24 |
+
"!hf-mount start bucket myuser/my-bucket /data/my-bucket"
|
| 25 |
+
]
|
| 26 |
+
}
|
| 27 |
+
],
|
| 28 |
+
"metadata": {
|
| 29 |
+
"kernelspec": {
|
| 30 |
+
"display_name": "Python 3 (ipykernel)",
|
| 31 |
+
"language": "python",
|
| 32 |
+
"name": "python3"
|
| 33 |
+
},
|
| 34 |
+
"language_info": {
|
| 35 |
+
"codemirror_mode": {
|
| 36 |
+
"name": "ipython",
|
| 37 |
+
"version": 3
|
| 38 |
+
},
|
| 39 |
+
"file_extension": ".py",
|
| 40 |
+
"mimetype": "text/x-python",
|
| 41 |
+
"name": "python",
|
| 42 |
+
"nbconvert_exporter": "python",
|
| 43 |
+
"pygments_lexer": "ipython3",
|
| 44 |
+
"version": "3.12.12"
|
| 45 |
+
}
|
| 46 |
+
},
|
| 47 |
+
"nbformat": 4,
|
| 48 |
+
"nbformat_minor": 5
|
| 49 |
+
}
|
data/spark.ipynb
CHANGED
|
@@ -30,7 +30,10 @@
|
|
| 30 |
"metadata": {},
|
| 31 |
"outputs": [],
|
| 32 |
"source": [
|
|
|
|
| 33 |
"df = spark.read.format(\"huggingface\").load(\"fka/awesome-chatgpt-prompts\")\n",
|
|
|
|
|
|
|
| 34 |
"df.show()"
|
| 35 |
]
|
| 36 |
}
|
|
|
|
| 30 |
"metadata": {},
|
| 31 |
"outputs": [],
|
| 32 |
"source": [
|
| 33 |
+
"# Load a dataset\n",
|
| 34 |
"df = spark.read.format(\"huggingface\").load(\"fka/awesome-chatgpt-prompts\")\n",
|
| 35 |
+
"# Or load data from a storage bucket:\n",
|
| 36 |
+
"# df = spark.read.format(\"huggingface\").option(\"data_dir\", \"OpenOrca\").load(\"buckets/lhoestq/datasets\")\n",
|
| 37 |
"df.show()"
|
| 38 |
]
|
| 39 |
}
|
on_startup.sh
CHANGED
|
@@ -3,3 +3,7 @@
|
|
| 3 |
# For example, to clone transformers and install it in dev mode:
|
| 4 |
# git clone https://github.com/huggingface/transformers.git
|
| 5 |
# cd transformers && pip install -e ".[dev]"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
# For example, to clone transformers and install it in dev mode:
|
| 4 |
# git clone https://github.com/huggingface/transformers.git
|
| 5 |
# cd transformers && pip install -e ".[dev]"
|
| 6 |
+
|
| 7 |
+
# Install hf-mount
|
| 8 |
+
export INSTALL_DIR=/usr/local/bin  # exported so the piped `sh` running the installer inherits it (presumably read by install.sh as the target directory — confirm)
|
| 9 |
+
curl -fsSL https://raw.githubusercontent.com/huggingface/hf-mount/main/install.sh | sh
|