lhoestq HF Staff committed on
Commit
64d005b
·
1 Parent(s): c79c8e2

update to py3.12, use uv and add hf-mount

Browse files
Files changed (5) hide show
  1. Dockerfile +7 -71
  2. README.md +1 -1
  3. data/hf-mount.ipynb +49 -0
  4. data/spark.ipynb +3 -0
  5. on_startup.sh +4 -0
Dockerfile CHANGED
@@ -1,69 +1,11 @@
1
- FROM nvidia/cuda:11.3.1-base-ubuntu20.04
2
-
3
- ENV DEBIAN_FRONTEND=noninteractive \
4
- TZ=Europe/Paris
5
-
6
- # Remove any third-party apt sources to avoid issues with expiring keys.
7
- # Install some basic utilities
8
- RUN rm -f /etc/apt/sources.list.d/*.list && \
9
- apt-get update && apt-get install -y --no-install-recommends \
10
- curl \
11
- ca-certificates \
12
- sudo \
13
- git \
14
- wget \
15
- procps \
16
- git-lfs \
17
- zip \
18
- unzip \
19
- htop \
20
- vim \
21
- nano \
22
- bzip2 \
23
- libx11-6 \
24
- build-essential \
25
- libsndfile-dev \
26
- software-properties-common \
27
- && rm -rf /var/lib/apt/lists/*
28
-
29
- RUN add-apt-repository ppa:flexiondotorg/nvtop && \
30
- apt-get upgrade -y && \
31
- apt-get install -y --no-install-recommends nvtop
32
-
33
- RUN curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
34
- apt-get install -y nodejs && \
35
- npm install -g configurable-http-proxy
36
 
37
  # Create a working directory
38
  WORKDIR /app
39
 
40
- # Create a non-root user and switch to it
41
- RUN adduser --disabled-password --gecos '' --shell /bin/bash user \
42
- && chown -R user:user /app
43
- RUN echo "user ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-user
44
- USER user
45
-
46
- # All users can use /home/user as their home directory
47
  ENV HOME=/home/user
48
- RUN chmod -R 777 $HOME
49
-
50
- # Set up the Conda environment
51
- ENV CONDA_AUTO_UPDATE_CONDA=false \
52
- PATH=$HOME/miniconda/bin:$PATH
53
- RUN curl -sLo ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh \
54
- && chmod +x ~/miniconda.sh \
55
- && ~/miniconda.sh -b -p ~/miniconda \
56
- && rm ~/miniconda.sh \
57
- && conda clean -ya
58
-
59
  WORKDIR $HOME/app
60
 
61
- #######################################
62
- # Start root user section
63
- #######################################
64
-
65
- USER root
66
-
67
  # User Debian packages
68
  ## Security warning : Potential user code executed as root (build time)
69
  RUN --mount=target=/root/packages.txt,source=packages.txt \
@@ -74,17 +16,11 @@ RUN --mount=target=/root/packages.txt,source=packages.txt \
74
  RUN --mount=target=/root/on_startup.sh,source=on_startup.sh,readwrite \
75
  bash /root/on_startup.sh
76
 
77
- RUN mkdir /data && chown user:user /data
78
-
79
- #######################################
80
- # End root user section
81
- #######################################
82
-
83
- USER user
84
 
85
  # Jupyterlab
86
- RUN pip install jupyterlab==3.6.1 jupyter-server==2.3.0 tornado==6.2 ipywidgets plotly
87
- RUN pip install catppuccin-jupyterlab
88
  RUN mkdir -p $HOME/.jupyter/lab/user-settings/@jupyterlab/apputils-extension
89
  RUN mkdir -p $HOME/.jupyter/lab/user-settings/catppuccin_jupyterlab
90
  RUN echo '{"theme": "Catppuccin Frappé"}' > $HOME/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings
@@ -92,14 +28,14 @@ RUN echo '{"brandColor": "peach", "accentColor": "green"}' > $HOME/.jupyter/lab/
92
 
93
  # User Python packages
94
  RUN --mount=target=requirements.txt,source=requirements.txt \
95
- pip install --no-cache-dir --upgrade -r requirements.txt
96
 
97
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
98
- COPY --chown=user . $HOME/app
99
 
100
  RUN chmod +x start_server.sh
101
 
102
- COPY --chown=user login.html /home/user/miniconda/lib/python3.9/site-packages/jupyter_server/templates/login.html
103
 
104
  ENV PYTHONUNBUFFERED=1 \
105
  GRADIO_ALLOW_FLAGGING=never \
 
1
+ FROM astral/uv:python3.12-bookworm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  # Create a working directory
4
  WORKDIR /app
5
 
 
 
 
 
 
 
 
6
  ENV HOME=/home/user
 
 
 
 
 
 
 
 
 
 
 
7
  WORKDIR $HOME/app
8
 
 
 
 
 
 
 
9
  # User Debian packages
10
  ## Security warning : Potential user code executed as root (build time)
11
  RUN --mount=target=/root/packages.txt,source=packages.txt \
 
16
  RUN --mount=target=/root/on_startup.sh,source=on_startup.sh,readwrite \
17
  bash /root/on_startup.sh
18
 
19
+ RUN mkdir /data
 
 
 
 
 
 
20
 
21
  # Jupyterlab
22
+ RUN uv pip install --system jupyterlab==3.6.1 jupyter-server==2.3.0 tornado==6.2 ipywidgets plotly
23
+ RUN uv pip install --system catppuccin-jupyterlab
24
  RUN mkdir -p $HOME/.jupyter/lab/user-settings/@jupyterlab/apputils-extension
25
  RUN mkdir -p $HOME/.jupyter/lab/user-settings/catppuccin_jupyterlab
26
  RUN echo '{"theme": "Catppuccin Frappé"}' > $HOME/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings
 
28
 
29
  # User Python packages
30
  RUN --mount=target=requirements.txt,source=requirements.txt \
31
+ uv pip install --system --no-cache-dir --upgrade -r requirements.txt
32
 
33
  # Copy the current directory contents into the container at $HOME/app
34
+ COPY . $HOME/app
35
 
36
  RUN chmod +x start_server.sh
37
 
38
+ COPY login.html /usr/local/lib/python3.12/site-packages/jupyter_server/templates/login.html
39
 
40
  ENV PYTHONUNBUFFERED=1 \
41
  GRADIO_ALLOW_FLAGGING=never \
README.md CHANGED
@@ -10,7 +10,7 @@ tags:
10
  - jupyterlab
11
  - spark
12
  - datasets
13
- suggested_storage: small
14
  ---
15
 
16
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
10
  - jupyterlab
11
  - spark
12
  - datasets
13
+ rootless: true
14
  ---
15
 
16
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
data/hf-mount.ipynb ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "1d612ca3",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Mount Hugging Face Buckets and repos as local filesystems\n",
9
+ "\n",
10
+ "<img width=\"640\" alt=\"image\" src=\"https://github.com/user-attachments/assets/d68eac8c-4e28-4d2d-93b2-b049da846397\" />\n",
11
+ "\n",
12
+ "Mount Hugging Face Buckets and repos as local filesystems using [hf-mount](https://github.com/huggingface/hf-mount).\n",
13
+ "\n",
14
+ "No download, no copy, no waiting."
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": null,
20
+ "id": "196bc954",
21
+ "metadata": {},
22
+ "outputs": [],
23
+ "source": [
24
+ "!hf-mount start bucket myuser/my-bucket /data/my-bucket"
25
+ ]
26
+ }
27
+ ],
28
+ "metadata": {
29
+ "kernelspec": {
30
+ "display_name": "Python 3 (ipykernel)",
31
+ "language": "python",
32
+ "name": "python3"
33
+ },
34
+ "language_info": {
35
+ "codemirror_mode": {
36
+ "name": "ipython",
37
+ "version": 3
38
+ },
39
+ "file_extension": ".py",
40
+ "mimetype": "text/x-python",
41
+ "name": "python",
42
+ "nbconvert_exporter": "python",
43
+ "pygments_lexer": "ipython3",
44
+ "version": "3.12.12"
45
+ }
46
+ },
47
+ "nbformat": 4,
48
+ "nbformat_minor": 5
49
+ }
data/spark.ipynb CHANGED
@@ -30,7 +30,10 @@
30
  "metadata": {},
31
  "outputs": [],
32
  "source": [
 
33
  "df = spark.read.format(\"huggingface\").load(\"fka/awesome-chatgpt-prompts\")\n",
 
 
34
  "df.show()"
35
  ]
36
  }
 
30
  "metadata": {},
31
  "outputs": [],
32
  "source": [
33
+ "# Load a dataset\n",
34
  "df = spark.read.format(\"huggingface\").load(\"fka/awesome-chatgpt-prompts\")\n",
35
+ "# Or load data from a storage bucket:\n",
36
+ "# df = spark.read.format(\"huggingface\").option(\"data_dir\", \"OpenOrca\").load(\"buckets/lhoestq/datasets\")\n",
37
  "df.show()"
38
  ]
39
  }
on_startup.sh CHANGED
@@ -3,3 +3,7 @@
3
  # For example, to clone transformers and install it in dev mode:
4
  # git clone https://github.com/huggingface/transformers.git
5
  # cd transformers && pip install -e ".[dev]"
 
 
 
 
 
3
  # For example, to clone transformers and install it in dev mode:
4
  # git clone https://github.com/huggingface/transformers.git
5
  # cd transformers && pip install -e ".[dev]"
6
+
7
+ # Install hf-mount into /usr/local/bin (INSTALL_DIR is exported so the piped `sh` running the installer inherits it)
8
+ export INSTALL_DIR=/usr/local/bin
9
+ curl -fsSL https://raw.githubusercontent.com/huggingface/hf-mount/main/install.sh | sh