meg-huggingface committed on
Commit
2063a08
·
1 Parent(s): 81e41f4

Adding code to move the output files & model

Browse files
Files changed (3) hide show
  1. Dockerfile +40 -59
  2. train.sh +3 -0
  3. upload_results.py +48 -0
Dockerfile CHANGED
@@ -1,58 +1,68 @@
1
  # Documentation: https://huggingface.co/docs/hub/spaces-sdks-docker
2
-
3
  FROM python:3.9
4
 
5
-
6
-
7
  RUN useradd -m -u 1000 user
8
 
9
-
10
  COPY --chown=user ./requirements.txt /requirements.txt
11
  COPY --chown=user ./train.sh /train.sh
 
12
  RUN chmod +x /train.sh
 
 
13
 
 
14
  RUN mkdir /app
15
 
 
 
16
  RUN pip install --no-cache-dir --upgrade -r /requirements.txt
17
- RUN git clone https://github.com/lee-ny/teaching_arithmetic.git /app/teaching_arithmetic
 
18
  #&& cd teaching_arithmetic && pip install -e .
19
 
 
20
  COPY --chown=user . /app
 
 
21
  RUN mkdir /app/teaching_arithmetic/out
22
  RUN chmod -R 777 /app/
23
 
24
- #&& cd teaching_arithmetic && pip install -e .
25
- #COPY --chown=user /teaching_arithmetic /app/teaching_arithmetic
26
-
27
- #RUN chmod +x /teaching_arithmetic
28
-
29
- #COPY --chown=user . /app
30
-
31
-
32
  USER user
33
 
 
34
  WORKDIR /app
35
 
36
- ENV PATH="/home/user/.local/bin:/opt/conda/bin:$PATH"
37
- ENV HOME="/home/user"
38
-
39
- #WORKDIR $HOME/app
40
 
 
 
 
41
 
42
- #RUN pip install --no-cache-dir --upgrade pip
43
 
44
- ARG PYTORCH_VERSION=2.1.0
45
- ARG PYTHON_VERSION=3.9 #8.10
46
- ARG CUDA_VERSION=11.8
47
- ARG CU_DNN=8.5.0.96
48
- ARG MAMBA_VERSION=24.3.0-0
49
- ARG CUDA_CHANNEL=nvidia
50
- ARG INSTALL_CHANNEL=pytorch
51
- # Automatically set by buildx
52
- ARG TARGETPLATFORM
53
 
54
 
55
- # Update basic dependencies we'll be using.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  #RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
57
  # build-essential \
58
  # ca-certificates \
@@ -62,11 +72,7 @@ ARG TARGETPLATFORM
62
  # python3-pip \
63
  # git && \
64
  # rm -rf /var/lib/apt/lists/*
65
-
66
-
67
- #RUN teaching_arithmetic && pip install -e .
68
- # Install conda
69
- # translating Docker's TARGETPLATFORM into mamba arches
70
  #RUN case ${TARGETPLATFORM} in \
71
  # "linux/arm64") MAMBA_ARCH=aarch64 ;; \
72
  # *) MAMBA_ARCH=x86_64 ;; \
@@ -75,8 +81,7 @@ ARG TARGETPLATFORM
75
  #RUN chmod +x ~/mambaforge.sh && \
76
  # bash ~/mambaforge.sh -b -p /opt/conda && \
77
  # rm ~/mambaforge.sh
78
-
79
- # Install pytorch
80
  # On arm64 we exit with an error code
81
  #RUN case ${TARGETPLATFORM} in \
82
  # "linux/arm64") exit 1 ;; \
@@ -84,31 +89,7 @@ ARG TARGETPLATFORM
84
  # /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=$PYTORCH_VERSION" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \
85
  # esac && \
86
  # /opt/conda/bin/conda clean -ya
87
-
88
- #USER 1000
89
- #COPY --chown=user ./requirements.txt requirements.txt
90
- #COPY ./requirements.txt requirements.txt
91
- #RUN pip install --no-cache-dir --upgrade -r requirements.txt
92
-
93
- #COPY --chown=user teaching_arithmetic /teaching_arithmetic
94
- #COPY . /app
95
- #COPY ./out /out
96
- #RUN chmod 777 -R /out
97
-
98
- #RUN chmod +x /out
99
- #COPY ./out/addition_train /out/
100
- #RUN chmod +x /teaching_arithmetic/train.py
101
- #COPY ./train.sh /train.sh
102
- #RUN chmod +x /train.sh
103
-
104
  # Expose the secret DEBUG at buildtime and use its value as git remote URL
105
  #RUN --mount=type=secret,id=DEBUG,mode=0444,required=true \
106
  # git init && \
107
  # git remote add origin $(cat /run/secrets/DEBUG)
108
-
109
-
110
- # USER 1000
111
- RUN chmod +x train.sh
112
-
113
- #CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
114
- ENTRYPOINT ["/train.sh"]
 
# Documentation: https://huggingface.co/docs/hub/spaces-sdks-docker

# The full (non-slim) image is used because the build runs `git clone` below.
# NOTE(review): confirm git is available before switching to a slimmer base.
FROM python:3.9

# Create the non-root user (UID 1000) that the container runs as.
RUN useradd -m -u 1000 user

# Install Python dependencies first, as root, so that this layer is rebuilt
# only when requirements.txt changes rather than on every script edit.
COPY --chown=user ./requirements.txt /requirements.txt
RUN pip install --no-cache-dir --upgrade -r /requirements.txt

# Clone the training code into the working directory (git creates /app).
# NOTE(review): the clone is not pinned to a tag or commit, so a rebuild may
# pick up upstream changes. An editable install of the clone
# (`pip install -e /app/teaching_arithmetic`) was considered and left out.
RUN git clone https://github.com/lee-ny/teaching_arithmetic.git /app/teaching_arithmetic

# Entrypoint script and upload helper live at the filesystem root so they can
# be called by absolute path regardless of the current directory.
COPY --chown=user ./train.sh /train.sh
COPY --chown=user ./upload_results.py /upload_results.py
RUN chmod +x /train.sh /upload_results.py

# Copy all files of the Space into the user's working directory.
COPY --chown=user . /app

# train.py creates/writes the `out` directory inside the clone, and the clone
# itself was created by root. Pre-create the directory (-p: no failure if it
# already exists) and hand the whole tree to the runtime user instead of
# making it world-writable with `chmod -R 777`. The copies of the scripts
# under /app are made executable here as well.
RUN mkdir -p /app/teaching_arithmetic/out \
    && chown -R user /app \
    && chmod +x /app/train.sh /app/upload_results.py

# Everything from here on, including the running container, is non-root.
USER user

WORKDIR /app

# Could also use CMD. Example:
# CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
ENTRYPOINT ["/train.sh"]
47
 
 
48
 
 
 
 
 
 
 
 
 
 
49
 
50
 
51
+ # Keeping these as FYI, commented out, as they are other things we could do.
52
+ #ENV PATH="/home/user/.local/bin:/opt/conda/bin:$PATH"
53
+ #ENV HOME="/home/user"
54
+ #WORKDIR $HOME/app
55
+ # We now install with requirements.txt
56
+ #ARG PYTORCH_VERSION=2.1.0
57
+ #ARG PYTHON_VERSION=3.9 #8.10
58
+ #ARG CUDA_VERSION=11.8
59
+ #ARG CU_DNN=8.5.0.96
60
+ #ARG MAMBA_VERSION=24.3.0-0
61
+ #ARG CUDA_CHANNEL=nvidia
62
+ #ARG INSTALL_CHANNEL=pytorch
63
+ # Automatically set by buildx
64
+ #ARG TARGETPLATFORM
65
+ # Updating basic dependencies we'll be using.
66
  #RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
67
  # build-essential \
68
  # ca-certificates \
 
72
  # python3-pip \
73
  # git && \
74
  # rm -rf /var/lib/apt/lists/*
75
+ # Installing conda, translating Docker's TARGETPLATFORM into mamba arches
 
 
 
 
76
  #RUN case ${TARGETPLATFORM} in \
77
  # "linux/arm64") MAMBA_ARCH=aarch64 ;; \
78
  # *) MAMBA_ARCH=x86_64 ;; \
 
81
  #RUN chmod +x ~/mambaforge.sh && \
82
  # bash ~/mambaforge.sh -b -p /opt/conda && \
83
  # rm ~/mambaforge.sh
84
+ # Installing pytorch
 
85
  # On arm64 we exit with an error code
86
  #RUN case ${TARGETPLATFORM} in \
87
  # "linux/arm64") exit 1 ;; \
 
89
  # /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=$PYTORCH_VERSION" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \
90
  # esac && \
91
  # /opt/conda/bin/conda clean -ya
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  # Expose the secret DEBUG at buildtime and use its value as git remote URL
93
  #RUN --mount=type=secret,id=DEBUG,mode=0444,required=true \
94
  # git init && \
95
  # git remote add origin $(cat /run/secrets/DEBUG)
 
 
 
 
 
 
 
train.sh CHANGED
@@ -12,4 +12,7 @@ python train.py config2/addition/plain/train_addition_bal.py \
12
  --dataset='bal' --train_data_path="train_3digit_10000.txt" \
13
  --eval_addition=True --start='FILE:data/bal/test_10000.txt'
14
 
 
 
 
15
  echo "Done?"
 
12
  --dataset='bal' --train_data_path="train_3digit_10000.txt" \
13
  --eval_addition=True --start='FILE:data/bal/test_10000.txt'
14
 
15
# Training has finished; push the outputs to the Hub.
echo "Done training! Uploading!"
# Call the helper by absolute path: the Dockerfile copies it to
# /upload_results.py, whereas a relative path only works if the current
# directory happens to contain a copy.
# NOTE(review): relative --out_dir defaults in upload_results.py are still
# resolved against the current directory; confirm it is the training repo.
python /upload_results.py

echo "Done?"
upload_results.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Upload the training outputs of this Space to the Hugging Face Hub.

Two folders are uploaded, each on a best-effort basis (a failure is printed
and the script carries on):

* ``--out_dir``       -> dataset repo ``datacomp/teaching_arithmetic_out_directory``
* ``--model_out_dir`` -> model repo ``datacomp/addition_plain``

The Hub token is read from the ``DATACOMP_TOKEN`` environment variable.
"""
import argparse
import os

from huggingface_hub import HfApi

# Access token for the Hub; None when the variable is not set.
TOKEN = os.environ.get("DATACOMP_TOKEN")
if not TOKEN:
    # Surface the most likely cause of a failed upload up front instead of
    # leaving it to be inferred from the error printed further down.
    print(
        "Warning: DATACOMP_TOKEN is not set; "
        "uploads will likely fail unless other credentials are available."
    )
api = HfApi(token=TOKEN)

parser = argparse.ArgumentParser()
parser.add_argument(
    "--out_dir",
    default="out/",
    type=str,
    required=False,
    help="Path to the output directory.",
)
parser.add_argument(
    "--model_out_dir",
    # NOTE(review): "addition_plan" may be a typo for "addition_plain" (the
    # target model repo is datacomp/addition_plain) -- confirm against the
    # out_dir used by the training config before changing it.
    default="out/addition_plan/",
    type=str,
    required=False,
    help="Path to the model output directory.",
)
args = parser.parse_args()


def _upload(description, folder, repo_id, repo_type):
    """Upload ``folder`` to ``repo_id``, printing (not raising) any error.

    Args:
        description: Human-readable name used in the progress message.
        folder: Local folder to upload; also used as the path inside the repo.
        repo_id: Target repository on the Hub.
        repo_type: Hub repository type, e.g. "dataset" or "model".
    """
    print("Attempting to save the Space %s, %s" % (description, folder))
    try:
        api.upload_folder(
            folder_path=folder,
            path_in_repo=folder,
            repo_id=repo_id,
            repo_type=repo_type,
        )
    except Exception as e:
        # Deliberately best-effort: one failed upload must not prevent the
        # other from being attempted.
        print("That didn't work. Error:")
        print(e)


_upload(
    "output directory",
    args.out_dir,
    "datacomp/teaching_arithmetic_out_directory",
    "dataset",
)
_upload("model", args.model_out_dir, "datacomp/addition_plain", "model")