Spaces:

datacomp
/

teaching_arithmetic

Paused

App Files Community

meg-huggingface committed on Oct 22, 2024

Commit

2063a08

1 Parent(s): 81e41f4

Adding code to move the output files & model

Browse files

Files changed (3) hide show

Dockerfile +40 -59
train.sh +3 -0
upload_results.py +48 -0

Dockerfile CHANGED Viewed

@@ -1,58 +1,68 @@
 # Documentation: https://huggingface.co/docs/hub/spaces-sdks-docker
 FROM python:3.9
 RUN useradd -m -u 1000 user
 COPY --chown=user ./requirements.txt /requirements.txt
 COPY --chown=user ./train.sh /train.sh
 RUN chmod +x /train.sh
 RUN mkdir /app
 RUN pip install --no-cache-dir --upgrade -r /requirements.txt
-RUN git clone https://github.com/lee-ny/teaching_arithmetic.git /app/teaching_arithmetic
 #&& cd teaching_arithmetic && pip install -e .
 COPY --chown=user . /app
 RUN mkdir /app/teaching_arithmetic/out
 RUN chmod -R 777 /app/
-#&& cd teaching_arithmetic && pip install -e .
-#COPY --chown=user /teaching_arithmetic /app/teaching_arithmetic
-#RUN chmod +x /teaching_arithmetic
-#COPY --chown=user . /app
 USER user
 WORKDIR /app
-ENV PATH="/home/user/.local/bin:/opt/conda/bin:$PATH"
-ENV HOME="/home/user"
-#WORKDIR $HOME/app
-#RUN pip install --no-cache-dir --upgrade pip
-ARG PYTORCH_VERSION=2.1.0
-ARG PYTHON_VERSION=3.9 #8.10
-ARG CUDA_VERSION=11.8
-ARG CU_DNN=8.5.0.96
-ARG MAMBA_VERSION=24.3.0-0
-ARG CUDA_CHANNEL=nvidia
-ARG INSTALL_CHANNEL=pytorch
-# Automatically set by buildx
-ARG TARGETPLATFORM
-# Update basic dependencies we'll be using.
 #RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
 #        build-essential \
 #        ca-certificates \
@@ -62,11 +72,7 @@ ARG TARGETPLATFORM
 #        python3-pip \
 #        git && \
 #        rm -rf /var/lib/apt/lists/*
-#RUN teaching_arithmetic && pip install -e .
-# Install conda
-# translating Docker's TARGETPLATFORM into mamba arches
 #RUN case ${TARGETPLATFORM} in \
 #         "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
 #         *)              MAMBA_ARCH=x86_64   ;; \
@@ -75,8 +81,7 @@ ARG TARGETPLATFORM
 #RUN chmod +x ~/mambaforge.sh && \
 #    bash ~/mambaforge.sh -b -p /opt/conda && \
 #    rm ~/mambaforge.sh
-# Install pytorch
 # On arm64 we exit with an error code
 #RUN case ${TARGETPLATFORM} in \
 #         "linux/arm64")  exit 1 ;; \
@@ -84,31 +89,7 @@ ARG TARGETPLATFORM
 #                         /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=$PYTORCH_VERSION" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)"  ;; \
 #    esac && \
 #    /opt/conda/bin/conda clean -ya
-#USER 1000
-#COPY --chown=user ./requirements.txt requirements.txt
-#COPY ./requirements.txt requirements.txt
-#RUN pip install --no-cache-dir --upgrade -r requirements.txt
-#COPY --chown=user teaching_arithmetic /teaching_arithmetic
-#COPY . /app
-#COPY ./out /out
-#RUN chmod 777 -R /out
-#RUN chmod +x /out
-#COPY ./out/addition_train /out/
-#RUN chmod +x /teaching_arithmetic/train.py
-#COPY ./train.sh /train.sh
-#RUN chmod +x /train.sh
 # Expose the secret DEBUG at buildtime and use its value as git remote URL
 #RUN --mount=type=secret,id=DEBUG,mode=0444,required=true \
 # git init && \
 # git remote add origin $(cat /run/secrets/DEBUG)
-# USER 1000
-RUN chmod +x train.sh
-#CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
-ENTRYPOINT ["/train.sh"]

 # Documentation: https://huggingface.co/docs/hub/spaces-sdks-docker
 FROM python:3.9
+# Create a non-root user (UID 1000) that will own and run the Space content.
 RUN useradd -m -u 1000 user
 COPY --chown=user ./requirements.txt /requirements.txt
 COPY --chown=user ./train.sh /train.sh
+COPY --chown=user ./upload_results.py /upload_results.py
+# Make the root-level copies of both scripts executable in a single layer.
+# /train.sh is the container entrypoint (see ENTRYPOINT below).
+RUN chmod +x /train.sh /upload_results.py
+# Make the working directory for user.
 RUN mkdir /app
+# Install the Python dependencies as root, before switching users, so pip
+# does not complain about install permissions.
 RUN pip install --no-cache-dir --upgrade -r /requirements.txt
+# Clone the training code into the working directory. This still runs as root,
+# so the cloned files are root-owned until the chown further down.
+# NOTE(review): the clone is not pinned to a tag or commit, so a rebuild may
+# pick up different upstream code -- consider pinning once a known-good
+# revision is identified.
+RUN git clone https://github.com/lee-ny/teaching_arithmetic.git /app/teaching_arithmetic
+# Copy all files we have into the user's working directory.
 COPY --chown=user . /app
+# train.py tries to create the out directory and was hitting "permission
+# denied" because the cloned tree belonged to root. Create the directory up
+# front (-p so the build does not fail if it already exists) and hand the
+# whole /app tree to the non-root user instead of opening it up with
+# chmod -R 777.
+RUN mkdir -p /app/teaching_arithmetic/out && \
+    chown -R user /app
+# Everything from here on, including the running container, uses the
+# non-root user.
 USER user
+# Switch to the /app working directory.
 WORKDIR /app
+# Make the /app copies of the scripts executable too. This runs as "user",
+# which owns them after the chown above.
+RUN chmod +x train.sh upload_results.py
+# Could also use CMD. Example:
+# CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+ENTRYPOINT ["/train.sh"]
+# Keeping these as FYI, commented out, as they are other things we could do.
+#ENV PATH="/home/user/.local/bin:/opt/conda/bin:$PATH"
+#ENV HOME="/home/user"
+#WORKDIR $HOME/app
+#  We now install with requirements.txt
+#ARG PYTORCH_VERSION=2.1.0
+#ARG PYTHON_VERSION=3.9 #8.10
+#ARG CUDA_VERSION=11.8
+#ARG CU_DNN=8.5.0.96
+#ARG MAMBA_VERSION=24.3.0-0
+#ARG CUDA_CHANNEL=nvidia
+#ARG INSTALL_CHANNEL=pytorch
+# Automatically set by buildx
+#ARG TARGETPLATFORM
+# Updating basic dependencies we'll be using.
 #RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
 #        build-essential \
 #        ca-certificates \
 #        python3-pip \
 #        git && \
 #        rm -rf /var/lib/apt/lists/*
+# Installing conda, translating Docker's TARGETPLATFORM into mamba arches
 #RUN case ${TARGETPLATFORM} in \
 #         "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
 #         *)              MAMBA_ARCH=x86_64   ;; \
 #RUN chmod +x ~/mambaforge.sh && \
 #    bash ~/mambaforge.sh -b -p /opt/conda && \
 #    rm ~/mambaforge.sh
+# Installing pytorch
 # On arm64 we exit with an error code
 #RUN case ${TARGETPLATFORM} in \
 #         "linux/arm64")  exit 1 ;; \
 #                         /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=$PYTORCH_VERSION" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)"  ;; \
 #    esac && \
 #    /opt/conda/bin/conda clean -ya
 # Expose the secret DEBUG at buildtime and use its value as git remote URL
 #RUN --mount=type=secret,id=DEBUG,mode=0444,required=true \
 # git init && \
 # git remote add origin $(cat /run/secrets/DEBUG)

train.sh CHANGED Viewed

@@ -12,4 +12,7 @@ python train.py config2/addition/plain/train_addition_bal.py \
 --dataset='bal' --train_data_path="train_3digit_10000.txt" \
 --eval_addition=True --start='FILE:data/bal/test_10000.txt'
 echo "Done?"

 --dataset='bal' --train_data_path="train_3digit_10000.txt" \
 --eval_addition=True --start='FILE:data/bal/test_10000.txt'
+echo "Done training! Uploading!"
+python upload_results.py
 echo "Done?"

upload_results.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import argparse
+import os
+from huggingface_hub import HfApi
+TOKEN = os.environ.get("DATACOMP_TOKEN")
+api = HfApi(token=TOKEN)
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--out_dir",
+    default="out/",
+    type=str,
+    required=False,
+    help="Path to the output directory.",
+)
+parser.add_argument(
+    "--model_out_dir",
+    default="out/addition_plan/",
+    type=str,
+    required=False,
+    help="Path to the model output directory.",
+)
+args = parser.parse_args()
+print("Attempting to save the Space output directory, %s" % args.out_dir)
+try:
+    api.upload_folder(
+        folder_path=args.out_dir,
+        path_in_repo=args.out_dir,
+        repo_id="datacomp/teaching_arithmetic_out_directory",
+        repo_type="dataset",
+    )
+except Exception as e:
+    print("That didn't work. Error:")
+    print(e)
+print("Attempting to save the Space model, %s" % args.model_out_dir)
+try:
+    api.upload_folder(
+        folder_path=args.model_out_dir,
+        path_in_repo=args.model_out_dir,
+        repo_id="datacomp/addition_plain",
+        repo_type="model",
+    )
+except Exception as e:
+    print("That didn't work. Error:")
+    print(e)