Upload folder using huggingface_hub
Browse files
- Dockerfile +45 -0
- GPU_AND_SLURM_CONFIG.md +70 -0
- MANIFEST.txt +21 -0
- README.md +63 -0
- SHA256SUMS +12 -0
- skillzero_best_checkpoints_20260506.tar.part-aa +3 -0
- skillzero_best_checkpoints_20260506.tar.part-ab +3 -0
- skillzero_best_checkpoints_20260506.tar.part-ac +3 -0
- skillzero_best_checkpoints_20260506.tar.part-ad +3 -0
- skillzero_best_checkpoints_20260506.tar.part-ae +3 -0
- upload_with_hf.sh +22 -0
Dockerfile
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Image for running SkillZero: CUDA 12.4.1 + cuDNN (devel) on Ubuntu 22.04,
# Miniconda under /opt/conda, and a dedicated `skillzero` conda environment
# (Python 3.12) into which the repository and its requirements are installed.
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

# Non-interactive apt, unbuffered Python stdout/stderr, and no pip cache in layers.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Build toolchain, download tools, and the shared libraries installed alongside
# them; the apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    git \
    wget \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): "latest" is unpinned, so rebuilds are not reproducible. Newer
# Miniconda installers may also require accepting the default-channel Terms of
# Service before a non-interactive `conda create` succeeds -- confirm against
# the installer version actually pulled.
RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh \
    && bash /tmp/miniconda.sh -b -p /opt/conda \
    && rm /tmp/miniconda.sh

ENV PATH=/opt/conda/bin:$PATH

RUN conda create -y -n skillzero python=3.12 && conda clean -afy

# Run every following build-time RUN step inside the `skillzero` environment.
SHELL ["conda", "run", "-n", "skillzero", "/bin/bash", "-c"]

# SHELL only wraps shell-form instructions at build time. The exec-form CMD
# below and any `docker run <image> python3 ...` bypass it, so without this
# PATH entry they would pick up the base interpreter from /opt/conda/bin, which
# has none of the packages installed below.
ENV PATH=/opt/conda/envs/skillzero/bin:$PATH

WORKDIR /workspace/SkillZero

# Copy the repository into the image, or bind-mount it at runtime.
# Example build context should be the SkillZero repo root:
# docker build -f export_packages/skillzero_best_checkpoints_20260506/Dockerfile -t skillzero:export .
# NOTE(review): with the repo root as context, `COPY .` also sends checkpoints/
# and export_packages/ (including the multi-GB *.tar.part-* files) into the
# image unless a .dockerignore excludes them -- confirm one exists.
COPY . /workspace/SkillZero

# flash-attn is installed last with --no-build-isolation so that its build can
# see the packages installed by the preceding steps.
RUN pip install -U pip setuptools wheel \
    && pip install -r requirements.txt \
    && pip install -e . \
    && pip install flash-attn==2.7.4.post1 --no-build-isolation

# Keep Hugging Face caches under /workspace and make Hydra print full tracebacks.
ENV HF_HOME=/workspace/hf \
    HUGGINGFACE_HUB_CACHE=/workspace/hf/hub \
    HYDRA_FULL_ERROR=1

CMD ["/bin/bash"]
|
GPU_AND_SLURM_CONFIG.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# GPU and Slurm Configuration
|
| 2 |
+
|
| 3 |
+
## ALFWorld Best Checkpoints
|
| 4 |
+
|
| 5 |
+
The best ALFWorld checkpoints were trained with:
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
#SBATCH -p a100
|
| 9 |
+
#SBATCH --gres=gpu:4
|
| 10 |
+
#SBATCH --cpus-per-task=32
|
| 11 |
+
#SBATCH --mem=200G
|
| 12 |
+
#SBATCH -t 2-00:00:00
|
| 13 |
+
```
|
| 14 |
+
|
| 15 |
+
Important training overrides:
|
| 16 |
+
|
| 17 |
+
```bash
|
| 18 |
+
trainer.n_gpus_per_node=4
|
| 19 |
+
trainer.nnodes=1
|
| 20 |
+
trainer.total_training_steps=180
|
| 21 |
+
trainer.save_freq=10
|
| 22 |
+
trainer.test_freq=10
|
| 23 |
+
env.env_name=alfworld/AlfredTWEnv
|
| 24 |
+
env.rollout.n=4
|
| 25 |
+
data.train_batch_size=8
|
| 26 |
+
data.val_batch_size=16
|
| 27 |
+
actor_rollout_ref.rollout.gpu_memory_utilization=0.4
|
| 28 |
+
actor_rollout_ref.rollout.max_model_len=3072
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
## Search Run
|
| 32 |
+
|
| 33 |
+
The Search run used one node with 4 A100 GPUs allocated:
|
| 34 |
+
|
| 35 |
+
```bash
|
| 36 |
+
#SBATCH -p a100
|
| 37 |
+
#SBATCH --gres=gpu:4
|
| 38 |
+
#SBATCH --cpus-per-task=32
|
| 39 |
+
#SBATCH --mem=220G
|
| 40 |
+
#SBATCH -t 2-00:00:00
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
GPU assignment:
|
| 44 |
+
|
| 45 |
+
```bash
|
| 46 |
+
CUDA_VISIBLE_DEVICES=3 # local retriever service
|
| 47 |
+
CUDA_VISIBLE_DEVICES=0,1,2 # training
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
Important Search fix:
|
| 51 |
+
|
| 52 |
+
```bash
|
| 53 |
+
data.max_prompt_length=6144
|
| 54 |
+
actor_rollout_ref.rollout.max_model_len=6144
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
This avoids the observed Qwen2-VL RoPE shape mismatch where generated prompt state exceeded 4096 tokens.
|
| 58 |
+
|
| 59 |
+
## Docker Runtime
|
| 60 |
+
|
| 61 |
+
Suggested runtime command:
|
| 62 |
+
|
| 63 |
+
```bash
|
| 64 |
+
docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \
|
| 65 |
+
-v /path/to/SkillZero:/workspace/SkillZero \
|
| 66 |
+
-v /path/to/checkpoints:/workspace/SkillZero/checkpoints \
|
| 67 |
+
-it skillzero:export
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
For Slurm clusters, prefer running through the provided Slurm scripts rather than plain Docker unless the cluster explicitly supports Docker or Enroot/Singularity.
|
MANIFEST.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
SkillZero best checkpoint export manifest
|
| 2 |
+
|
| 3 |
+
Selected by validation success rate:
|
| 4 |
+
|
| 5 |
+
1. checkpoints/SkillZero_alfworld/skillzero_alfworld_vl_3b_safe/global_step_160
|
| 6 |
+
val/success_rate: 0.594
|
| 7 |
+
included archive path: checkpoints/SkillZero_alfworld/skillzero_alfworld_vl_3b_safe/global_step_160
|
| 8 |
+
|
| 9 |
+
2. checkpoints/SkillZero_alfworld/skillzero_alfworld_vl_3b_safe/global_step_150
|
| 10 |
+
val/success_rate: 0.477
|
| 11 |
+
included archive path: checkpoints/SkillZero_alfworld/skillzero_alfworld_vl_3b_safe/global_step_150
|
| 12 |
+
|
| 13 |
+
Metadata included:
|
| 14 |
+
- README.md
|
| 15 |
+
- Dockerfile
|
| 16 |
+
- GPU_AND_SLURM_CONFIG.md
|
| 17 |
+
- MANIFEST.txt
|
| 18 |
+
|
| 19 |
+
Not included by default:
|
| 20 |
+
- Search best checkpoint global_step_180, because it is third by overall validation success rate and about 44GB.
|
| 21 |
+
Include it separately if you want per-task best checkpoints instead of top-two-overall checkpoints.
|
README.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SkillZero Best Checkpoints Export
|
| 2 |
+
|
| 3 |
+
This package contains the two best checkpoints selected by validation success rate from the completed SkillZero runs.
|
| 4 |
+
|
| 5 |
+
## Included Checkpoints
|
| 6 |
+
|
| 7 |
+
1. ALFWorld `global_step_160`
|
| 8 |
+
- Validation metric: `val/success_rate = 0.594`
|
| 9 |
+
- Archive path:
|
| 10 |
+
`checkpoints/SkillZero_alfworld/skillzero_alfworld_vl_3b_safe/global_step_160`
|
| 11 |
+
|
| 12 |
+
2. ALFWorld `global_step_150`
|
| 13 |
+
- Validation metric: `val/success_rate = 0.477`
|
| 14 |
+
- Archive path:
|
| 15 |
+
`checkpoints/SkillZero_alfworld/skillzero_alfworld_vl_3b_safe/global_step_150`
|
| 16 |
+
|
| 17 |
+
## Related Search Checkpoint
|
| 18 |
+
|
| 19 |
+
The best Search checkpoint by validation success rate is not included in the "top two overall" package, but is useful for reproducing the Search run:
|
| 20 |
+
|
| 21 |
+
- Search `global_step_180`
|
| 22 |
+
- Validation metric: `val/success_rate = 0.356`
|
| 23 |
+
- Test metrics:
|
| 24 |
+
- `test/full_skill/success_rate = 0.282`
|
| 25 |
+
- `test/no_skill/success_rate = 0.310`
|
| 26 |
+
- Checkpoint path, if packaged separately:
|
| 27 |
+
`checkpoints/SkillZero_search/skillzero_search_vl_3b_local_retriever/global_step_180`
|
| 28 |
+
|
| 29 |
+
## Hardware Used
|
| 30 |
+
|
| 31 |
+
Training was submitted through Slurm on the `a100` partition.
|
| 32 |
+
|
| 33 |
+
- ALFWorld:
|
| 34 |
+
- GPUs: 4 x A100
|
| 35 |
+
- CPUs per task: 32
|
| 36 |
+
- Memory: 200GB
|
| 37 |
+
- Time limit: 2 days
|
| 38 |
+
|
| 39 |
+
- Search local retriever:
|
| 40 |
+
- GPUs: 4 x A100 allocated
|
| 41 |
+
- Training used GPUs 0,1,2
|
| 42 |
+
- Local retriever used GPU 3
|
| 43 |
+
- CPUs per task: 32
|
| 44 |
+
- Memory: 220GB
|
| 45 |
+
- Time limit: 2 days
|
| 46 |
+
|
| 47 |
+
## Runtime Notes
|
| 48 |
+
|
| 49 |
+
- Python environment name used on the cluster: `skillzero`
|
| 50 |
+
- Retriever environment name: `retriever`
|
| 51 |
+
- Main model: `Qwen/Qwen2.5-VL-3B-Instruct`
|
| 52 |
+
- Training entry point: `python3 -m verl.trainer.main_ppo`
|
| 53 |
+
- Original training logs are not required to use the checkpoints.
|
| 54 |
+
|
| 55 |
+
## Restore
|
| 56 |
+
|
| 57 |
+
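First verify the downloaded parts with `sha256sum -c --ignore-missing SHA256SUMS` (the checksum file also lists helper files that are not part of this upload), then reassemble and extract them with `cat skillzero_best_checkpoints_20260506.tar.part-* | tar -xf -`. After extracting the archive, place checkpoint directories under: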
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
checkpoints/SkillZero_alfworld/skillzero_alfworld_vl_3b_safe/
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
Then use `trainer.resume_mode=resume_path` and set `trainer.resume_from_path` to the target `global_step_*` directory.
|
SHA256SUMS
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
f7107194b12d4a0910d2ea372126316220b80b232bd65746748369c0a594f3b8 skillzero_best_checkpoints_20260506.tar.part-aa
|
| 2 |
+
d88b0449da962144344b3ef0a274baf8b25d0494791b37e0ee37fd7fa68662b7 skillzero_best_checkpoints_20260506.tar.part-ab
|
| 3 |
+
d5936d47310a3b83f05535bb864ab14c38c4f852512efa33e4be09f6722c101f skillzero_best_checkpoints_20260506.tar.part-ac
|
| 4 |
+
1bfb853234689307c2557fb35610be408795f351a85e57dc138bdd73f57e001b skillzero_best_checkpoints_20260506.tar.part-ad
|
| 5 |
+
5c4b2dce0b6cd71babc55b0ac73982924e737fd148b19f8115d44a361ddcc68c skillzero_best_checkpoints_20260506.tar.part-ae
|
| 6 |
+
befb32fc0d7eb24186e4accb824a74859e6662082021c8f95cebc3a8bb81b1eb README.md
|
| 7 |
+
863167a1dd18ac713239896c0d97176a800e161b76bc722e6a01ab7c44f54e8a Dockerfile
|
| 8 |
+
24e977d62c458f5dfc350b3df115fd0d6092229a15a096f9444bd13a22b802e7 GPU_AND_SLURM_CONFIG.md
|
| 9 |
+
cbc3d95d90685b3f546f67d6aad220ccb49463d3f12d3cbeb0d439574fa03204 MANIFEST.txt
|
| 10 |
+
409d2bd016bb8dd376a5b04b1719d1736b87882e36563c7f6da59e43bfc03ab0 GOOGLE_DRIVE_UPLOAD.md
|
| 11 |
+
1f4b88433555db3c299077b01946c04803c3efca87f915ca04ec253e635d5971 upload_with_rclone.sh
|
| 12 |
+
d3ed06023bd0c2b47612568029e720c8830c0e8c6667f69a145277e5c2d0388b upload_with_hf.sh
|
skillzero_best_checkpoints_20260506.tar.part-aa
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7107194b12d4a0910d2ea372126316220b80b232bd65746748369c0a594f3b8
|
| 3 |
+
size 21474836480
|
skillzero_best_checkpoints_20260506.tar.part-ab
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d88b0449da962144344b3ef0a274baf8b25d0494791b37e0ee37fd7fa68662b7
|
| 3 |
+
size 21474836480
|
skillzero_best_checkpoints_20260506.tar.part-ac
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5936d47310a3b83f05535bb864ab14c38c4f852512efa33e4be09f6722c101f
|
| 3 |
+
size 21474836480
|
skillzero_best_checkpoints_20260506.tar.part-ad
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bfb853234689307c2557fb35610be408795f351a85e57dc138bdd73f57e001b
|
| 3 |
+
size 21474836480
|
skillzero_best_checkpoints_20260506.tar.part-ae
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c4b2dce0b6cd71babc55b0ac73982924e737fd148b19f8115d44a361ddcc68c
|
| 3 |
+
size 6737141760
|
upload_with_hf.sh
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
#
# Upload this export package to a Hugging Face Hub repository using the `hf`
# command-line tool.
#
# Usage: upload_with_hf.sh <namespace/repo-name> [repo-type]
#   <namespace/repo-name>  target repository id (required)
#   [repo-type]            passed to --repo-type; defaults to "model"
set -euo pipefail

REPO_ID="${1:?Usage: $0 <namespace/repo-name> [repo-type]}"
REPO_TYPE="${2:-model}"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Guard clause: stop with a hint when the Hugging Face CLI is not on PATH.
command -v hf >/dev/null 2>&1 || {
  echo "hf command is not available. Install with: pip install -U huggingface_hub" >&2
  exit 1
}

# Work from the directory containing this script so that "." and the include
# patterns below refer to the package contents regardless of the caller's cwd.
cd "$SCRIPT_DIR"

# Glob patterns handed to `hf upload` as --include filters, in this order.
upload_patterns=(
  "skillzero_best_checkpoints_20260506.tar.part-*"
  "SHA256SUMS"
  "*.md"
  "Dockerfile"
  "MANIFEST.txt"
  "upload_with_hf.sh"
)

include_flags=()
for pattern in "${upload_patterns[@]}"; do
  include_flags+=(--include "$pattern")
done

hf upload "$REPO_ID" . --repo-type "$REPO_TYPE" "${include_flags[@]}"
|