Muennighoff committed on
Commit
37bbf74
1 Parent(s): 7a006aa

Upload srun_83m.sh

Browse files
Files changed (1) hide show
  1. srun_83m.sh +5 -21
srun_83m.sh CHANGED
@@ -1,19 +1,5 @@
1
- #!/bin/bash
2
- #SBATCH --exclude=nid007571,nid007112,nid006774,nid007502,nid007506,nid007507,nid005145,nid006692,nid007218,nid007123,nid006124,nid006123,nid007496,nid007237,nid006852,nid007206,nid006947,nid007212,nid006977,nid007222,nid005444,nid007219,nid007493,nid007221,nid005300,nid005619,nid006118,nid005203,nid006113,nid006481,nid007077,nid005208,nid005207,nid005879,nid005901
3
- #SBATCH --nodes=8
4
- #SBATCH --ntasks-per-node=1
5
- #SBATCH --cpus-per-task=32
6
- #SBATCH --mem=256G
7
- #SBATCH -p eap
8
- #SBATCH -t 2-0:00:00
9
- #SBATCH --gpus-per-node=mi250:8
10
- #SBATCH --exclusive=user
11
- #SBATCH --hint=nomultithread
12
- #SBATCH --account=project_462000119
13
- #SBATCH -o logs/%j.out
14
- #SBATCH -e logs/%j.err
15
-
16
  SLURM_JOB_NUM_NODES=32
 
17
 
18
  # if run without sbatch, invoke here
19
  if [ -z $SLURM_JOB_ID ]; then
@@ -28,18 +14,16 @@ set -euo pipefail
28
  ln -f -s $SLURM_JOB_ID.out logs/latest.out
29
  ln -f -s $SLURM_JOB_ID.err logs/latest.err
30
 
31
- KILL_SWITCH_PATH=kill-switch-1
32
- CHECKPOINT_PATH=checkpoints_83m
33
- TENSORBOARD_PATH=tensorboard_83m
34
- # Start from scratch
35
- rm -rf "$CHECKPOINT_PATH" "$TENSORBOARD_PATH"
36
  mkdir -p $CHECKPOINT_PATH
37
  mkdir -p $TENSORBOARD_PATH
38
 
39
  # Data
40
  VOCAB_FILE="gpt2/vocab.json"
41
  MERGE_FILE="gpt2/merges.txt"
42
- DATA_PATH="/scratch/project_462000119/data/pile/megatron_data"
43
 
44
  PP_SIZE=1
45
  TP_SIZE=1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  SLURM_JOB_NUM_NODES=32
2
+ VARIANT=83m
3
 
4
  # if run without sbatch, invoke here
5
  if [ -z $SLURM_JOB_ID ]; then
 
14
  ln -f -s $SLURM_JOB_ID.out logs/latest.out
15
  ln -f -s $SLURM_JOB_ID.err logs/latest.err
16
 
17
+ KILL_SWITCH_PATH=kill-switch-$VARIANT
18
+ CHECKPOINT_PATH=checkpoints_$VARIANT
19
+ TENSORBOARD_PATH=tensorboard_$VARIANT
 
 
20
  mkdir -p $CHECKPOINT_PATH
21
  mkdir -p $TENSORBOARD_PATH
22
 
23
  # Data
24
  VOCAB_FILE="gpt2/vocab.json"
25
  MERGE_FILE="gpt2/merges.txt"
26
+ DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document"
27
 
28
  PP_SIZE=1
29
  TP_SIZE=1