Muennighoff committed on
Commit
a94304f
1 Parent(s): 6a5cfd9
Files changed (4) hide show
  1. launch.sh +53 -0
  2. sbatch_mtf_xp3ru.sh +147 -0
  3. xp3_train_ru.txt +1 -0
  4. xp3_validation_ru.txt +1 -0
launch.sh ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash

# Launch script using torch.distributed.run(). Used by slurm
# scripts, don't invoke directly.
#
# Arguments:
#   "$@"  forwarded verbatim to torch.distributed.run (the training
#         script followed by its options).
# Environment read:
#   SLURM_LOCALID, SLURM_CPUS_PER_TASK, SLURM_JOB_NODELIST,
#   SLURM_JOB_NUM_NODES, SLURM_GPUS_ON_NODE, SLURM_PROCID,
#   SLURMD_NODENAME

# Samuel's fix for apparent error in SLURM initialization.
# Only local task 0 clears /dev/shm and pokes rocm-smi; every other
# local task just waits. An unset/empty SLURM_LOCALID is treated as
# non-zero so the destructive branch is never taken by accident and the
# test itself cannot become malformed.
if [ "${SLURM_LOCALID:-1}" -eq 0 ]; then
    rm -rf /dev/shm/*
    # rocm-smi is best-effort: its failure must not stop the launch.
    rocm-smi || true
else
    sleep 2
fi

# Network / threading settings for the communication stack.
export NCCL_SOCKET_IFNAME=hsn0,hsn1,hsn2,hsn3
export OMP_NUM_THREADS="$SLURM_CPUS_PER_TASK"
export FI_CXI_DEFAULT_CQ_SIZE=131072

# debugging (noisy)
#export NCCL_DEBUG=INFO
#export RCCL_KERNEL_COLL_TRACE_ENABLE=1
#export NCCL_DEBUG_SUBSYS=INIT,COLL

module --quiet purge
module load cray-python

module load CrayEnv
module load PrgEnv-cray/8.3.3
module load craype-accel-amd-gfx90a
# NOTE(review): cray-python is loaded a second time here, presumably so
# it is still active after the PrgEnv switch - confirm before removing.
module load cray-python

module use /pfs/lustrep2/projappl/project_462000125/samantao-public/mymodules
module load suse-repo-deps/sam-default
module load rocm/sam-5.2.3.lua
module load rccl/sam-develop.lua
module load aws-ofi-rccl/sam-default.lua

source /scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/bin/activate

# The first host of the allocation acts as the rendezvous master.
MASTER_NODE=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
MASTER_PORT=9999

echo "Launching on $SLURMD_NODENAME ($SLURM_PROCID/$SLURM_JOB_NUM_NODES)," \
     "master $MASTER_NODE port $MASTER_PORT," \
     "GPUs $SLURM_GPUS_ON_NODE," \
     "CUDA: $(python -c 'import torch; print(torch.cuda.is_available())')"

# SLURM_PROCID is used as the node rank, which assumes exactly one
# launcher task per node - TODO confirm against the calling sbatch script.
python -u -m torch.distributed.run \
    --nnodes "$SLURM_JOB_NUM_NODES" \
    --nproc_per_node "$SLURM_GPUS_ON_NODE" \
    --node_rank="$SLURM_PROCID" \
    --master_addr "$MASTER_NODE" \
    --master_port "$MASTER_PORT" \
    "$@"
sbatch_mtf_xp3ru.sh ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#SBATCH --nodes=8
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=32
#SBATCH --mem=256G
#SBATCH -p pilot
#SBATCH -t 48:00:00
#SBATCH --gpus-per-node=mi250:8
#SBATCH --exclusive=user
#SBATCH --hint=nomultithread
#SBATCH --account=project_462000119
#SBATCH -o logs/%j.out
#SBATCH -e logs/%j.err

# Slurm batch script: writes a DeepSpeed config for this job, assembles
# the Megatron-DeepSpeed finetune_t0.py command line and starts it on
# every node through launch.sh.
#
# Environment read: SLURM_JOB_ID, SLURM_GPUS_ON_NODE, SLURM_JOB_NUM_NODES.

# if run without sbatch, invoke here
#if [ -z $SLURM_JOB_ID ]; then
#    mkdir -p logs
#    sbatch "$0"
#    exit
#fi

VARIANT=7b1xp3ru

set -euo pipefail

# symlink logs/latest.out and logs/latest.err
mkdir -p logs
ln -f -s "$SLURM_JOB_ID.out" logs/latest.out
ln -f -s "$SLURM_JOB_ID.err" logs/latest.err

KILL_SWITCH_PATH=kill-switch-$VARIANT
CHECKPOINT_PATH=checkpoints_$VARIANT
TENSORBOARD_PATH=tensorboard_$VARIANT

# Data
TOKENIZER_NAME_OR_PATH=bigscience/tokenizer

TRAIN_DATA_PATH=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3_train_ru.txt
VALID_DATA_PATH=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3_validation_ru.txt

PP_SIZE=1
TP_SIZE=1

# Batch geometry: the global batch is derived from the allocation size so
# that it stays equal to micro batch * world size * accumulation steps.
MICRO_BATCH_SIZE=2
GRADIENT_ACCUMULATION_STEPS=16
WORLD_SIZE=$((SLURM_GPUS_ON_NODE * SLURM_JOB_NUM_NODES))
GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE * WORLD_SIZE * GRADIENT_ACCUMULATION_STEPS))

# Model parameters
NLAYERS=30
NHIDDEN=4096
NHEADS=32
SEQ_LEN=2048

# NOTE(review): passed through with underscores; presumably the training
# script's integer parser accepts them - confirm.
TRAIN_SAMPLES=6_348_800

SAVE_INTERVAL=500

ZERO_STAGE=1

# One DeepSpeed config per job, named after the job id.
mkdir -p ds_configs
config_json="ds_configs/$SLURM_JOB_ID.json"

cat <<EOT > "$config_json"
{
  "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE,
  "train_batch_size": $GLOBAL_BATCH_SIZE,
  "gradient_clipping": 1.0,
  "zero_optimization": {
    "stage": $ZERO_STAGE
  },
  "fp16": {
    "enabled": true,
    "loss_scale": 0,
    "loss_scale_window": 500,
    "hysteresis": 2,
    "min_loss_scale": 1,
    "initial_scale_power": 12
  },
  "steps_per_print": 2000,
  "wall_clock_breakdown": false
}
EOT

# The command is kept as an array so that every option and value stays a
# separate word without relying on unquoted word splitting.
CMD=(
    Megatron-DeepSpeed/finetune_t0.py
    --tensor-model-parallel-size "$TP_SIZE"
    --pipeline-model-parallel-size "$PP_SIZE"
    --num-layers "$NLAYERS"
    --hidden-size "$NHIDDEN"
    --num-attention-heads "$NHEADS"
    --seq-length "$SEQ_LEN"
    --max-position-embeddings "$SEQ_LEN"
    --micro-batch-size "$MICRO_BATCH_SIZE"
    --global-batch-size "$GLOBAL_BATCH_SIZE"
    --train-samples "$TRAIN_SAMPLES"
    --tokenizer-type PretrainedFromHF
    --tokenizer-name-or-path "$TOKENIZER_NAME_OR_PATH"
    --init-method-std 0.0048
    --embed-layernorm
    --fp16
    --seed 42
    --position-embedding-type alibi
    --abort-on-unmet-fused-kernel-constraints
    --clip-grad 1.0
    --kill-switch-path "$KILL_SWITCH_PATH"
    --checkpoint-activations
    --pad-vocab-size-to 250880
    --optimizer adam
    --adam-beta1 0.9
    --adam-beta2 0.95
    --adam-eps 1e-8
    --lr 2e-5
    --lr-decay-style constant
    --lr-warmup-samples 0
    --weight-decay 1e-4
    --no-load-optim
    --reset-progress
    --norm-target-loss
    --log-interval 10
    --save-interval "$SAVE_INTERVAL"
    --eval-interval 500
    --eval-iters 1
    --tensorboard-dir "$TENSORBOARD_PATH"
    --tensorboard-queue-size 5
    --log-timers-to-tensorboard
    --log-batch-size-to-tensorboard
    --log-validation-ppl-to-tensorboard
    --save "$CHECKPOINT_PATH"
    --load "$CHECKPOINT_PATH"
    --train-weighted-split-paths-path "$TRAIN_DATA_PATH"
    --valid-weighted-split-paths-path "$VALID_DATA_PATH"
    --dataloader-type single
    --data-impl mmap
    --deepspeed
    --deepspeed_config "$config_json"
    --zero-stage "$ZERO_STAGE"
)

echo "${CMD[@]}"

echo "START $SLURM_JOB_ID: $(date)"

srun --label launch.sh "${CMD[@]}"

echo "END $SLURM_JOB_ID: $(date)"
xp3_train_ru.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ "train: 0.34682039044965196 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_en, 0.11629763695589916 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ru, 0.07047712423848855 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_es, 0.05705321149447986 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_pt, 0.05162759832018098 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_fr, 0.043535252356335014 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ar, 0.04287450755952809 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_id, 0.04272211963743812 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_zh, 0.04089577291615753 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_hi, 0.040309863136062274 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_code, 0.028867440155442382 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_vi, 0.017854964823074384 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ur, 0.012406528830246035 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_te, 0.00860509613805147 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ta, 0.007750785544287363 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_bn, 0.006201179459433538 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_mr, 0.005220691815104342 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_sw, 0.00521883024752445 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_gu, 0.004870233102513846 0:0.950 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_pa, 0.0036643096242597105 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ne, 0.003283693516874961 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_yo, 0.003027132273014225 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ig, 0.002532886080552859 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ny, 0.0024544768540878015 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_zu, 0.002370333999476676 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_xh, 0.0023500801442074494 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_sn, 0.0023293050500158533 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ts, 0.0023208907645547406 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_rw, 0.002195979579943977 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_lg, 0.002192219213432595 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_tn, 0.002138084828209331 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_nso, 0.0017962265578379366 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_rn, 0.0016292439459216106 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ml, 0.001540037627493179 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_kn, 0.0015028807385985316 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_or, 0.0014542938247633463 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_as, 0.0011756171580534914 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ln, 
0.0011406941502547145 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_wo, 0.0010800070471502304 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_tum, 0.0010707364406023676 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ki, 0.0010618009162188852 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_st, 0.0010524558469678267 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_fon, 0.0010294096403287618 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ca, 0.0010062889709865012 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_eu, 0.0010060283515253164 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ak, 0.0009963482001098772 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_bm, 0.000989311474657885 0:0.950 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_tw"
xp3_validation_ru.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ "validation: 0.34682039044965196 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_en, 0.11629763695589916 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ru, 0.07047712423848855 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_es, 0.05705321149447986 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_pt, 0.05162759832018098 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_fr, 0.043535252356335014 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ar, 0.04287450755952809 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_id, 0.04272211963743812 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_zh, 0.04089577291615753 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_hi, 0.040309863136062274 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_code, 0.028867440155442382 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_vi, 0.017854964823074384 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ur, 0.012406528830246035 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_te, 0.00860509613805147 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ta, 0.007750785544287363 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_bn, 0.006201179459433538 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_mr, 0.005220691815104342 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_sw, 0.00521883024752445 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_gu, 0.004870233102513846 0.950:1 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_pa, 0.0036643096242597105 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ne, 0.003283693516874961 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_yo, 0.003027132273014225 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ig, 0.002532886080552859 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ny, 0.0024544768540878015 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_zu, 0.002370333999476676 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_xh, 0.0023500801442074494 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_sn, 0.0023293050500158533 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ts, 0.0023208907645547406 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_rw, 0.002195979579943977 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_lg, 0.002192219213432595 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_tn, 0.002138084828209331 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_nso, 0.0017962265578379366 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_rn, 0.0016292439459216106 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ml, 0.001540037627493179 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_kn, 0.0015028807385985316 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_or, 0.0014542938247633463 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_as, 0.0011756171580534914 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ln, 
0.0011406941502547145 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_wo, 0.0010800070471502304 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_tum, 0.0010707364406023676 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ki, 0.0010618009162188852 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_st, 0.0010524558469678267 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_fon, 0.0010294096403287618 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ca, 0.0010062889709865012 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_eu, 0.0010060283515253164 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_ak, 0.0009963482001098772 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_bm, 0.000989311474657885 0.950:1 /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-mtf/xp3/xp3_tw"