File size: 1,526 Bytes
37e0ba8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash
DATASET_ROOT=/dataset/imageNet100_sicy/train/ #/raid/common/imagenet-raw/

## train ViT-large for 250 epochs
OUTPUT_ROOT=./exps/vit_large_250ep
NPROC_PER_NODE=40 # GPU numbers
BATCH_SIZE_PER_GPU=16
DEBUG=false # debug = true, then we only load subset of the whole training dataset
python -m torch.distributed.launch --nproc_per_node=$NPROC_PER_NODE main.py \
    --data_path $DATASET_ROOT \
    --output_dir $OUTPUT_ROOT \
    --arch vit_large \
    --instance_queue_size 65536 \
    --local_group_queue_size 65536 \
    --use_bn_in_head false \
    --instance_out_dim 256 \
    --instance_temp 0.2 \
    --local_group_out_dim 256 \
    --local_group_temp 0.2 \
    --local_group_knn_top_n 8 \
    --group_out_dim 65536 \
    --group_student_temp 0.1 \
    --group_warmup_teacher_temp 0.04 \
    --group_teacher_temp 0.07 \
    --group_warmup_teacher_temp_epochs 50 \
    --norm_last_layer true \
    --norm_before_pred true \
    --batch_size_per_gpu $BATCH_SIZE_PER_GPU \
    --epochs 250 \
    --warmup_epochs 10 \
    --clip_grad 3.0 \
    --lr 0.0015 \
    --min_lr 1.5e-4 \
    --patch_embed_lr_mult 0.2 \
    --drop_path_rate 0.3 \
    --weight_decay 0.025 \
    --weight_decay_end 0.12 \
    --freeze_last_layer 3 \
    --momentum_teacher 0.996 \
    --use_fp16 false \
    --local_crops_number 10 \
    --size_crops 96 \
    --global_crops_scale 0.25 1 \
    --local_crops_scale 0.05 0.25 \
    --timm_auto_augment_par rand-m9-mstd0.5-inc1 \
    --prob 0.5 \
    --use_prefetcher true \
    --debug $DEBUG