zhoupans commited on
Commit
91d4604
1 Parent(s): e8fbfae

Upload 16 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  exp_illustration/attention_vis.png filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  exp_illustration/attention_vis.png filter=lfs diff=lfs merge=lfs -text
36
+ pretrained[[:space:]]models/vit_small_800ep/linear_probing_checkpoint.pth.tar filter=lfs diff=lfs merge=lfs -text
pretrained models/vit_small_800ep/1%_fine_tune_semi_checkpoint_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f28d4e49a8ee50e2550e7e867325fbc55d5359e033abe1e8e942ddd70fedf14
3
+ size 392263987
pretrained models/vit_small_800ep/1%_fine_tune_semi_log.txt ADDED
The diff for this file is too large to render. See raw diff
 
pretrained models/vit_small_800ep/1%_logistic_regression_semi.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INFO:======================================================
2
+ INFO:avgpool_patchtokens = 0
3
+ INFO:multi_scale = 0
4
+ INFO:loading features...
5
+ INFO:Features are ready!
6
+ Start the logistic regression.
7
+ INFO:training data: 12811, test data: 50000
8
+ INFO:Logistic regression result: lambda 0.01, Acc 0.6639
9
+ INFO:Logistic regression result: lambda 0.03, Acc 0.6659
10
+ INFO:Logistic regression result: lambda 0.06, Acc 0.6670
11
+ INFO:Logistic regression result: lambda 0.10, Acc 0.6688
12
+ INFO:Logistic regression result: lambda 0.15, Acc 0.6695
13
+ INFO:Logistic regression result: lambda 0.20, Acc 0.6688
14
+ INFO:Logistic regression result: lambda 0.30, Acc 0.6668
pretrained models/vit_small_800ep/1%_logistic_regression_semi.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b84cb8f437e0232117bba53c9b013e4f811063e5adf66548862974f6c2f53b46
3
+ size 89567065
pretrained models/vit_small_800ep/10%_fine_tune_semi_checkpoint_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:321c2895d2c867fd8ef44d9158f3e5b97d92eb23550b0067e4891d85a1789a40
3
+ size 392263991
pretrained models/vit_small_800ep/10%_fine_tune_semi_log.txt ADDED
The diff for this file is too large to render. See raw diff
 
pretrained models/vit_small_800ep/10%_logistic_regression_semi.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INFO:======================================================
2
+ INFO:avgpool_patchtokens = 0
3
+ INFO:multi_scale = 0
4
+ INFO:loading features...
5
+ INFO:Features are ready!
6
+ Start the logistic regression.
7
+ INFO:training data: 128115, test data: 50000
8
+ INFO:Logistic regression result: lambda 0.01, Acc 0.7124
9
+ INFO:Logistic regression result: lambda 0.03, Acc 0.7261
10
+ INFO:Logistic regression result: lambda 0.06, Acc 0.7337
11
+ INFO:Logistic regression result: lambda 0.10, Acc 0.7378
12
+ INFO:Logistic regression result: lambda 0.15, Acc 0.7395
13
+ INFO:Logistic regression result: lambda 0.20, Acc 0.7395
14
+ INFO:Logistic regression result: lambda 0.30, Acc 0.7382
pretrained models/vit_small_800ep/10%_logistic_regression_semi.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed858128030aafadc903d2f9761571e7e3eabe74a04379ac8e9c0d4dd69da747
3
+ size 253964509
pretrained models/vit_small_800ep/args.txt ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ DATASET_ROOT=/dataset/imageNet100_sicy/train/ #/raid/common/imagenet-raw/
3
+
4
+ ## train ViT-small for 800 epochs
5
+ NPROC_PER_NODE=16 # GPU numbers
6
+ BATCH_SIZE_PER_GPU=64
7
+ DEBUG=false # debug = true, then we only load subset of the whole training dataset
8
+ python -m torch.distributed.launch --nproc_per_node=$NPROC_PER_NODE main.py \
9
+ --data_path $DATASET_ROOT \
10
+ --output_dir $OUTPUT_ROOT \
11
+ --arch vit_small \
12
+ --instance_queue_size 65536 \
13
+ --local_group_queue_size 65536 \
14
+ --use_bn_in_head false \
15
+ --instance_out_dim 256 \
16
+ --instance_temp 0.2 \
17
+ --local_group_out_dim 256 \
18
+ --local_group_temp 0.2 \
19
+ --local_group_knn_top_n 8 \
20
+ --group_out_dim 65536 \
21
+ --group_student_temp 0.1 \
22
+ --group_warmup_teacher_temp 0.04 \
23
+ --group_teacher_temp 0.07 \
24
+ --group_warmup_teacher_temp_epochs 30 \
25
+ --norm_last_layer false \
26
+ --norm_before_pred true \
27
+ --batch_size_per_gpu $BATCH_SIZE_PER_GPU \
28
+ --epochs 800 \
29
+ --warmup_epochs 10 \
30
+ --clip_grad 3.0 \
31
+ --lr 0.0008 \
32
+ --min_lr 1e-06 \
33
+ --patch_embed_lr_mult 0.2 \
34
+ --drop_path_rate 0.1 \
35
+ --weight_decay 0.04 \
36
+ --weight_decay_end 0.1 \
37
+ --freeze_last_layer 1 \
38
+ --momentum_teacher 0.996 \
39
+ --use_fp16 false \
40
+ --local_crops_number 10 \
41
+ --size_crops 96 \
42
+ --global_crops_scale 0.25 1 \
43
+ --local_crops_scale 0.05 0.25 \
44
+ --timm_auto_augment_par rand-m9-mstd0.5-inc1 \
45
+ --prob 0.5 \
46
+ --use_prefetcher true \
47
+ --debug $DEBUG
pretrained models/vit_small_800ep/fine_tuning_checkpoint.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:210c5ba56b7442c74100d274c9e51c413bc77eb5b9fdfa4df6ca6f8a719241bb
3
+ size 264766743
pretrained models/vit_small_800ep/fine_tuning_log.txt ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 2.9977217314840725e-05, "train_min_lr": 1.2632993704056536e-08, "train_loss": 5.603262532481568, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 2.722763790786981, "test_loss": 1.69426304602441, "test_acc1": 68.6300018359375, "test_acc5": 90.22800263671876, "epoch": 0, "n_parameters": 22050664}
2
+ {"train_lr": 8.997961549222588e-05, "train_min_lr": 3.7919193902096106e-08, "train_loss": 4.027685547332398, "train_loss_scale": 65902.70823341327, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.1822310836260554, "test_acc1": 74.59600241210937, "test_acc5": 93.05400232421874, "epoch": 1, "n_parameters": 22050664}
3
+ {"train_lr": 0.00014998201366961113, "train_min_lr": 6.320539410013567e-08, "train_loss": 3.835417651920486, "train_loss_scale": 65536.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 5.714154615867243, "test_loss": 1.086495587843975, "test_acc1": 75.90000249023437, "test_acc5": 93.674002734375, "epoch": 2, "n_parameters": 22050664}
4
+ {"train_lr": 0.00020998441184699623, "train_min_lr": 8.849159429817523e-08, "train_loss": 3.7547842764930666, "train_loss_scale": 38006.68904876099, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.0365260257065751, "test_acc1": 76.6580021484375, "test_acc5": 93.99400263671875, "epoch": 3, "n_parameters": 22050664}
5
+ {"train_lr": 0.00026998681002438155, "train_min_lr": 1.1377779449621479e-07, "train_loss": 3.686702872685296, "train_loss_scale": 40678.420463629096, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 6.13770672343046, "test_loss": 1.0188552564337054, "test_acc1": 77.14200270507813, "test_acc5": 94.272002890625, "epoch": 4, "n_parameters": 22050664}
6
+ {"train_lr": 0.00032998920820176663, "train_min_lr": 1.3906399469425437e-07, "train_loss": 3.6525438100123386, "train_loss_scale": 49610.385291766586, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 1.0008231160294918, "test_acc1": 77.44000240234375, "test_acc5": 94.31800263671875, "epoch": 5, "n_parameters": 22050664}
7
+ {"train_lr": 0.00038999160637915177, "train_min_lr": 1.6435019489229392e-07, "train_loss": 3.6161093925305314, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 6.363529770970821, "test_loss": 0.9950099191592849, "test_acc1": 77.68000247070313, "test_acc5": 94.33600220703126, "epoch": 6, "n_parameters": 22050664}
8
+ {"train_lr": 0.0004499940045565371, "train_min_lr": 1.896363950903335e-07, "train_loss": 3.5744851427398427, "train_loss_scale": 61842.7242206235, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 6.4471772824355265, "test_loss": 0.9745688497565175, "test_acc1": 77.83000258789062, "test_acc5": 94.40000263671875, "epoch": 7, "n_parameters": 22050664}
9
+ {"train_lr": 0.0005099964027339221, "train_min_lr": 2.1492259528837304e-07, "train_loss": 3.5608610644710246, "train_loss_scale": 52936.95283772982, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9392552370105991, "test_acc1": 77.89000266601562, "test_acc5": 94.6540028125, "epoch": 8, "n_parameters": 22050664}
10
+ {"train_lr": 0.0005699988009113075, "train_min_lr": 2.4020879548641256e-07, "train_loss": 3.5521272373714035, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 6.548601883111431, "test_loss": 0.9393050278188618, "test_acc1": 77.73400247070313, "test_acc5": 94.65400236328125, "epoch": 9, "n_parameters": 22050664}
11
+ {"train_lr": 0.0006300011990886927, "train_min_lr": 2.6549499568445216e-07, "train_loss": 3.520238784815577, "train_loss_scale": 33213.288569144686, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9476768675878757, "test_acc1": 77.7700023046875, "test_acc5": 94.59600275390625, "epoch": 10, "n_parameters": 22050664}
12
+ {"train_lr": 0.0006900035972660778, "train_min_lr": 2.9078119588249177e-07, "train_loss": 3.493932134813542, "train_loss_scale": 38451.97761790568, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 6.660394437593236, "test_loss": 0.9416997510058279, "test_acc1": 77.83000249023438, "test_acc5": 94.63600248046875, "epoch": 11, "n_parameters": 22050664}
13
+ {"train_lr": 0.000750005995443463, "train_min_lr": 3.160673960805312e-07, "train_loss": 3.4954532293631115, "train_loss_scale": 38085.26938449241, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9537998412856619, "test_acc1": 78.16400224609374, "test_acc5": 94.6940025390625, "epoch": 12, "n_parameters": 22050664}
14
+ {"train_lr": 0.0008100083936208481, "train_min_lr": 3.41353596278571e-07, "train_loss": 3.4618209781263656, "train_loss_scale": 40599.84012789768, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 6.776521925922397, "test_loss": 0.9393502482931122, "test_acc1": 78.21600211914063, "test_acc5": 94.8440025390625, "epoch": 13, "n_parameters": 22050664}
15
+ {"train_lr": 0.0008700107917982333, "train_min_lr": 3.666397964766104e-07, "train_loss": 3.4572233194403417, "train_loss_scale": 37273.272581934456, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9608023931051939, "test_acc1": 78.26000245117187, "test_acc5": 94.7740023828125, "epoch": 14, "n_parameters": 22050664}
16
+ {"train_lr": 0.0009300131899756183, "train_min_lr": 3.9192599667465007e-07, "train_loss": 3.44061637310673, "train_loss_scale": 37247.07913669065, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9536074338523486, "test_acc1": 78.32200237304687, "test_acc5": 94.7660027734375, "epoch": 15, "n_parameters": 22050664}
17
+ {"train_lr": 0.0009900155881530036, "train_min_lr": 4.172121968726895e-07, "train_loss": 3.431752730807145, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 6.945552310211767, "test_loss": 0.9266342950686244, "test_acc1": 78.37000237304687, "test_acc5": 94.7860024609375, "epoch": 16, "n_parameters": 22050664}
18
+ {"train_lr": 0.001050017986330389, "train_min_lr": 4.4249839707072927e-07, "train_loss": 3.4332912842527947, "train_loss_scale": 33920.511590727416, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9484759953640799, "test_acc1": 78.4200026171875, "test_acc5": 94.7280025, "epoch": 17, "n_parameters": 22050664}
19
+ {"train_lr": 0.001110020384507774, "train_min_lr": 4.677845972687686e-07, "train_loss": 3.415402708722533, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.03111657516943, "test_loss": 0.979095772250008, "test_acc1": 78.09000255859375, "test_acc5": 94.6860025390625, "epoch": 18, "n_parameters": 22050664}
20
+ {"train_lr": 0.0011700227826851595, "train_min_lr": 4.930707974668083e-07, "train_loss": 3.4104867167324184, "train_loss_scale": 35963.600319744204, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9353871516144002, "test_acc1": 78.5380025390625, "test_acc5": 94.9020025, "epoch": 19, "n_parameters": 22050664}
21
+ {"train_lr": 0.0011999696005901587, "train_min_lr": 5.05690980256859e-07, "train_loss": 3.397333256060557, "train_loss_scale": 34732.508393285374, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9350267630497008, "test_acc1": 78.21600241210938, "test_acc5": 94.78200248046875, "epoch": 20, "n_parameters": 22050664}
22
+ {"train_lr": 0.0011997870693296402, "train_min_lr": 5.056140579648166e-07, "train_loss": 3.383698422560017, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.136778656146128, "test_loss": 0.9135349624939547, "test_acc1": 78.71600249023437, "test_acc5": 94.88400220703124, "epoch": 21, "n_parameters": 22050664}
23
+ {"train_lr": 0.0011994219894223782, "train_min_lr": 5.054602060538313e-07, "train_loss": 3.3703202412997504, "train_loss_scale": 34339.606714628295, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9532031546567232, "test_acc1": 78.55800263671875, "test_acc5": 94.9040025390625, "epoch": 22, "n_parameters": 22050664}
24
+ {"train_lr": 0.001198874472075251, "train_min_lr": 5.052294713886864e-07, "train_loss": 3.356075361930876, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.241844146371745, "test_loss": 0.9423833109495294, "test_acc1": 78.75000256835938, "test_acc5": 95.01200244140625, "epoch": 23, "n_parameters": 22050664}
25
+ {"train_lr": 0.001198144684067349, "train_min_lr": 5.049219242533962e-07, "train_loss": 3.3644068712096136, "train_loss_scale": 33789.544364508394, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9384568313150915, "test_acc1": 78.79800256835938, "test_acc5": 94.97200283203125, "epoch": 24, "n_parameters": 22050664}
26
+ {"train_lr": 0.0011972328476991631, "train_min_lr": 5.045376583297983e-07, "train_loss": 3.343949767087194, "train_loss_scale": 34496.76738609112, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9397645968062277, "test_acc1": 78.96800243164063, "test_acc5": 95.098002421875, "epoch": 25, "n_parameters": 22050664}
27
+ {"train_lr": 0.0011961392407248872, "train_min_lr": 5.040767906690125e-07, "train_loss": 3.336281333562377, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.298573963552546, "test_loss": 0.9320428655802748, "test_acc1": 79.00600221679687, "test_acc5": 95.01200259765625, "epoch": 26, "n_parameters": 22050664}
28
+ {"train_lr": 0.0011948641962678012, "train_min_lr": 5.035394616557955e-07, "train_loss": 3.3189330199163116, "train_loss_scale": 34208.63948840927, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9222787600437193, "test_acc1": 79.01200250976562, "test_acc5": 95.14400234375, "epoch": 27, "n_parameters": 22050664}
29
+ {"train_lr": 0.0011934081027187965, "train_min_lr": 5.029258349657694e-07, "train_loss": 3.303631587446832, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.35417231514776, "test_loss": 0.9152912786443725, "test_acc1": 79.03000260742188, "test_acc5": 95.16000244140625, "epoch": 28, "n_parameters": 22050664}
30
+ {"train_lr": 0.0011917714036180707, "train_min_lr": 5.022360975155685e-07, "train_loss": 3.30221888006067, "train_loss_scale": 32977.54756195044, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9086335164900045, "test_acc1": 78.98400265625, "test_acc5": 95.16400259765625, "epoch": 29, "n_parameters": 22050664}
31
+ {"train_lr": 0.001189954597520034, "train_min_lr": 5.014704594059005e-07, "train_loss": 3.292660156981074, "train_loss_scale": 33763.35091926459, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.907500613964241, "test_acc1": 79.26000255859375, "test_acc5": 95.230002421875, "epoch": 30, "n_parameters": 22050664}
32
+ {"train_lr": 0.001187958237841412, "train_min_lr": 5.006291538575519e-07, "train_loss": 3.2944930741350524, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.42469676602468, "test_loss": 0.9170168010333112, "test_acc1": 79.10000237304688, "test_acc5": 95.13200259765625, "epoch": 31, "n_parameters": 22050664}
33
+ {"train_lr": 0.0011857829326927091, "train_min_lr": 4.997124371403409e-07, "train_loss": 3.2907891897655888, "train_loss_scale": 33108.514788169465, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9195689713681927, "test_acc1": 79.2620027734375, "test_acc5": 95.28000267578125, "epoch": 32, "n_parameters": 22050664}
34
+ {"train_lr": 0.0011834293446929475, "train_min_lr": 4.987205884950621e-07, "train_loss": 3.269422015340494, "train_loss_scale": 35439.7314148681, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9031644208740642, "test_acc1": 79.44000231445312, "test_acc5": 95.2100021484375, "epoch": 33, "n_parameters": 22050664}
35
+ {"train_lr": 0.0011808981907678268, "train_min_lr": 4.97653910048421e-07, "train_loss": 3.2735699294663543, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.493240050369982, "test_loss": 0.905245968403707, "test_acc1": 79.24400267578125, "test_acc5": 95.22600248046875, "epoch": 34, "n_parameters": 22050664}
36
+ {"train_lr": 0.0011781902419313573, "train_min_lr": 4.965127267210045e-07, "train_loss": 3.2718872641392656, "train_loss_scale": 34444.38049560352, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8895842155427424, "test_acc1": 79.29600248046874, "test_acc5": 95.22200224609375, "epoch": 35, "n_parameters": 22050664}
37
+ {"train_lr": 0.0011808981907678268, "train_min_lr": 4.97653910048421e-07, "train_loss": 3.2682923131185375, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.496189176131019, "test_loss": 0.8937030186634938, "test_acc1": 79.4140026171875, "test_acc5": 95.2220026171875, "epoch": 34, "n_parameters": 22050664}
38
+ {"train_lr": 0.0011781902419313573, "train_min_lr": 4.965127267210045e-07, "train_loss": 3.266568816775422, "train_loss_scale": 33789.544364508394, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9146524023918705, "test_acc1": 79.33600250976562, "test_acc5": 95.31400240234375, "epoch": 35, "n_parameters": 22050664}
39
+ {"train_lr": 0.0011753063230509806, "train_min_lr": 4.952973861283093e-07, "train_loss": 3.2613633376278943, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.520887302456619, "test_loss": 0.9073987637312357, "test_acc1": 79.4480026953125, "test_acc5": 95.28400267578125, "epoch": 36, "n_parameters": 22050664}
40
+ {"train_lr": 0.0011722473125963316, "train_min_lr": 4.940082584748532e-07, "train_loss": 3.2646328849519946, "train_loss_scale": 34103.86570743405, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9082906289410045, "test_acc1": 79.5240025, "test_acc5": 95.32400271484374, "epoch": 37, "n_parameters": 22050664}
41
+ {"train_lr": 0.0011690141423716237, "train_min_lr": 4.926457364414063e-07, "train_loss": 3.250318612197606, "train_loss_scale": 33815.7378097522, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.900067733671829, "test_acc1": 79.5080025390625, "test_acc5": 95.2900024609375, "epoch": 38, "n_parameters": 22050664}
42
+ {"train_lr": 0.0011656077972318321, "train_min_lr": 4.912102350653811e-07, "train_loss": 3.2493277104686107, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.563338302975174, "test_loss": 0.8961556157992996, "test_acc1": 79.69000251953125, "test_acc5": 95.4220026953125, "epoch": 39, "n_parameters": 22050664}
43
+ {"train_lr": 0.0011620293147826769, "train_min_lr": 4.897021916144021e-07, "train_loss": 3.2420077359409545, "train_loss_scale": 34522.96083133493, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.9002051103206081, "test_acc1": 79.76400252929687, "test_acc5": 95.36000259765625, "epoch": 40, "n_parameters": 22050664}
44
+ {"train_lr": 0.0011582797850645855, "train_min_lr": 4.881220654531131e-07, "train_loss": 3.225775114590411, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.616268442307922, "test_loss": 0.8994353438606699, "test_acc1": 79.88600252929687, "test_acc5": 95.428002421875, "epoch": 41, "n_parameters": 22050664}
45
+ {"train_lr": 0.001154360350220628, "train_min_lr": 4.864703379032491e-07, "train_loss": 3.2263035094328254, "train_loss_scale": 32898.96722621902, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.890631131543458, "test_acc1": 79.7900026953125, "test_acc5": 95.47400279296875, "epoch": 42, "n_parameters": 22050664}
46
+ {"train_lr": 0.001150272204148628, "train_min_lr": 4.847475120970235e-07, "train_loss": 3.224647225497914, "train_loss_scale": 34156.252597921666, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8872248013965957, "test_acc1": 79.73000259765625, "test_acc5": 95.2860026171875, "epoch": 43, "n_parameters": 22050664}
47
+ {"train_lr": 0.0011460165921374807, "train_min_lr": 4.829541128238657e-07, "train_loss": 3.224028726657041, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.645182122048333, "test_loss": 0.8646529002954032, "test_acc1": 79.8520023828125, "test_acc5": 95.51800240234375, "epoch": 44, "n_parameters": 22050664}
48
+ {"train_lr": 0.0011415948104878317, "train_min_lr": 4.810906863705679e-07, "train_loss": 3.227728815363656, "train_loss_scale": 36015.98721023181, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8860152457051604, "test_acc1": 79.8120027734375, "test_acc5": 95.40000263671875, "epoch": 45, "n_parameters": 22050664}
49
+ {"train_lr": 0.0011370082061172055, "train_min_lr": 4.791578003548795e-07, "train_loss": 3.2161908976608613, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.68220556649468, "test_loss": 0.8895701988962771, "test_acc1": 79.79200264648438, "test_acc5": 95.38000248046875, "epoch": 46, "n_parameters": 22050664}
50
+ {"train_lr": 0.0011322581761497228, "train_min_lr": 4.77156043552603e-07, "train_loss": 3.213227890830913, "train_loss_scale": 34365.8001598721, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8833366887260029, "test_acc1": 80.00200278320312, "test_acc5": 95.43000224609375, "epoch": 47, "n_parameters": 22050664}
51
+ {"train_lr": 0.0011273461674905277, "train_min_lr": 4.7508602571825396e-07, "train_loss": 3.210074199642018, "train_loss_scale": 33239.482014388486, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8948902931377178, "test_acc1": 79.99400262695312, "test_acc5": 95.518002421875, "epoch": 48, "n_parameters": 22050664}
52
+ {"train_lr": 0.0011222736763850426, "train_min_lr": 4.729483773993168e-07, "train_loss": 3.1931278697973626, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.754553957618207, "test_loss": 0.8982641385256789, "test_acc1": 79.9120023046875, "test_acc5": 95.4900024609375, "epoch": 49, "n_parameters": 22050664}
53
+ {"train_lr": 0.0011170422479631977, "train_min_lr": 4.707437497441777e-07, "train_loss": 3.1965765429438826, "train_loss_scale": 33134.70823341327, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8809949751118668, "test_acc1": 79.8760025, "test_acc5": 95.5240023828125, "epoch": 50, "n_parameters": 22050664}
54
+ {"train_lr": 0.0011116534757687612, "train_min_lr": 4.6847281430377656e-07, "train_loss": 3.208134999544882, "train_loss_scale": 34811.08872901679, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8624901903494624, "test_acc1": 80.21400263671875, "test_acc5": 95.486002578125, "epoch": 51, "n_parameters": 22050664}
55
+ {"train_lr": 0.001106109001273953, "train_min_lr": 4.6613626282704806e-07, "train_loss": 3.1933217710204165, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.7343954841772335, "test_loss": 0.8573215069206617, "test_acc1": 80.32800235351563, "test_acc5": 95.5840023828125, "epoch": 52, "n_parameters": 22050664}
56
+ {"train_lr": 0.001106109001273953, "train_min_lr": 4.6613626282704806e-07, "train_loss": 3.1863442023785757, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.762439713298941, "test_loss": 0.8716207481067599, "test_acc1": 80.196002421875, "test_acc5": 95.55000271484376, "epoch": 52, "n_parameters": 22050664}
57
+ {"train_lr": 0.0011004105133794123, "train_min_lr": 4.637348070502055e-07, "train_loss": 3.1893357260764645, "train_loss_scale": 34680.12150279776, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8803537958905897, "test_acc1": 79.86000239257812, "test_acc5": 95.6260024609375, "epoch": 53, "n_parameters": 22050664}
58
+ {"train_lr": 0.0010945597478997525, "train_min_lr": 4.6126917847994283e-07, "train_loss": 3.185652249675098, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.7989468898514005, "test_loss": 0.885556812277277, "test_acc1": 79.94800263671875, "test_acc5": 95.55200271484375, "epoch": 54, "n_parameters": 22050664}
59
+ {"train_lr": 0.001088558487034823, "train_min_lr": 4.5874012817060835e-07, "train_loss": 3.1908091484642713, "train_loss_scale": 34470.573940847324, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8760113961824024, "test_acc1": 80.1180028515625, "test_acc5": 95.53800255859375, "epoch": 55, "n_parameters": 22050664}
60
+ {"train_lr": 0.0010824085588268101, "train_min_lr": 4.5614842649542436e-07, "train_loss": 3.178943071433966, "train_loss_scale": 32898.96722621902, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8694060303782689, "test_acc1": 80.1320025390625, "test_acc5": 95.56200259765625, "epoch": 56, "n_parameters": 22050664}
61
+ {"train_lr": 0.0010761118366034246, "train_min_lr": 4.534948629118298e-07, "train_loss": 3.1813783728294998, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.838045957658312, "test_loss": 0.8739073046746145, "test_acc1": 80.1680027734375, "test_acc5": 95.6220028515625, "epoch": 57, "n_parameters": 22050664}
62
+ {"train_lr": 0.0010696702384072577, "train_min_lr": 4.507802457209944e-07, "train_loss": 3.172869831299801, "train_loss_scale": 33003.74100719424, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8888461014241663, "test_acc1": 80.3140026171875, "test_acc5": 95.6100026171875, "epoch": 58, "n_parameters": 22050664}
63
+ {"train_lr": 0.0010630857264115143, "train_min_lr": 4.48005401821614e-07, "train_loss": 3.1614516199016265, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.8672180701788665, "test_loss": 0.8756068150960762, "test_acc1": 80.05600259765625, "test_acc5": 95.6800024609375, "epoch": 59, "n_parameters": 22050664}
64
+ {"train_lr": 0.0010563603063223394, "train_min_lr": 4.451711764580204e-07, "train_loss": 3.1635347074217837, "train_loss_scale": 32846.580335731414, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8497080420719758, "test_acc1": 80.49600271484375, "test_acc5": 95.6460024609375, "epoch": 60, "n_parameters": 22050664}
65
+ {"train_lr": 0.001049496026767842, "train_min_lr": 4.422784329627143e-07, "train_loss": 3.1750706195783653, "train_loss_scale": 33606.19024780176, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8499589962813691, "test_acc1": 80.44400233398437, "test_acc5": 95.65800283203124, "epoch": 61, "n_parameters": 22050664}
66
+ {"train_lr": 0.0010424949786740677, "train_min_lr": 4.3932805249338794e-07, "train_loss": 3.162324577665253, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.85303239570819, "test_loss": 0.8645298016890315, "test_acc1": 80.34400255859374, "test_acc5": 95.674002421875, "epoch": 62, "n_parameters": 22050664}
67
+ {"train_lr": 0.0010353592946280874, "train_min_lr": 4.363209337645134e-07, "train_loss": 3.147220202892137, "train_loss_scale": 34549.15427657874, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.851393929874624, "test_acc1": 80.46200270507812, "test_acc5": 95.72400265625, "epoch": 63, "n_parameters": 22050664}
68
+ {"train_lr": 0.001028091148228388, "train_min_lr": 4.3325799277358664e-07, "train_loss": 3.1630775399631164, "train_loss_scale": 32951.354116706636, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.878828265284764, "test_acc1": 80.356002421875, "test_acc5": 95.648002421875, "epoch": 64, "n_parameters": 22050664}
69
+ {"train_lr": 0.0010206927534227683, "train_min_lr": 4.3014016252210287e-07, "train_loss": 3.1432487208732693, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.881106869494029, "test_loss": 0.8564972447530004, "test_acc1": 80.41000248046875, "test_acc5": 95.67000244140625, "epoch": 65, "n_parameters": 22050664}
70
+ {"train_lr": 0.001013166363833961, "train_min_lr": 4.2696839273136203e-07, "train_loss": 3.146101068321178, "train_loss_scale": 33187.09512390088, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8620290451377403, "test_acc1": 80.39400252929687, "test_acc5": 95.6820027734375, "epoch": 66, "n_parameters": 22050664}
71
+ {"train_lr": 0.0010055142720731358, "train_min_lr": 4.23743649553165e-07, "train_loss": 3.139933725769857, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.932112373227029, "test_loss": 0.8629113774718219, "test_acc1": 80.4600030078125, "test_acc5": 95.686002421875, "epoch": 67, "n_parameters": 22050664}
72
+ {"train_lr": 0.0009977388090415772, "train_min_lr": 4.204669152755244e-07, "train_loss": 3.1381681997665494, "train_loss_scale": 35570.69864108713, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8364801991531867, "test_acc1": 80.53000299804687, "test_acc5": 95.79200244140625, "epoch": 68, "n_parameters": 22050664}
73
+ {"train_lr": 0.000989842343220648, "train_min_lr": 4.1713918802344535e-07, "train_loss": 3.1466570022723657, "train_loss_scale": 34130.05915267786, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8543768139513395, "test_acc1": 80.51600252929687, "test_acc5": 95.77600236328125, "epoch": 69, "n_parameters": 22050664}
74
+ {"train_lr": 0.0009818272799503285, "train_min_lr": 4.137614814548824e-07, "train_loss": 3.132834618492759, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.971080147295738, "test_loss": 0.8561832326969118, "test_acc1": 80.77400264648438, "test_acc5": 95.65400265625, "epoch": 70, "n_parameters": 22050664}
75
+ {"train_lr": 0.0009736960606965343, "train_min_lr": 4.103348244519789e-07, "train_loss": 3.1380073279023266, "train_loss_scale": 33082.32134292566, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8322784344431098, "test_acc1": 80.71800240234376, "test_acc5": 95.762002734375, "epoch": 71, "n_parameters": 22050664}
76
+ {"train_lr": 0.0009654511623074229, "train_min_lr": 4.0686026080765107e-07, "train_loss": 3.1272493571066837, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.978859761540743, "test_loss": 0.8461165175638126, "test_acc1": 80.656002734375, "test_acc5": 95.70600275390625, "epoch": 72, "n_parameters": 22050664}
77
+ {"train_lr": 0.0009570950962589104, "train_min_lr": 4.033388489076452e-07, "train_loss": 3.125571237765342, "train_loss_scale": 35832.63309352518, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8337320769561156, "test_acc1": 80.59800280273437, "test_acc5": 95.6500025, "epoch": 73, "n_parameters": 22050664}
78
+ {"train_lr": 0.0009486304078896628, "train_min_lr": 3.997716614081388e-07, "train_loss": 3.1220558078931293, "train_loss_scale": 35387.344524380496, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8448109494820806, "test_acc1": 80.8000025390625, "test_acc5": 95.7320023828125, "epoch": 74, "n_parameters": 22050664}
79
+ {"train_lr": 0.0009400596756257564, "train_min_lr": 3.9615978490899916e-07, "train_loss": 3.1167206718957874, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 7.980819678135055, "test_loss": 0.8504435299462034, "test_acc1": 80.672002421875, "test_acc5": 95.7240025, "epoch": 75, "n_parameters": 22050664}
80
+ {"train_lr": 0.0009313855101952678, "train_min_lr": 3.925043196227978e-07, "train_loss": 3.131151482546263, "train_loss_scale": 23089.52198241407, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8508057000527856, "test_acc1": 80.7540024609375, "test_acc5": 95.67600251953125, "epoch": 76, "n_parameters": 22050664}
81
+ {"train_lr": 0.0009226105538330136, "train_min_lr": 3.888063790396719e-07, "train_loss": 3.121647229869303, "train_loss_scale": 16384.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.063685066122517, "test_loss": 0.8553337270521936, "test_acc1": 80.71600254882813, "test_acc5": 95.77800255859376, "epoch": 77, "n_parameters": 22050664}
82
+ {"train_lr": 0.0009137374794757061, "train_min_lr": 3.8506708958814587e-07, "train_loss": 3.1215238373199528, "train_loss_scale": 32637.032773780975, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.023835578220163, "test_loss": 0.8398646841977389, "test_acc1": 80.728002421875, "test_acc5": 95.7140026171875, "epoch": 78, "n_parameters": 22050664}
83
+ {"train_lr": 0.0009047689899477496, "train_min_lr": 3.812875902920097e-07, "train_loss": 3.1096004920659497, "train_loss_scale": 34889.6690647482, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8488422793741445, "test_acc1": 80.81800265625, "test_acc5": 95.764002734375, "epoch": 79, "n_parameters": 22050664}
84
+ {"train_lr": 0.000895707817137937, "train_min_lr": 3.774690324233622e-07, "train_loss": 3.1019059631654877, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.017535047660724, "test_loss": 0.8357925531063372, "test_acc1": 80.854002890625, "test_acc5": 95.75400236328124, "epoch": 80, "n_parameters": 22050664}
85
+ {"train_lr": 0.0008865567211672815, "train_min_lr": 3.7361257915192135e-07, "train_loss": 3.105172979364769, "train_loss_scale": 32898.96722621902, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8514758660138109, "test_acc1": 80.86000229492187, "test_acc5": 95.76400263671874, "epoch": 81, "n_parameters": 22050664}
86
+ {"train_lr": 0.0008773184895482654, "train_min_lr": 3.6971940519071187e-07, "train_loss": 3.0993131041336213, "train_loss_scale": 33396.64268585132, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8290771848827828, "test_acc1": 80.91400255859375, "test_acc5": 95.82800294921876, "epoch": 82, "n_parameters": 22050664}
87
+ {"train_lr": 0.0008679959363357301, "train_min_lr": 3.6579069643823646e-07, "train_loss": 3.1052896066916458, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.079149070117685, "test_loss": 0.8455219753370941, "test_acc1": 80.926002578125, "test_acc5": 95.84400259765626, "epoch": 83, "n_parameters": 22050664}
88
+ {"train_lr": 0.0008585919012696982, "train_min_lr": 3.618276496172392e-07, "train_loss": 3.1048215674839432, "train_loss_scale": 32872.77378097522, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8493605303855343, "test_acc1": 80.93200241210937, "test_acc5": 95.854002421875, "epoch": 84, "n_parameters": 22050664}
89
+ {"train_lr": 0.0008491092489103445, "train_min_lr": 3.5783147191017217e-07, "train_loss": 3.0910952954317072, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.114837278660348, "test_loss": 0.8335294819060173, "test_acc1": 81.03600266601562, "test_acc5": 95.796002734375, "epoch": 85, "n_parameters": 22050664}
90
+ {"train_lr": 0.0008395508677654423, "train_min_lr": 3.538033805914778e-07, "train_loss": 3.0963496649198587, "train_loss_scale": 33003.74100719424, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8419511684479605, "test_acc1": 80.93000259765626, "test_acc5": 95.80200275390625, "epoch": 86, "n_parameters": 22050664}
91
+ {"train_lr": 0.0008299196694104811, "train_min_lr": 3.4974460265679197e-07, "train_loss": 3.097735269035367, "train_loss_scale": 34470.573940847324, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8209714103518552, "test_acc1": 80.99800223632812, "test_acc5": 95.82000283203125, "epoch": 87, "n_parameters": 22050664}
92
+ {"train_lr": 0.0008202185876017782, "train_min_lr": 3.456563744491927e-07, "train_loss": 3.0955641708023354, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.106947433081366, "test_loss": 0.8577937365488242, "test_acc1": 81.04200248046875, "test_acc5": 95.8660028515625, "epoch": 88, "n_parameters": 22050664}
93
+ {"train_lr": 0.0008104505773828307, "train_min_lr": 3.4153994128259425e-07, "train_loss": 3.095939034156853, "train_loss_scale": 33160.90167865707, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8380633360556974, "test_acc1": 80.86000243164062, "test_acc5": 95.82200251953125, "epoch": 89, "n_parameters": 22050664}
94
+ {"train_lr": 0.0008006186141841735, "train_min_lr": 3.3739655706241775e-07, "train_loss": 3.0962999736090646, "train_loss_scale": 34077.672262190245, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.14378723118612, "test_loss": 0.8147525487055305, "test_acc1": 81.1600024609375, "test_acc5": 95.87000255859375, "epoch": 90, "n_parameters": 22050664}
95
+ {"train_lr": 0.0007907256929170464, "train_min_lr": 3.3322748390363286e-07, "train_loss": 3.085644722961598, "train_loss_scale": 33789.544364508394, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8255639232069482, "test_acc1": 81.09400284179688, "test_acc5": 95.85200255859375, "epoch": 91, "n_parameters": 22050664}
96
+ {"train_lr": 0.000780774827061097, "train_min_lr": 3.2903399174631094e-07, "train_loss": 3.080168022502431, "train_loss_scale": 33475.22302158274, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8191996077999814, "test_acc1": 81.27600220703125, "test_acc5": 95.8680023046875, "epoch": 92, "n_parameters": 22050664}
97
+ {"train_lr": 0.0007707690477464657, "train_min_lr": 3.2481735796878707e-07, "train_loss": 3.091090653511546, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.108262703191938, "test_loss": 0.8320856801881135, "test_acc1": 81.1040024609375, "test_acc5": 95.86000255859375, "epoch": 93, "n_parameters": 22050664}
98
+ {"train_lr": 0.0007607114028304567, "train_min_lr": 3.2057886699855644e-07, "train_loss": 3.0838422050817216, "train_loss_scale": 34339.606714628295, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8225924629746503, "test_acc1": 81.32400263671875, "test_acc5": 95.93400259765625, "epoch": 94, "n_parameters": 22050664}
99
+ {"train_lr": 0.0007506049559691417, "train_min_lr": 3.163198099210285e-07, "train_loss": 3.079352643992975, "train_loss_scale": 32820.38689048761, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8286462451210459, "test_acc1": 81.25400248046876, "test_acc5": 95.87600265625, "epoch": 95, "n_parameters": 22050664}
100
+ {"train_lr": 0.0007404527856841385, "train_min_lr": 3.1204148408624756e-07, "train_loss": 3.0663738454417357, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.165861574961223, "test_loss": 0.8339735549369841, "test_acc1": 81.2540021484375, "test_acc5": 95.91800220703125, "epoch": 96, "n_parameters": 22050664}
101
+ {"train_lr": 0.0007302579844248633, "train_min_lr": 3.077451927137065e-07, "train_loss": 3.069855202611783, "train_loss_scale": 33291.8689048761, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8121081865925825, "test_acc1": 81.27000266601563, "test_acc5": 95.91400248046875, "epoch": 97, "n_parameters": 22050664}
102
+ {"train_lr": 0.0007200236576265395, "train_min_lr": 3.0343224449537864e-07, "train_loss": 3.0711384402762216, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.210594864105055, "test_loss": 0.825240831566221, "test_acc1": 81.20800247070312, "test_acc5": 95.86200265625, "epoch": 98, "n_parameters": 22050664}
103
+ {"train_lr": 0.0007097529227642506, "train_min_lr": 2.991039531970694e-07, "train_loss": 3.0626642130118764, "train_loss_scale": 33056.12789768186, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8430311338592121, "test_acc1": 81.15000262695312, "test_acc5": 95.95600265625, "epoch": 99, "n_parameters": 22050664}
104
+ {"train_lr": 0.0006994489084033304, "train_min_lr": 2.947616372582396e-07, "train_loss": 3.0688416049015417, "train_loss_scale": 33894.318145483616, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8255556992447103, "test_acc1": 81.14400241210937, "test_acc5": 96.0340027734375, "epoch": 100, "n_parameters": 22050664}
105
+ {"train_lr": 0.0006891147532463716, "train_min_lr": 2.90406619390388e-07, "train_loss": 3.050780116130027, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.156300986889931, "test_loss": 0.827310688168038, "test_acc1": 81.40800227539063, "test_acc5": 95.9180026953125, "epoch": 101, "n_parameters": 22050664}
106
+ {"train_lr": 0.0006787536051771449, "train_min_lr": 2.860402261741446e-07, "train_loss": 3.066515294720324, "train_loss_scale": 32951.354116706636, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8045983578412588, "test_acc1": 81.38600220703125, "test_acc5": 95.9480025, "epoch": 102, "n_parameters": 22050664}
107
+ {"train_lr": 0.0006683686203017182, "train_min_lr": 2.8166378765518265e-07, "train_loss": 3.0461722993784006, "train_loss_scale": 34182.446043165466, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.160928911823545, "test_loss": 0.8275398775821424, "test_acc1": 81.2360026953125, "test_acc5": 95.96200263671875, "epoch": 103, "n_parameters": 22050664}
108
+ {"train_lr": 0.000657962961987091, "train_min_lr": 2.772786369390694e-07, "train_loss": 3.0512575588637976, "train_loss_scale": 32951.354116706636, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8159090402244612, "test_acc1": 81.29800264648438, "test_acc5": 95.97200236328125, "epoch": 104, "n_parameters": 22050664}
109
+ {"train_lr": 0.0006475397998975875, "train_min_lr": 2.728861097851944e-07, "train_loss": 3.0388771679571014, "train_loss_scale": 33841.931254996, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8106887500932198, "test_acc1": 81.29600247070313, "test_acc5": 96.000002734375, "epoch": 105, "n_parameters": 22050664}
110
+ {"train_lr": 0.0006371023090293484, "train_min_lr": 2.6848754419987826e-07, "train_loss": 3.0599861630528187, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.226814050849775, "test_loss": 0.8289316569575826, "test_acc1": 81.26400228515625, "test_acc5": 95.944002109375, "epoch": 106, "n_parameters": 22050664}
111
+ {"train_lr": 0.0006266536687432067, "train_min_lr": 2.6408428002880863e-07, "train_loss": 3.0528295041559983, "train_loss_scale": 34784.89528377298, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8158860117879533, "test_acc1": 81.27400231445313, "test_acc5": 96.03600244140625, "epoch": 107, "n_parameters": 22050664}
112
+ {"train_lr": 0.0006161970617962079, "train_min_lr": 2.5967765854890835e-07, "train_loss": 3.0504739769071127, "train_loss_scale": 33789.544364508394, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8229372885845999, "test_acc1": 81.46200267578125, "test_acc5": 95.990002421875, "epoch": 108, "n_parameters": 22050664}
113
+ {"train_lr": 0.0006057356733721271, "train_min_lr": 2.5526902205976684e-07, "train_loss": 3.027345542070105, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.232248372215924, "test_loss": 0.8122271108263321, "test_acc1": 81.43200228515624, "test_acc5": 96.1160024609375, "epoch": 109, "n_parameters": 22050664}
114
+ {"train_lr": 0.0005952726901112141, "train_min_lr": 2.508597134747675e-07, "train_loss": 3.0434238492584913, "train_loss_scale": 32794.19344524381, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8194303642247469, "test_acc1": 81.408002578125, "test_acc5": 96.04600251953126, "epoch": 110, "n_parameters": 22050664}
115
+ {"train_lr": 0.0005848112991395342, "train_min_lr": 2.46451075912018e-07, "train_loss": 3.042934847082928, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.260109536653514, "test_loss": 0.8051694743050873, "test_acc1": 81.51600240234374, "test_acc5": 96.07000265625, "epoch": 111, "n_parameters": 22050664}
116
+ {"train_lr": 0.0005743546870981169, "train_min_lr": 2.420444522852279e-07, "train_loss": 3.032702265478534, "train_loss_scale": 33056.12789768186, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8113489394424526, "test_acc1": 81.53400251953126, "test_acc5": 96.00600236328125, "epoch": 112, "n_parameters": 22050664}
117
+ {"train_lr": 0.0005639060391722829, "train_min_lr": 2.376411848946414e-07, "train_loss": 3.0351245145288877, "train_loss_scale": 33422.83613109512, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8198269318988305, "test_acc1": 81.426002421875, "test_acc5": 95.96200251953125, "epoch": 113, "n_parameters": 22050664}
118
+ {"train_lr": 0.0005534685381214122, "train_min_lr": 2.3324261501816197e-07, "train_loss": 3.028204346374928, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.27811834890303, "test_loss": 0.81069886638918, "test_acc1": 81.58000251953125, "test_acc5": 96.02600251953125, "epoch": 114, "n_parameters": 22050664}
119
+ {"train_lr": 0.0005430453633094327, "train_min_lr": 2.2885008250278335e-07, "train_loss": 3.0253899506003643, "train_loss_scale": 33370.449240607515, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.806049257175613, "test_acc1": 81.58800263671876, "test_acc5": 95.96800296875, "epoch": 115, "n_parameters": 22050664}
120
+ {"train_lr": 0.0005326396897363642, "train_min_lr": 2.2446492535646072e-07, "train_loss": 3.0359667836905095, "train_loss_scale": 33789.544364508394, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.295544336168028, "test_loss": 0.8151890412541746, "test_acc1": 81.63800248046876, "test_acc5": 95.992002578125, "epoch": 116, "n_parameters": 22050664}
121
+ {"train_lr": 0.0005222546870711816, "train_min_lr": 2.200884793405417e-07, "train_loss": 3.032183664689342, "train_loss_scale": 32820.38689048761, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8103964433415245, "test_acc1": 81.62200271484375, "test_acc5": 96.018002578125, "epoch": 117, "n_parameters": 22050664}
122
+ {"train_lr": 0.0005118935186862963, "train_min_lr": 2.1572207756287776e-07, "train_loss": 3.0339327676118995, "train_loss_scale": 33658.577138289365, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8092327595667075, "test_acc1": 81.62000265625, "test_acc5": 96.00000244140625, "epoch": 118, "n_parameters": 22050664}
123
+ {"train_lr": 0.0005015593406939666, "train_min_lr": 2.1136705007174873e-07, "train_loss": 3.0324163059060045, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.330242880814367, "test_loss": 0.8101029439281872, "test_acc1": 81.6780025, "test_acc5": 96.0640026171875, "epoch": 119, "n_parameters": 22050664}
124
+ {"train_lr": 0.0004912553009849237, "train_min_lr": 2.0702472345071697e-07, "train_loss": 3.0192292526805047, "train_loss_scale": 33396.64268585132, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8228759642775733, "test_acc1": 81.61600251953125, "test_acc5": 96.0160023046875, "epoch": 120, "n_parameters": 22050664}
125
+ {"train_lr": 0.00048098453826947695, "train_min_lr": 2.0269642041453535e-07, "train_loss": 3.025032090125896, "train_loss_scale": 33344.25579536371, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.806169944865103, "test_acc1": 81.68000239257813, "test_acc5": 96.0380024609375, "epoch": 121, "n_parameters": 22050664}
126
+ {"train_lr": 0.0004707501811214466, "train_min_lr": 1.9838345940623907e-07, "train_loss": 3.0156086171559577, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.318051948059471, "test_loss": 0.8045097694142174, "test_acc1": 81.66600259765625, "test_acc5": 96.06600236328126, "epoch": 122, "n_parameters": 22050664}
127
+ {"train_lr": 0.00046055534702516, "train_min_lr": 1.940871541955307e-07, "train_loss": 3.012861590924785, "train_loss_scale": 32898.96722621902, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7963014402007329, "test_acc1": 81.79800248046875, "test_acc5": 96.12600251953126, "epoch": 123, "n_parameters": 22050664}
128
+ {"train_lr": 0.0004504031414258444, "train_min_lr": 1.8980881347859748e-07, "train_loss": 3.0120739103030623, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.30440220234396, "test_loss": 0.8192962963162488, "test_acc1": 81.5720023046875, "test_acc5": 96.05600279296875, "epoch": 124, "n_parameters": 22050664}
129
+ {"train_lr": 0.00044029665678367184, "train_min_lr": 1.8554974047946694e-07, "train_loss": 2.995379314005232, "train_loss_scale": 34103.86570743405, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7923603880268927, "test_acc1": 81.78000265625, "test_acc5": 96.1960027734375, "epoch": 125, "n_parameters": 22050664}
130
+ {"train_lr": 0.00043023897163176777, "train_min_lr": 1.813112325530326e-07, "train_loss": 3.009233090088522, "train_loss_scale": 33082.32134292566, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8014870383812271, "test_acc1": 81.71400271484374, "test_acc5": 96.09800259765625, "epoch": 126, "n_parameters": 22050664}
131
+ {"train_lr": 0.0004202331496384605, "train_min_lr": 1.7709458078986844e-07, "train_loss": 3.0072289171884004, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.317251107866149, "test_loss": 0.7990033218196331, "test_acc1": 81.85400255859375, "test_acc5": 96.08800248046875, "epoch": 127, "n_parameters": 22050664}
132
+ {"train_lr": 0.0004102822386740558, "train_min_lr": 1.7290106962294996e-07, "train_loss": 3.0018409578729686, "train_loss_scale": 34051.478816946445, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.803236909040058, "test_acc1": 81.64200275390625, "test_acc5": 96.18000275390625, "epoch": 128, "n_parameters": 22050664}
133
+ {"train_lr": 0.0004003892698824331, "train_min_lr": 1.6873197643640103e-07, "train_loss": 3.0117413543825813, "train_loss_scale": 33213.288569144686, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8133140640859385, "test_acc1": 81.7520021484375, "test_acc5": 96.13600275390625, "epoch": 129, "n_parameters": 22050664}
134
+ {"train_lr": 0.00039055725675772037, "train_min_lr": 1.645885711763932e-07, "train_loss": 2.99114293488953, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.347412562961106, "test_loss": 0.7981168297865918, "test_acc1": 81.86600255859375, "test_acc5": 96.12000224609375, "epoch": 130, "n_parameters": 22050664}
135
+ {"train_lr": 0.00038078919422635947, "train_min_lr": 1.604721159643075e-07, "train_loss": 2.9937791184936877, "train_loss_scale": 34130.05915267786, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8047475976343373, "test_acc1": 81.87200236328125, "test_acc5": 96.07200314453125, "epoch": 131, "n_parameters": 22050664}
136
+ {"train_lr": 0.0003710880577348243, "train_min_lr": 1.5638386471227817e-07, "train_loss": 2.9823104569094356, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.347648527982424, "test_loss": 0.7992629258687259, "test_acc1": 81.6860023828125, "test_acc5": 96.14600255859375, "epoch": 132, "n_parameters": 22050664}
137
+ {"train_lr": 0.00036145680234326677, "train_min_lr": 1.5232506274124e-07, "train_loss": 2.982239541437605, "train_loss_scale": 24818.289368505197, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7932974567622629, "test_acc1": 82.04600265625, "test_acc5": 96.2040026953125, "epoch": 133, "n_parameters": 22050664}
138
+ {"train_lr": 0.00035189836182537853, "train_min_lr": 1.482969464015921e-07, "train_loss": 2.9952435807930193, "train_loss_scale": 16384.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.361135788863416, "test_loss": 0.7917518304049513, "test_acc1": 81.8000028125, "test_acc5": 96.19200255859376, "epoch": 134, "n_parameters": 22050664}
139
+ {"train_lr": 0.0003424156477747363, "train_min_lr": 1.4430074269659297e-07, "train_loss": 2.986121036737657, "train_loss_scale": 31065.426059152676, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.324703473457806, "test_loss": 0.8021120613312903, "test_acc1": 81.93800255859375, "test_acc5": 96.1920026953125, "epoch": 135, "n_parameters": 22050664}
140
+ {"train_lr": 0.00033301154871790155, "train_min_lr": 1.4033766890860325e-07, "train_loss": 2.989560591945831, "train_loss_scale": 33187.09512390088, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.8029765313818255, "test_acc1": 81.97000291015625, "test_acc5": 96.228002578125, "epoch": 136, "n_parameters": 22050664}
141
+ {"train_lr": 0.00032368892923454574, "train_min_lr": 1.3640893222828968e-07, "train_loss": 2.9786151676631563, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.362716270770004, "test_loss": 0.7872350294398898, "test_acc1": 81.86000247070312, "test_acc5": 96.2040026953125, "epoch": 137, "n_parameters": 22050664}
142
+ {"train_lr": 0.0003144506290848743, "train_min_lr": 1.3251572938690357e-07, "train_loss": 2.9767526229984944, "train_loss_scale": 33108.514788169465, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7968782120533572, "test_acc1": 81.9400027734375, "test_acc5": 96.13400283203126, "epoch": 138, "n_parameters": 22050664}
143
+ {"train_lr": 0.0003052994623446056, "train_min_lr": 1.2865924629174526e-07, "train_loss": 2.9753187026956573, "train_loss_scale": 33213.288569144686, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.792562966128342, "test_acc1": 81.95600255859375, "test_acc5": 96.24200232421875, "epoch": 139, "n_parameters": 22050664}
144
+ {"train_lr": 0.0002962382165477797, "train_min_lr": 1.248406576649231e-07, "train_loss": 2.972105971081175, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.410739273571377, "test_loss": 0.7984495508761806, "test_acc1": 81.98800232421875, "test_acc5": 96.24400236328125, "epoch": 140, "n_parameters": 22050664}
145
+ {"train_lr": 0.000287269651837642, "train_min_lr": 1.210611266855249e-07, "train_loss": 2.969630484315131, "train_loss_scale": 33239.482014388486, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7939257250942346, "test_acc1": 82.09400216796875, "test_acc5": 96.202002734375, "epoch": 141, "n_parameters": 22050664}
146
+ {"train_lr": 0.00027839650012588654, "train_min_lr": 1.1732180463530074e-07, "train_loss": 2.963131708707169, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.373655754885228, "test_loss": 0.7922103651834809, "test_acc1": 82.056002734375, "test_acc5": 96.2580031640625, "epoch": 142, "n_parameters": 22050664}
147
+ {"train_lr": 0.0002696214642604783, "train_min_lr": 1.1362383054796988e-07, "train_loss": 2.9901772019722097, "train_loss_scale": 32977.54756195044, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7907280547700766, "test_acc1": 82.004002421875, "test_acc5": 96.23200275390624, "epoch": 143, "n_parameters": 22050664}
148
+ {"train_lr": 0.00026094721720234913, "train_min_lr": 1.099683308622625e-07, "train_loss": 2.9724335675950435, "train_loss_scale": 33606.19024780176, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.794846570446291, "test_acc1": 82.08000240234375, "test_acc5": 96.26200263671875, "epoch": 144, "n_parameters": 22050664}
149
+ {"train_lr": 0.0002523764012111823, "train_min_lr": 1.0635641907879468e-07, "train_loss": 2.9746354275184284, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.435641677545416, "test_loss": 0.7899798398254482, "test_acc1": 82.1240023046875, "test_acc5": 96.18200275390625, "epoch": 145, "n_parameters": 22050664}
150
+ {"train_lr": 0.0002439116270405546, "train_min_lr": 1.0278919542088544e-07, "train_loss": 2.9593609390165403, "train_loss_scale": 33396.64268585132, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7913820301075928, "test_acc1": 82.110002734375, "test_acc5": 96.16800248046874, "epoch": 146, "n_parameters": 22050664}
151
+ {"train_lr": 0.00023555547314267977, "train_min_lr": 9.926774649941659e-08, "train_loss": 2.9662592806499735, "train_loss_scale": 32898.96722621902, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.374161851777734, "test_loss": 0.7895655076922351, "test_acc1": 82.14600255859375, "test_acc5": 96.26400251953125, "epoch": 147, "n_parameters": 22050664}
152
+ {"train_lr": 0.0002273104848829826, "train_min_lr": 9.579314498184329e-08, "train_loss": 2.956444854239861, "train_loss_scale": 32898.96722621902, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7937538271186916, "test_acc1": 82.05000248046875, "test_acc5": 96.2780027734375, "epoch": 148, "n_parameters": 22050664}
153
+ {"train_lr": 0.00021917917376475845, "train_min_lr": 9.236644926544675e-08, "train_loss": 2.960075378155918, "train_loss_scale": 33579.99680255795, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7853776978627416, "test_acc1": 82.15600263671875, "test_acc5": 96.26400259765624, "epoch": 149, "n_parameters": 22050664}
154
+ {"train_lr": 0.00021116401666414306, "train_min_lr": 8.898870315493756e-08, "train_loss": 2.960477837793928, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.412221198078159, "test_loss": 0.7872169410226909, "test_acc1": 82.11400255859375, "test_acc5": 96.1900025, "epoch": 150, "n_parameters": 22050664}
155
+ {"train_lr": 0.00020326745507563424, "train_min_lr": 8.566093554450148e-08, "train_loss": 2.963735546115682, "train_loss_scale": 33449.02957633893, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7846469998587179, "test_acc1": 82.17200265625, "test_acc5": 96.1900026953125, "epoch": 151, "n_parameters": 22050664}
156
+ {"train_lr": 0.00019549189436838765, "train_min_lr": 8.238416010438994e-08, "train_loss": 2.9542939141214037, "train_loss_scale": 32846.580335731414, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7945135463285082, "test_acc1": 82.13800231445312, "test_acc5": 96.274002421875, "epoch": 152, "n_parameters": 22050664}
157
+ {"train_lr": 0.00018783970305351788, "train_min_lr": 7.915937497214438e-08, "train_loss": 2.9590861505979924, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.42312776175239, "test_loss": 0.7894475546501974, "test_acc1": 82.18800255859375, "test_acc5": 96.24800248046876, "epoch": 153, "n_parameters": 22050664}
158
+ {"train_lr": 0.00018031321206262802, "train_min_lr": 7.598756244855594e-08, "train_loss": 2.9545637550685617, "train_loss_scale": 32872.77378097522, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7832233556581818, "test_acc1": 82.2800026953125, "test_acc5": 96.26000232421875, "epoch": 154, "n_parameters": 22050664}
159
+ {"train_lr": 0.00017291471403778477, "train_min_lr": 7.286968869844474e-08, "train_loss": 2.9411257664314943, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.460535157308113, "test_loss": 0.7818146299087364, "test_acc1": 82.31200263671874, "test_acc5": 96.2800026171875, "epoch": 155, "n_parameters": 22050664}
160
+ {"train_lr": 0.000165646462633157, "train_min_lr": 6.980670345635877e-08, "train_loss": 2.946246955415709, "train_loss_scale": 33318.0623501199, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7851437261996378, "test_acc1": 82.33400255859375, "test_acc5": 96.21200240234376, "epoch": 156, "n_parameters": 22050664}
161
+ {"train_lr": 0.0001585106718285323, "train_min_lr": 6.67995397372748e-08, "train_loss": 2.9569651318682757, "train_loss_scale": 33763.35091926459, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7856416392872352, "test_acc1": 82.270002265625, "test_acc5": 96.2160025390625, "epoch": 157, "n_parameters": 22050664}
162
+ {"train_lr": 0.00015150951525491518, "train_min_lr": 6.384911355239218e-08, "train_loss": 2.943816006088333, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.412836007362932, "test_loss": 0.7906035667157355, "test_acc1": 82.2500026171875, "test_acc5": 96.278002265625, "epoch": 158, "n_parameters": 22050664}
163
+ {"train_lr": 0.00014464512553242017, "train_min_lr": 6.095632363010868e-08, "train_loss": 2.936321774380003, "train_loss_scale": 33108.514788169465, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7884338106817872, "test_acc1": 82.33600267578124, "test_acc5": 96.27600263671874, "epoch": 159, "n_parameters": 22050664}
164
+ {"train_lr": 0.00013791959362065309, "train_min_lr": 5.812205114225775e-08, "train_loss": 2.9479614004290267, "train_loss_scale": 32951.354116706636, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7865795683087283, "test_acc1": 82.33600251953125, "test_acc5": 96.28400267578125, "epoch": 160, "n_parameters": 22050664}
165
+ {"train_lr": 0.00013133496818178628, "train_min_lr": 5.534715943569533e-08, "train_loss": 2.9334283141399937, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.39178083078276, "test_loss": 0.7734620242400934, "test_acc1": 82.3400026171875, "test_acc5": 96.28600275390625, "epoch": 161, "n_parameters": 22050664}
166
+ {"train_lr": 0.00012489325495651496, "train_min_lr": 5.2632493769315933e-08, "train_loss": 2.9349434450328302, "train_loss_scale": 32820.38689048761, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7805654825145052, "test_acc1": 82.2200024609375, "test_acc5": 96.296002578125, "epoch": 162, "n_parameters": 22050664}
167
+ {"train_lr": 0.00011859641615309004, "train_min_lr": 4.997888105657931e-08, "train_loss": 2.9322823003279885, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.401194008324834, "test_loss": 0.7856668759393328, "test_acc1": 82.330002421875, "test_acc5": 96.29000220703125, "epoch": 163, "n_parameters": 22050664}
168
+ {"train_lr": 0.0001124463698496093, "train_min_lr": 4.738712961362396e-08, "train_loss": 2.933759910835446, "train_loss_scale": 34418.18705035971, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7772776178503764, "test_acc1": 82.33600267578124, "test_acc5": 96.33000259765625, "epoch": 164, "n_parameters": 22050664}
169
+ {"train_lr": 0.00010644498940975462, "train_min_lr": 4.4858028913046215e-08, "train_loss": 2.940950796353541, "train_loss_scale": 33029.93445243805, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.780951201233245, "test_acc1": 82.37200224609376, "test_acc5": 96.3480023046875, "epoch": 165, "n_parameters": 22050664}
170
+ {"train_lr": 0.0001005941029121438, "train_min_lr": 4.2392349343419386e-08, "train_loss": 2.930712493108236, "train_loss_scale": 28668.725819344523, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7806092898809273, "test_acc1": 82.38400234375, "test_acc5": 96.2840025, "epoch": 166, "n_parameters": 22050664}
171
+ {"train_lr": 9.489549259348315e-05, "train_min_lr": 3.999084197462592e-08, "train_loss": 2.9364966816134115, "train_loss_scale": 16384.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.445717598132187, "test_loss": 0.7754470311275875, "test_acc1": 82.48000236328124, "test_acc5": 96.31200236328125, "epoch": 167, "n_parameters": 22050664}
172
+ {"train_lr": 8.935089430567914e-05, "train_min_lr": 3.7654238329074395e-08, "train_loss": 2.9296442612612563, "train_loss_scale": 27057.82893685052, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.419443316501583, "test_loss": 0.7821521736283339, "test_acc1": 82.4620023828125, "test_acc5": 96.30800251953124, "epoch": 168, "n_parameters": 22050664}
173
+ {"train_lr": 8.396199698708256e-05, "train_min_lr": 3.538325015887036e-08, "train_loss": 2.9280806060889355, "train_loss_scale": 33579.99680255795, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7808758036325906, "test_acc1": 82.50800283203125, "test_acc5": 96.31400228515625, "epoch": 169, "n_parameters": 22050664}
174
+ {"train_lr": 7.873044214801955e-05, "train_min_lr": 3.31785692290099e-08, "train_loss": 2.9233740523612375, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.401496309742367, "test_loss": 0.7729201214459106, "test_acc1": 82.4900024609375, "test_acc5": 96.3140024609375, "epoch": 170, "n_parameters": 22050664}
175
+ {"train_lr": 7.365782337077339e-05, "train_min_lr": 3.104086710666173e-08, "train_loss": 2.9217132557924987, "train_loss_scale": 33815.7378097522, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7769573926925659, "test_acc1": 82.5280025390625, "test_acc5": 96.3180024609375, "epoch": 171, "n_parameters": 22050664}
176
+ {"train_lr": 6.874568582416325e-05, "train_min_lr": 2.897079495660051e-08, "train_loss": 2.937382745871441, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.398904760392735, "test_loss": 0.7806921281659877, "test_acc1": 82.47600240234375, "test_acc5": 96.34400228515625, "epoch": 172, "n_parameters": 22050664}
177
+ {"train_lr": 6.399552579287093e-05, "train_min_lr": 2.6968983342856424e-08, "train_loss": 2.929157340102535, "train_loss_scale": 32898.96722621902, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7735310486937297, "test_acc1": 82.55400248046875, "test_acc5": 96.314002578125, "epoch": 173, "n_parameters": 22050664}
178
+ {"train_lr": 5.940879022165765e-05, "train_min_lr": 2.5036042036639032e-08, "train_loss": 2.9299022748077705, "train_loss_scale": 32794.19344524381, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7715491538739386, "test_acc1": 82.45800240234375, "test_acc5": 96.3480025390625, "epoch": 174, "n_parameters": 22050664}
179
+ {"train_lr": 5.4986876274611e-05, "train_min_lr": 2.317255983059502e-08, "train_loss": 2.9233135629615052, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.413696555877856, "test_loss": 0.7740811567952615, "test_acc1": 82.46400240234375, "test_acc5": 96.30400267578125, "epoch": 175, "n_parameters": 22050664}
180
+ {"train_lr": 5.073113090955539e-05, "train_min_lr": 2.1379104359456324e-08, "train_loss": 2.917001597982326, "train_loss_scale": 33579.99680255795, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7737324766757834, "test_acc1": 82.48200283203126, "test_acc5": 96.30000240234375, "epoch": 176, "n_parameters": 22050664}
181
+ {"train_lr": 4.664285046775539e-05, "train_min_lr": 1.9656221927132977e-08, "train_loss": 2.9184631969717194, "train_loss_scale": 24739.709032773782, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7749518234538668, "test_acc1": 82.54200283203124, "test_acc5": 96.298002734375, "epoch": 177, "n_parameters": 22050664}
182
+ {"train_lr": 4.272328027903755e-05, "train_min_lr": 1.800443734030389e-08, "train_loss": 2.9138236542781004, "train_loss_scale": 16384.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.454740936331135, "test_loss": 0.7744512380534456, "test_acc1": 82.50600236328125, "test_acc5": 96.32200271484375, "epoch": 178, "n_parameters": 22050664}
183
+ {"train_lr": 3.897361428245112e-05, "train_min_lr": 1.6424253748555405e-08, "train_loss": 2.92250551984941, "train_loss_scale": 30986.845723421262, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.47123324061088, "test_loss": 0.7706019772600582, "test_acc1": 82.512002265625, "test_acc5": 96.34000263671875, "epoch": 179, "n_parameters": 22050664}
184
+ {"train_lr": 3.539499466258256e-05, "train_min_lr": 1.4916152491117093e-08, "train_loss": 2.9254671096515885, "train_loss_scale": 33029.93445243805, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7771022417618119, "test_acc1": 82.5180025390625, "test_acc5": 96.31000279296875, "epoch": 180, "n_parameters": 22050664}
185
+ {"train_lr": 3.198851150163483e-05, "train_min_lr": 1.3480592950241282e-08, "train_loss": 2.9119572204937465, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.471226318277044, "test_loss": 0.7712065502432467, "test_acc1": 82.48400271484375, "test_acc5": 96.33200263671876, "epoch": 181, "n_parameters": 22050664}
186
+ {"train_lr": 2.8755202447378124e-05, "train_min_lr": 1.2118012411270712e-08, "train_loss": 2.919789870032113, "train_loss_scale": 34234.83293365308, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7697338130865389, "test_acc1": 82.48600255859375, "test_acc5": 96.33800251953124, "epoch": 182, "n_parameters": 22050664}
187
+ {"train_lr": 2.5696052397072256e-05, "train_min_lr": 1.0828825929437207e-08, "train_loss": 2.9249196721733712, "train_loss_scale": 33894.318145483616, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7702420989748175, "test_acc1": 82.55000263671874, "test_acc5": 96.33600251953125, "epoch": 183, "n_parameters": 22050664}
188
+ {"train_lr": 2.281199319745709e-05, "train_min_lr": 9.613426203431694e-09, "train_loss": 2.920290900744218, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.458990007472172, "test_loss": 0.773264969919474, "test_acc1": 82.5820022265625, "test_acc5": 96.36400244140626, "epoch": 184, "n_parameters": 22050664}
189
+ {"train_lr": 2.0103903360903355e-05, "train_min_lr": 8.472183455784596e-09, "train_loss": 2.922189742731724, "train_loss_scale": 32846.580335731414, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7718835865495769, "test_acc1": 82.592002578125, "test_acc5": 96.3600024609375, "epoch": 185, "n_parameters": 22050664}
190
+ {"train_lr": 1.7572607797809112e-05, "train_min_lr": 7.40544532009231e-09, "train_loss": 2.926638008521901, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.471824156199332, "test_loss": 0.7708597436888527, "test_acc1": 82.61600263671875, "test_acc5": 96.3660024609375, "epoch": 186, "n_parameters": 22050664}
191
+ {"train_lr": 1.521887756532412e-05, "train_min_lr": 6.4135367351246844e-09, "train_loss": 2.9166630273290295, "train_loss_scale": 33606.19024780176, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7691572500776699, "test_acc1": 82.57600287109375, "test_acc5": 96.35200259765625, "epoch": 187, "n_parameters": 22050664}
192
+ {"train_lr": 1.3043429632478247e-05, "train_min_lr": 5.496759845845538e-09, "train_loss": 2.9169099446919136, "train_loss_scale": 33291.8689048761, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7696331463014806, "test_acc1": 82.5800026953125, "test_acc5": 96.3440025, "epoch": 188, "n_parameters": 22050664}
193
+ {"train_lr": 1.1046926661785898e-05, "train_min_lr": 4.655393911376369e-09, "train_loss": 2.927640399177202, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.465344116460981, "test_loss": 0.7706647268460907, "test_acc1": 82.564002578125, "test_acc5": 96.368002265625, "epoch": 189, "n_parameters": 22050664}
194
+ {"train_lr": 9.229976807392545e-06, "train_min_lr": 3.889695219931303e-09, "train_loss": 2.921989344078288, "train_loss_scale": 33658.577138289365, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7712542056354857, "test_acc1": 82.56800240234375, "test_acc5": 96.35400251953125, "epoch": 190, "n_parameters": 22050664}
195
+ {"train_lr": 7.593133529824991e-06, "train_min_lr": 3.1998970107492513e-09, "train_loss": 2.917872938344614, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.447068836858614, "test_loss": 0.7700103600971572, "test_acc1": 82.57000255859376, "test_acc5": 96.3560025, "epoch": 191, "n_parameters": 22050664}
196
+ {"train_lr": 6.136895427402006e-06, "train_min_lr": 2.5862094030469547e-09, "train_loss": 2.90741600762073, "train_loss_scale": 33449.02957633893, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7708926533014719, "test_acc1": 82.5620025, "test_acc5": 96.35400251953125, "epoch": 192, "n_parameters": 22050664}
197
+ {"train_lr": 4.861706084356536e-06, "train_min_lr": 2.048819332014636e-09, "train_loss": 2.9134080779495286, "train_loss_scale": 33134.70823341327, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7696356020355952, "test_acc1": 82.56000283203124, "test_acc5": 96.36200232421875, "epoch": 193, "n_parameters": 22050664}
198
+ {"train_lr": 3.7679539357154917e-06, "train_min_lr": 1.5878904918737507e-09, "train_loss": 2.9181919657259727, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.45660332657641, "test_loss": 0.771700682075879, "test_acc1": 82.5820026171875, "test_acc5": 96.35200236328124, "epoch": 194, "n_parameters": 22050664}
199
+ {"train_lr": 2.8559721489787542e-06, "train_min_lr": 1.2035632860141317e-09, "train_loss": 2.9135249109624577, "train_loss_scale": 33894.318145483616, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7703310206873726, "test_acc1": 82.56000248046875, "test_acc5": 96.3620025, "epoch": 195, "n_parameters": 22050664}
200
+ {"train_lr": 2.1260385226331358e-06, "train_min_lr": 8.959547842257337e-10, "train_loss": 2.9228672822125907, "train_loss_scale": 32794.19344524381, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7709041916914569, "test_acc1": 82.5720025390625, "test_acc5": 96.3540024609375, "epoch": 196, "n_parameters": 22050664}
201
+ {"train_lr": 1.5783754015322459e-06, "train_min_lr": 6.651586870380765e-10, "train_loss": 2.904090182148486, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.401327879118215, "test_loss": 0.769470669386041, "test_acc1": 82.56800251953125, "test_acc5": 96.3500025, "epoch": 197, "n_parameters": 22050664}
202
+ {"train_lr": 1.2131496091681044e-06, "train_min_lr": 5.112452971781352e-10, "train_loss": 2.9214885762269547, "train_loss_scale": 32820.38689048761, "train_weight_decay": 0.049999999999998865, "train_grad_norm": Infinity, "test_loss": 0.7701320882516963, "test_acc1": 82.5560028125, "test_acc5": 96.3540024609375, "epoch": 198, "n_parameters": 22050664}
203
+ {"train_lr": 1.0304723968550897e-06, "train_min_lr": 4.34261498155455e-10, "train_loss": 2.9054273682580196, "train_loss_scale": 32768.0, "train_weight_decay": 0.049999999999998865, "train_grad_norm": 8.406738405128559, "test_loss": 0.7702368496711017, "test_acc1": 82.5660025390625, "test_acc5": 96.3600024609375, "epoch": 199, "n_parameters": 22050664}
pretrained models/vit_small_800ep/linear_probing_checkpoint.pth.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e653c623d07e71375bc763f59c47b395744e0ab1aaef4a48abb56f431aadd08
3
+ size 12298179
pretrained models/vit_small_800ep/linear_probing_log.txt ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_lr": 0.039999999999999286, "train_loss": 1.689545123293377, "epoch": 0, "test_loss": 1.1036953158600598, "test_acc1": 72.69, "test_acc5": 91.494}
2
+ {"train_lr": 0.03999013120731347, "train_loss": 1.517477451660001, "epoch": 1, "test_loss": 1.0695886803524834, "test_acc1": 73.704, "test_acc5": 91.876}
3
+ {"train_lr": 0.03996053456856475, "train_loss": 1.4851327343775442, "epoch": 2, "test_loss": 1.068002465846283, "test_acc1": 73.594, "test_acc5": 91.892}
4
+ {"train_lr": 0.03991123929206054, "train_loss": 1.4702438482413658, "epoch": 3, "test_loss": 1.0439041412378451, "test_acc1": 74.742, "test_acc5": 92.066}
5
+ {"train_lr": 0.03984229402628841, "train_loss": 1.458547806575561, "epoch": 4, "test_loss": 1.026508611493877, "test_acc1": 74.916, "test_acc5": 92.102}
6
+ {"train_lr": 0.03975376681190351, "train_loss": 1.4446705719366812, "epoch": 5, "test_loss": 1.0384959582315416, "test_acc1": 74.518, "test_acc5": 92.134}
7
+ {"train_lr": 0.03964574501457449, "train_loss": 1.4387426459774995, "epoch": 6, "test_loss": 1.0347914073084081, "test_acc1": 74.702, "test_acc5": 92.184}
8
+ {"train_lr": 0.03951833523877471, "train_loss": 1.4285309173333378, "epoch": 7, "test_loss": 1.0441394304590566, "test_acc1": 74.266, "test_acc5": 92.12}
9
+ {"train_lr": 0.03937166322257292, "train_loss": 1.4219058672198281, "epoch": 8, "test_loss": 1.0304473516892414, "test_acc1": 74.598, "test_acc5": 92.338}
10
+ {"train_lr": 0.03920587371353957, "train_loss": 1.4172617367828806, "epoch": 9, "test_loss": 1.0281844393787336, "test_acc1": 74.688, "test_acc5": 92.284}
11
+ {"train_lr": 0.03902113032590416, "train_loss": 1.4104318962024804, "epoch": 10, "test_loss": 1.0183889675809412, "test_acc1": 74.81, "test_acc5": 92.226}
12
+ {"train_lr": 0.03881761537908567, "train_loss": 1.4080614700984841, "epoch": 11, "test_loss": 1.0225087730023934, "test_acc1": 74.956, "test_acc5": 92.448}
13
+ {"train_lr": 0.038595529717765475, "train_loss": 1.400154372171377, "epoch": 12, "test_loss": 1.0064463303709517, "test_acc1": 75.28, "test_acc5": 92.508}
14
+ {"train_lr": 0.03835509251368071, "train_loss": 1.39751422834901, "epoch": 13, "test_loss": 1.008302801307671, "test_acc1": 75.172, "test_acc5": 92.514}
15
+ {"train_lr": 0.03809654104932037, "train_loss": 1.391106969917925, "epoch": 14, "test_loss": 1.0169777056316331, "test_acc1": 74.974, "test_acc5": 92.354}
16
+ {"train_lr": 0.03782013048376655, "train_loss": 1.3887073723867107, "epoch": 15, "test_loss": 0.9981068696978749, "test_acc1": 75.262, "test_acc5": 92.588}
17
+ {"train_lr": 0.0375261336008776, "train_loss": 1.3799249568162635, "epoch": 16, "test_loss": 1.001631004621788, "test_acc1": 75.402, "test_acc5": 92.398}
18
+ {"train_lr": 0.0372148405400789, "train_loss": 1.3750834800064944, "epoch": 17, "test_loss": 1.0064559562169775, "test_acc1": 75.348, "test_acc5": 92.51}
19
+ {"train_lr": 0.03688655851004074, "train_loss": 1.3729945748401717, "epoch": 18, "test_loss": 1.0036106057541103, "test_acc1": 75.326, "test_acc5": 92.65}
20
+ {"train_lr": 0.03654161148549013, "train_loss": 1.3666629909921568, "epoch": 19, "test_loss": 0.9895373914314776, "test_acc1": 75.56, "test_acc5": 92.642}
21
+ {"train_lr": 0.03618033988749922, "train_loss": 1.3631684546010754, "epoch": 20, "test_loss": 0.9936697570112895, "test_acc1": 75.37, "test_acc5": 92.628}
22
+ {"train_lr": 0.035803100247513146, "train_loss": 1.35827666853135, "epoch": 21, "test_loss": 0.988680186016219, "test_acc1": 75.384, "test_acc5": 92.688}
23
+ {"train_lr": 0.03541026485551477, "train_loss": 1.352964636985772, "epoch": 22, "test_loss": 0.991098478892628, "test_acc1": 75.456, "test_acc5": 92.686}
24
+ {"train_lr": 0.035002221392609514, "train_loss": 1.3478119215978601, "epoch": 23, "test_loss": 0.9867509590575889, "test_acc1": 75.55, "test_acc5": 92.624}
25
+ {"train_lr": 0.03457937254842843, "train_loss": 1.342876697786319, "epoch": 24, "test_loss": 0.9745037979161253, "test_acc1": 75.82, "test_acc5": 92.844}
26
+ {"train_lr": 0.03414213562373142, "train_loss": 1.3372451914861179, "epoch": 25, "test_loss": 0.9822182805866612, "test_acc1": 75.526, "test_acc5": 92.856}
27
+ {"train_lr": 0.03369094211857451, "train_loss": 1.3362941288957582, "epoch": 26, "test_loss": 0.972987236265017, "test_acc1": 75.824, "test_acc5": 92.866}
28
+ {"train_lr": 0.0332262373064724, "train_loss": 1.3311171164385045, "epoch": 27, "test_loss": 0.9720645779933856, "test_acc1": 75.78, "test_acc5": 92.906}
29
+ {"train_lr": 0.03274847979497452, "train_loss": 1.325082043096566, "epoch": 28, "test_loss": 0.9673444857554776, "test_acc1": 75.916, "test_acc5": 92.886}
30
+ {"train_lr": 0.0322581410730585, "train_loss": 1.318458106273565, "epoch": 29, "test_loss": 0.9577055127188867, "test_acc1": 75.986, "test_acc5": 92.948}
31
+ {"train_lr": 0.03175570504584898, "train_loss": 1.313698449609474, "epoch": 30, "test_loss": 0.9652754348151538, "test_acc1": 76.16, "test_acc5": 92.866}
32
+ {"train_lr": 0.0312416675570427, "train_loss": 1.3116067488091632, "epoch": 31, "test_loss": 0.9613683790120543, "test_acc1": 76.156, "test_acc5": 92.992}
33
+ {"train_lr": 0.030716535899579463, "train_loss": 1.3072730516925597, "epoch": 32, "test_loss": 0.9594878790025808, "test_acc1": 76.284, "test_acc5": 92.818}
34
+ {"train_lr": 0.03018082831500789, "train_loss": 1.3039345941580713, "epoch": 33, "test_loss": 0.9494450887170981, "test_acc1": 76.558, "test_acc5": 92.958}
35
+ {"train_lr": 0.029635073482033827, "train_loss": 1.297146888253407, "epoch": 34, "test_loss": 0.9429929582972307, "test_acc1": 76.414, "test_acc5": 93.052}
36
+ {"train_lr": 0.02907980999479069, "train_loss": 1.2918728207365964, "epoch": 35, "test_loss": 0.952386612300666, "test_acc1": 76.178, "test_acc5": 93.042}
37
+ {"train_lr": 0.028515585831301034, "train_loss": 1.2884250930227792, "epoch": 36, "test_loss": 0.9462574858370484, "test_acc1": 76.444, "test_acc5": 93.1}
38
+ {"train_lr": 0.027942957812695356, "train_loss": 1.2840186556021627, "epoch": 37, "test_loss": 0.9460784324958008, "test_acc1": 76.094, "test_acc5": 93.11}
39
+ {"train_lr": 0.027362491053693778, "train_loss": 1.2778716668629417, "epoch": 38, "test_loss": 0.9403435928100834, "test_acc1": 76.294, "test_acc5": 93.006}
40
+ {"train_lr": 0.026774758404905383, "train_loss": 1.2729159116197508, "epoch": 39, "test_loss": 0.9281357269049907, "test_acc1": 76.456, "test_acc5": 93.178}
41
+ {"train_lr": 0.02618033988749931, "train_loss": 1.2710548778073476, "epoch": 40, "test_loss": 0.9318778451380073, "test_acc1": 76.654, "test_acc5": 93.054}
42
+ {"train_lr": 0.025579822120785283, "train_loss": 1.2680727961107183, "epoch": 41, "test_loss": 0.9226416005771987, "test_acc1": 76.732, "test_acc5": 93.272}
43
+ {"train_lr": 0.024973797743296628, "train_loss": 1.2612833142899478, "epoch": 42, "test_loss": 0.9267138875640777, "test_acc1": 76.65, "test_acc5": 93.178}
44
+ {"train_lr": 0.024362864827931218, "train_loss": 1.2554466980405319, "epoch": 43, "test_loss": 0.9270109015764022, "test_acc1": 76.604, "test_acc5": 93.252}
45
+ {"train_lr": 0.02374762629171483, "train_loss": 1.2551541850446892, "epoch": 44, "test_loss": 0.9233079135646017, "test_acc1": 76.602, "test_acc5": 93.228}
46
+ {"train_lr": 0.023128689300804688, "train_loss": 1.2467530475328334, "epoch": 45, "test_loss": 0.9222866944132411, "test_acc1": 76.572, "test_acc5": 93.314}
47
+ {"train_lr": 0.022506664671286574, "train_loss": 1.2423737958525887, "epoch": 46, "test_loss": 0.9160688839168573, "test_acc1": 76.806, "test_acc5": 93.312}
48
+ {"train_lr": 0.021882166266370087, "train_loss": 1.2395352902003942, "epoch": 47, "test_loss": 0.9117495895040278, "test_acc1": 76.906, "test_acc5": 93.356}
49
+ {"train_lr": 0.02125581039058589, "train_loss": 1.234340686553393, "epoch": 48, "test_loss": 0.9103791954337942, "test_acc1": 77.078, "test_acc5": 93.3}
50
+ {"train_lr": 0.020628215181562718, "train_loss": 1.229273920456251, "epoch": 49, "test_loss": 0.9123710898340357, "test_acc1": 76.818, "test_acc5": 93.374}
51
+ {"train_lr": 0.019999999999999643, "train_loss": 1.2242142646171796, "epoch": 50, "test_loss": 0.8978723425190059, "test_acc1": 77.018, "test_acc5": 93.436}
52
+ {"train_lr": 0.019371784818437987, "train_loss": 1.222054805463781, "epoch": 51, "test_loss": 0.89581045439961, "test_acc1": 77.412, "test_acc5": 93.464}
53
+ {"train_lr": 0.01874418960941403, "train_loss": 1.2172422857401661, "epoch": 52, "test_loss": 0.8990671562449056, "test_acc1": 77.198, "test_acc5": 93.38}
54
+ {"train_lr": 0.018117833733629834, "train_loss": 1.212679974318217, "epoch": 53, "test_loss": 0.8933714143171603, "test_acc1": 77.154, "test_acc5": 93.568}
55
+ {"train_lr": 0.01749333532871361, "train_loss": 1.2107014010984676, "epoch": 54, "test_loss": 0.8994917123749548, "test_acc1": 76.984, "test_acc5": 93.422}
56
+ {"train_lr": 0.016871310699195462, "train_loss": 1.2044116849192796, "epoch": 55, "test_loss": 0.887119930997795, "test_acc1": 77.466, "test_acc5": 93.474}
57
+ {"train_lr": 0.01625237370828576, "train_loss": 1.2036337877710026, "epoch": 56, "test_loss": 0.8806577618328892, "test_acc1": 77.352, "test_acc5": 93.638}
58
+ {"train_lr": 0.01563713517206934, "train_loss": 1.197582883147386, "epoch": 57, "test_loss": 0.8812796033310647, "test_acc1": 77.502, "test_acc5": 93.568}
59
+ {"train_lr": 0.01502620225670273, "train_loss": 1.1929604907909903, "epoch": 58, "test_loss": 0.8784429269785784, "test_acc1": 77.658, "test_acc5": 93.546}
60
+ {"train_lr": 0.014420177879215313, "train_loss": 1.1914965074402266, "epoch": 59, "test_loss": 0.8732185930165709, "test_acc1": 77.722, "test_acc5": 93.72}
61
+ {"train_lr": 0.013819660112501138, "train_loss": 1.186932871850154, "epoch": 60, "test_loss": 0.8700835025508185, "test_acc1": 77.706, "test_acc5": 93.634}
62
+ {"train_lr": 0.013225241595094335, "train_loss": 1.1848809746341011, "epoch": 61, "test_loss": 0.8694420481974982, "test_acc1": 77.774, "test_acc5": 93.678}
63
+ {"train_lr": 0.012637508946306743, "train_loss": 1.179342179419324, "epoch": 62, "test_loss": 0.8692531595576782, "test_acc1": 77.85, "test_acc5": 93.704}
64
+ {"train_lr": 0.012057042187304634, "train_loss": 1.175780507691276, "epoch": 63, "test_loss": 0.8689137622333911, "test_acc1": 77.756, "test_acc5": 93.658}
65
+ {"train_lr": 0.011484414168698694, "train_loss": 1.172104443057467, "epoch": 64, "test_loss": 0.8636522350110569, "test_acc1": 77.876, "test_acc5": 93.678}
66
+ {"train_lr": 0.010920190005208911, "train_loss": 1.1695272167103168, "epoch": 65, "test_loss": 0.8637410975536521, "test_acc1": 77.994, "test_acc5": 93.664}
67
+ {"train_lr": 0.010364926517965535, "train_loss": 1.1662103381995765, "epoch": 66, "test_loss": 0.8614302010712575, "test_acc1": 78.008, "test_acc5": 93.788}
68
+ {"train_lr": 0.00981917168499237, "train_loss": 1.1647515781342792, "epoch": 67, "test_loss": 0.8600807056135061, "test_acc1": 77.726, "test_acc5": 93.744}
69
+ {"train_lr": 0.00928346410042021, "train_loss": 1.159875242795331, "epoch": 68, "test_loss": 0.8532888666935721, "test_acc1": 78.158, "test_acc5": 93.818}
70
+ {"train_lr": 0.008758332442957144, "train_loss": 1.1573488503075635, "epoch": 69, "test_loss": 0.854381477194173, "test_acc1": 78.05, "test_acc5": 93.822}
71
+ {"train_lr": 0.008244294954150596, "train_loss": 1.1554011463904723, "epoch": 70, "test_loss": 0.8507190158932793, "test_acc1": 78.158, "test_acc5": 93.83}
72
+ {"train_lr": 0.007741858926940615, "train_loss": 1.1529827388092733, "epoch": 71, "test_loss": 0.8462771603419464, "test_acc1": 78.36, "test_acc5": 93.838}
73
+ {"train_lr": 0.0072515202050260266, "train_loss": 1.1478681866162883, "epoch": 72, "test_loss": 0.8463714151282091, "test_acc1": 78.258, "test_acc5": 93.838}
74
+ {"train_lr": 0.006773762693526923, "train_loss": 1.1450907730113584, "epoch": 73, "test_loss": 0.8473175518518808, "test_acc1": 78.332, "test_acc5": 93.848}
75
+ {"train_lr": 0.006309057881426436, "train_loss": 1.1427524263580766, "epoch": 74, "test_loss": 0.8456551166517394, "test_acc1": 78.426, "test_acc5": 93.892}
76
+ {"train_lr": 0.005857864376269011, "train_loss": 1.1430270793957833, "epoch": 75, "test_loss": 0.8414309409884165, "test_acc1": 78.346, "test_acc5": 93.88}
77
+ {"train_lr": 0.005420627451571633, "train_loss": 1.1378088079083462, "epoch": 76, "test_loss": 0.8429250161410594, "test_acc1": 78.44, "test_acc5": 93.884}
78
+ {"train_lr": 0.00499777860739071, "train_loss": 1.1345869605414594, "epoch": 77, "test_loss": 0.8386802897617525, "test_acc1": 78.45, "test_acc5": 93.85}
79
+ {"train_lr": 0.004589735144484149, "train_loss": 1.1309629369039125, "epoch": 78, "test_loss": 0.8381855641016547, "test_acc1": 78.526, "test_acc5": 93.942}
80
+ {"train_lr": 0.004196899752486165, "train_loss": 1.1317583032952139, "epoch": 79, "test_loss": 0.8392620356183271, "test_acc1": 78.576, "test_acc5": 93.89}
81
+ {"train_lr": 0.003819660112501032, "train_loss": 1.1334123895310175, "epoch": 80, "test_loss": 0.8373313920610413, "test_acc1": 78.5, "test_acc5": 93.926}
82
+ {"train_lr": 0.003458388514508749, "train_loss": 1.1301428636732382, "epoch": 81, "test_loss": 0.8361538065863507, "test_acc1": 78.47, "test_acc5": 93.892}
83
+ {"train_lr": 0.0031134414899596464, "train_loss": 1.1248956719597878, "epoch": 82, "test_loss": 0.8356027485308598, "test_acc1": 78.524, "test_acc5": 93.904}
84
+ {"train_lr": 0.00278515945992109, "train_loss": 1.1253422215081061, "epoch": 83, "test_loss": 0.8342346918522096, "test_acc1": 78.644, "test_acc5": 93.942}
85
+ {"train_lr": 0.002473866399122801, "train_loss": 1.1204191554373446, "epoch": 84, "test_loss": 0.8346123450188613, "test_acc1": 78.66, "test_acc5": 93.992}
86
+ {"train_lr": 0.0021798695162326912, "train_loss": 1.120181971274245, "epoch": 85, "test_loss": 0.832755912247361, "test_acc1": 78.746, "test_acc5": 93.956}
87
+ {"train_lr": 0.0019034589506796355, "train_loss": 1.1189785415943438, "epoch": 86, "test_loss": 0.8315444222974534, "test_acc1": 78.706, "test_acc5": 93.99}
88
+ {"train_lr": 0.001644907486320348, "train_loss": 1.1182358203509364, "epoch": 87, "test_loss": 0.830185788740613, "test_acc1": 78.712, "test_acc5": 94.036}
89
+ {"train_lr": 0.0014044702822349443, "train_loss": 1.1158945292186813, "epoch": 88, "test_loss": 0.8303322282676794, "test_acc1": 78.768, "test_acc5": 94.024}
90
+ {"train_lr": 0.001182384620915507, "train_loss": 1.1152455356959907, "epoch": 89, "test_loss": 0.8298551214440745, "test_acc1": 78.808, "test_acc5": 93.986}
91
+ {"train_lr": 0.0009788696740969044, "train_loss": 1.114670523915428, "epoch": 90, "test_loss": 0.8287614619701492, "test_acc1": 78.812, "test_acc5": 94.018}
92
+ {"train_lr": 0.0007941262864611389, "train_loss": 1.1143171500188473, "epoch": 91, "test_loss": 0.8285346797914529, "test_acc1": 78.722, "test_acc5": 94.024}
93
+ {"train_lr": 0.0006283367774273784, "train_loss": 1.1135167015222505, "epoch": 92, "test_loss": 0.8286565388647877, "test_acc1": 78.81, "test_acc5": 94.014}
94
+ {"train_lr": 0.0004816647612250554, "train_loss": 1.1136287877711053, "epoch": 93, "test_loss": 0.8278967168334187, "test_acc1": 78.802, "test_acc5": 94.022}
95
+ {"train_lr": 0.000354254985426235, "train_loss": 1.1106700731423527, "epoch": 94, "test_loss": 0.8271201193256645, "test_acc1": 78.852, "test_acc5": 94.04}
96
+ {"train_lr": 0.0002462331880972449, "train_loss": 1.1096463033590263, "epoch": 95, "test_loss": 0.8274339372482227, "test_acc1": 78.796, "test_acc5": 94.038}
97
+ {"train_lr": 0.0001577059737104466, "train_loss": 1.10907505068202, "epoch": 96, "test_loss": 0.8272342941514692, "test_acc1": 78.822, "test_acc5": 94.032}
98
+ {"train_lr": 8.876070793839875e-05, "train_loss": 1.1097301646543387, "epoch": 97, "test_loss": 0.8274692274934176, "test_acc1": 78.822, "test_acc5": 94.03}
99
+ {"train_lr": 3.946543143456929e-05, "train_loss": 1.109091356634713, "epoch": 98, "test_loss": 0.8273094764397461, "test_acc1": 78.828, "test_acc5": 94.03}
100
+ {"train_lr": 9.868792685368279e-06, "train_loss": 1.109760883600472, "epoch": 99, "test_loss": 0.8273151449631063, "test_acc1": 78.83, "test_acc5": 94.032}
pretrained models/vit_small_800ep/log.txt ADDED
The diff for this file is too large to render. See raw diff
 
pretrained models/vit_small_800ep/vit_small_800ep.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94e68f4ee7ac54bb68b69371bc98226fdebfbee73b333885c26599d852730d38
3
+ size 1697879351
pretrained models/vit_small_800ep/vit_small_backbone_800ep.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4c4412e12f94791d2836f7f5d0d5e08cdf1b9f7a0501dc84489eb1fd3477b22
3
+ size 93810389