diff --git "a/accv2022/vit_large_patch16_lion_for_mae_pretrain/log/train.info.log" "b/accv2022/vit_large_patch16_lion_for_mae_pretrain/log/train.info.log" new file mode 100644--- /dev/null +++ "b/accv2022/vit_large_patch16_lion_for_mae_pretrain/log/train.info.log" @@ -0,0 +1,3063 @@ +2023-11-22 00:23:38 - network: vit_large_patch16 +2023-11-22 00:23:38 - num_classes: 5000 +2023-11-22 00:23:38 - input_image_size: 224 +2023-11-22 00:23:38 - scale: 1.1428571428571428 +2023-11-22 00:23:38 - trained_model_path: /root/code/SimpleAICV_pytorch_training_examples_on_ImageNet_COCO_ADE20K/pretrained_models/vit_mae_pretrain_on_accv2022_from_imagenet1k_pretrain/vit_large_patch16_224_mae_pretrain_model-loss0.424_encoder.pth +2023-11-22 00:23:38 - train_criterion: OneHotLabelCELoss() +2023-11-22 00:23:38 - test_criterion: CELoss( + (loss): CrossEntropyLoss() +) +2023-11-22 00:23:38 - train_dataset: +2023-11-22 00:23:38 - test_dataset: +2023-11-22 00:23:38 - train_collater: +2023-11-22 00:23:38 - test_collater: +2023-11-22 00:23:38 - seed: 0 +2023-11-22 00:23:38 - batch_size: 128 +2023-11-22 00:23:38 - num_workers: 20 +2023-11-22 00:23:39 - accumulation_steps: 32 +2023-11-22 00:23:39 - optimizer: ('Lion', {'lr': 0.0004, 'global_weight_decay': False, 'weight_decay': 0.05, 'lr_layer_decay': 0.65, 'lr_layer_decay_block': ModuleList( + (0): TransformerEncoderLayer( + (norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (attention): MultiHeadAttention( + (qkv_linear): Linear(in_features=1024, out_features=3072, bias=True) + (out_linear): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.0, inplace=False) + (softmax): Softmax(dim=-1) + ) + (norm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (feed_forward): FeedForward( + (fc1): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): GELU(approximate='none') + (fc2): Linear(in_features=4096, out_features=1024, bias=True) + (drop): Dropout(p=0.0, inplace=False) + ) + (drop_path): Identity() + ) + (1-23): 23 x TransformerEncoderLayer( + (norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (attention): MultiHeadAttention( + (qkv_linear): Linear(in_features=1024, out_features=3072, bias=True) + (out_linear): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.0, inplace=False) + (softmax): Softmax(dim=-1) + ) + (norm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) + (feed_forward): FeedForward( + (fc1): Linear(in_features=1024, out_features=4096, bias=True) + (gelu): GELU(approximate='none') + (fc2): Linear(in_features=4096, out_features=1024, bias=True) + (drop): Dropout(p=0.0, inplace=False) + ) + (drop_path): DropPathBlock() + ) +), 'block_name': 'blocks', 'no_weight_decay_layer_name_list': ['position_encoding', 'cls_token']}) +2023-11-22 00:23:39 - scheduler: ('CosineLR', {'warm_up_epochs': 5, 'min_lr': 1e-06}) +2023-11-22 00:23:39 - epochs: 100 +2023-11-22 00:23:39 - print_interval: 10 +2023-11-22 00:23:39 - sync_bn: False +2023-11-22 00:23:39 - use_amp: True +2023-11-22 00:23:39 - use_compile: False +2023-11-22 00:23:39 - compile_params: {'mode': 'default'} +2023-11-22 00:23:39 - use_ema_model: False +2023-11-22 00:23:39 - ema_model_decay: 0.9999 +2023-11-22 00:23:39 - gpus_type: NVIDIA GeForce RTX 4090 +2023-11-22 00:23:39 - gpus_num: 2 +2023-11-22 00:23:39 - group: +2023-11-22 00:23:39 - --------------------parameters-------------------- +2023-11-22 00:23:39 - name: cls_token, grad: True +2023-11-22 00:23:39 - name: position_encoding, grad: 
True +2023-11-22 00:23:39 - name: patch_embedding.conv.weight, grad: True +2023-11-22 00:23:39 - name: patch_embedding.conv.bias, grad: True +2023-11-22 00:23:39 - name: blocks.0.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.0.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.0.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.0.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.0.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.0.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.0.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.0.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.0.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.0.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.0.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.0.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.1.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.1.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.1.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.1.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.1.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.1.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.1.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.1.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.1.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.1.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.1.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.1.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.2.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.2.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.2.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.2.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.2.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.2.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.2.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.2.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.2.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.2.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.2.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.2.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.3.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.3.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.3.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.3.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.3.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.3.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.3.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.3.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.3.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.3.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.3.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.3.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 
- name: blocks.4.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.4.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.4.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.4.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.4.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.4.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.4.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.4.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.4.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.4.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.4.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.4.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.5.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.5.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.5.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.5.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.5.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.5.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.5.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.5.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.5.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.5.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.5.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.5.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.6.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.6.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.6.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.6.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.6.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.6.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.6.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.6.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.6.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.6.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.6.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.6.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.7.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.7.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.7.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.7.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.7.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.7.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.7.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.7.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.7.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.7.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.7.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.7.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.8.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.8.norm1.bias, grad: True +2023-11-22 00:23:39 - name: 
blocks.8.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.8.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.8.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.8.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.8.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.8.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.8.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.8.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.8.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.8.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.9.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.9.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.9.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.9.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.9.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.9.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.9.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.9.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.9.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.9.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.9.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.9.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.10.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.10.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.10.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.10.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.10.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.10.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.10.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.10.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.10.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.10.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.10.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.10.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.11.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.11.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.11.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.11.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.11.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.11.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.11.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.11.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.11.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.11.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.11.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.11.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.12.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.12.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.12.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.12.attention.qkv_linear.bias, grad: True 
+2023-11-22 00:23:39 - name: blocks.12.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.12.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.12.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.12.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.12.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.12.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.12.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.12.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.13.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.13.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.13.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.13.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.13.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.13.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.13.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.13.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.13.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.13.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.13.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.13.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.14.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.14.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.14.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.14.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.14.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.14.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.14.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.14.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.14.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.14.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.14.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.14.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.15.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.15.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.15.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.15.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.15.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.15.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.15.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.15.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.15.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.15.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.15.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.15.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.16.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.16.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.16.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.16.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.16.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: 
blocks.16.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.16.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.16.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.16.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.16.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.16.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.16.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.17.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.17.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.17.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.17.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.17.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.17.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.17.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.17.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.17.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.17.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.17.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.17.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.18.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.18.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.18.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.18.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.18.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.18.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.18.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.18.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.18.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.18.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.18.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.18.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.19.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.19.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.19.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.19.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.19.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.19.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.19.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.19.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.19.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.19.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.19.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.19.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.20.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.20.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.20.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.20.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.20.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.20.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.20.norm2.weight, grad: True 
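The configuration dumped at the top of this log combines accumulation_steps: 32 with use_amp: True, batch_size: 128 and gpus_num: 2, so each optimizer update aggregates 32 micro-batches per GPU (an effective batch of 128 x 2 x 32 = 8192 images if batch_size is per process). Below is a minimal sketch of that pattern using the standard torch.cuda.amp APIs; it illustrates what these settings mean, it is not the repository's actual training loop, and train_one_epoch is a made-up helper name.

import torch

# Sketch only: gradient accumulation with mixed precision, as implied by
# accumulation_steps=32 and use_amp=True in the config above.
def train_one_epoch(model, loader, criterion, optimizer, accumulation_steps=32):
    scaler = torch.cuda.amp.GradScaler()
    optimizer.zero_grad()
    for step, (images, labels) in enumerate(loader, start=1):
        images, labels = images.cuda(), labels.cuda()
        with torch.cuda.amp.autocast():
            # divide so that the summed gradients match one large-batch step
            loss = criterion(model(images), labels) / accumulation_steps
        scaler.scale(loss).backward()
        if step % accumulation_steps == 0:
            scaler.step(optimizer)   # one optimizer update per 32 micro-batches
            scaler.update()
            optimizer.zero_grad()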
+2023-11-22 00:23:39 - name: blocks.20.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.20.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.20.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.20.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.20.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.21.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.21.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.21.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.21.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.21.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.21.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.21.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.21.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.21.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.21.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.21.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.21.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.22.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.22.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.22.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.22.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.22.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.22.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.22.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.22.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.22.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.22.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.22.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.22.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.23.norm1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.23.norm1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.23.attention.qkv_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.23.attention.qkv_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.23.attention.out_linear.weight, grad: True +2023-11-22 00:23:39 - name: blocks.23.attention.out_linear.bias, grad: True +2023-11-22 00:23:39 - name: blocks.23.norm2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.23.norm2.bias, grad: True +2023-11-22 00:23:39 - name: blocks.23.feed_forward.fc1.weight, grad: True +2023-11-22 00:23:39 - name: blocks.23.feed_forward.fc1.bias, grad: True +2023-11-22 00:23:39 - name: blocks.23.feed_forward.fc2.weight, grad: True +2023-11-22 00:23:39 - name: blocks.23.feed_forward.fc2.bias, grad: True +2023-11-22 00:23:39 - name: norm.weight, grad: True +2023-11-22 00:23:39 - name: norm.bias, grad: True +2023-11-22 00:23:39 - name: fc.weight, grad: True +2023-11-22 00:23:39 - name: fc.bias, grad: True +2023-11-22 00:23:39 - --------------------buffers-------------------- +2023-11-22 00:23:39 - -------------layers weight decay--------------- +2023-11-22 00:23:39 - name: blocks.7.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: blocks.7.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: 
blocks.7.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: blocks.7.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: blocks.7.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: blocks.7.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: blocks.7.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: blocks.7.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: norm.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 1.0 +2023-11-22 00:23:39 - name: norm.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 1.0 +2023-11-22 00:23:39 - name: fc.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 1.0 +2023-11-22 00:23:39 - name: blocks.13.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.13.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.13.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.13.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.13.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.13.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.13.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.13.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.18.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.18.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.18.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.18.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.18.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.18.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.18.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.18.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.20.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.20.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.20.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.20.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.20.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.20.norm2.bias, lr: 0.0004, weight_decay: 0.0, 
lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.20.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.20.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.21.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.21.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.21.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.21.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.21.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.21.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.21.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.21.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.22.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.22.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.22.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.22.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.22.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.22.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.22.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.22.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.23.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.23.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.23.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.23.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.23.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.23.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.23.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.23.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.5.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: blocks.5.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: blocks.5.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: blocks.5.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: blocks.5.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: 
blocks.5.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: blocks.5.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: blocks.5.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: blocks.4.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.4.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.4.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.4.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.4.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.4.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.4.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.4.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.10.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.10.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.10.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.10.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.10.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.10.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.10.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.10.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.3.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00011780954793617752 +2023-11-22 00:23:39 - name: blocks.3.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00011780954793617752 +2023-11-22 00:23:39 - name: blocks.3.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00011780954793617752 +2023-11-22 00:23:39 - name: blocks.3.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00011780954793617752 +2023-11-22 00:23:39 - name: blocks.3.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00011780954793617752 +2023-11-22 00:23:39 - name: blocks.3.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00011780954793617752 +2023-11-22 00:23:39 - name: blocks.3.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00011780954793617752 +2023-11-22 00:23:39 - name: blocks.3.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.00011780954793617752 +2023-11-22 00:23:39 - name: blocks.16.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.16.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.16.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, 
lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.16.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.16.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.16.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.16.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.16.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.19.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.19.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.19.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.19.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.19.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.19.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.19.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.19.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.0.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.0.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.0.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.0.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.0.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.0.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.0.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.0.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.17.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: blocks.17.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: blocks.17.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: blocks.17.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: blocks.17.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: blocks.17.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: blocks.17.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: blocks.17.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: 
blocks.1.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.1.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.1.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.1.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.1.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.1.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.1.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.1.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.6.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.6.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.6.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.6.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.6.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.6.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.6.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.6.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.11.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.11.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.11.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.11.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.11.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.11.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.11.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.11.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.14.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.14.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.14.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.14.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.14.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.14.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 
0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.14.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.14.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.9.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.9.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.9.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.9.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.9.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.9.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.9.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.9.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.12.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.12.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.12.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.12.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.12.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.12.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.12.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.12.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.8.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: blocks.8.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: blocks.8.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: blocks.8.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: blocks.8.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: blocks.8.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: blocks.8.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: blocks.8.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: cls_token, lr: 0.0004, weight_decay: 0.0, lr_scale: 2.1029740616282293e-05 +2023-11-22 00:23:39 - name: position_encoding, lr: 0.0004, weight_decay: 0.0, lr_scale: 2.1029740616282293e-05 +2023-11-22 00:23:39 - name: patch_embedding.conv.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 2.1029740616282293e-05 +2023-11-22 00:23:39 - name: blocks.15.norm1.weight, lr: 0.0004, 
weight_decay: 0.0, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.15.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.15.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.15.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.15.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.15.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.15.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.15.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.2.norm1.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - name: blocks.2.norm1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - name: blocks.2.attention.qkv_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - name: blocks.2.attention.out_linear.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - name: blocks.2.norm2.weight, lr: 0.0004, weight_decay: 0.0, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - name: blocks.2.norm2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - name: blocks.2.feed_forward.fc1.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - name: blocks.2.feed_forward.fc2.bias, lr: 0.0004, weight_decay: 0.0, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - name: blocks.7.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: blocks.7.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: blocks.7.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: blocks.7.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0006599743590836596 +2023-11-22 00:23:39 - name: fc.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 1.0 +2023-11-22 00:23:39 - name: blocks.13.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.13.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.13.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.13.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.008750783174008792 +2023-11-22 00:23:39 - name: blocks.18.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.18.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.18.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.18.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.07541889062500001 +2023-11-22 00:23:39 - name: blocks.20.attention.qkv_linear.weight, lr: 0.0004, 
weight_decay: 0.05, lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.20.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.20.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.20.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.17850625000000003 +2023-11-22 00:23:39 - name: blocks.21.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.21.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.21.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.21.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.274625 +2023-11-22 00:23:39 - name: blocks.22.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.22.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.22.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.22.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.42250000000000004 +2023-11-22 00:23:39 - name: blocks.23.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.23.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.23.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.23.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.65 +2023-11-22 00:23:39 - name: blocks.5.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: blocks.5.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: blocks.5.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: blocks.5.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0002788391667128462 +2023-11-22 00:23:39 - name: blocks.4.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.4.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.4.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.4.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.00018124545836335003 +2023-11-22 00:23:39 - name: blocks.10.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.10.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.10.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.10.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.002403183829162165 +2023-11-22 00:23:39 - name: blocks.3.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.00011780954793617752 
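The weight_decay / lr_scale pairs listed in this section follow a simple pattern: 1-D parameters, biases and the names in no_weight_decay_layer_name_list get weight_decay 0.0 while all other weights get 0.05, and lr_scale decays by lr_layer_decay = 0.65 per layer from the head (1.0) down through the 24 blocks to the patch embedding / cls_token / position_encoding (0.65^25 ≈ 2.10e-05). A small sketch that reproduces these values is below; param_group is my own helper, not the repository's code, and the 1-D/bias no-decay rule is an assumption consistent with the listing above.

# Sketch: reproduce the (weight_decay, lr_scale) values logged above for
# vit_large_patch16 (24 blocks), lr_layer_decay=0.65, weight_decay=0.05.
def param_group(name, ndim, num_blocks=24, decay=0.65, wd=0.05):
    # Assumed no-decay rule: 1-D tensors, biases, and the configured
    # no_weight_decay_layer_name_list entries are excluded from weight decay.
    no_decay = (ndim == 1 or name.endswith('.bias')
                or any(k in name for k in ('position_encoding', 'cls_token')))
    # Layer id: embeddings -> 0, blocks.i -> i + 1, final norm / fc head -> num_blocks + 1.
    if name.startswith('blocks.'):
        layer_id = int(name.split('.')[1]) + 1
    elif name.startswith(('cls_token', 'position_encoding', 'patch_embedding')):
        layer_id = 0
    else:
        layer_id = num_blocks + 1
    return (0.0 if no_decay else wd), decay ** (num_blocks + 1 - layer_id)

print(param_group('blocks.23.attention.qkv_linear.weight', 2))  # (0.05, 0.65)
print(param_group('blocks.0.norm1.bias', 1))                    # (0.0, ~3.2353e-05)
print(param_group('cls_token', 3))                              # (0.0, ~2.1030e-05)
print(param_group('fc.weight', 2))                              # (0.05, 1.0)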
+2023-11-22 00:23:39 - name: blocks.3.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.00011780954793617752 +2023-11-22 00:23:39 - name: blocks.3.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.00011780954793617752 +2023-11-22 00:23:39 - name: blocks.3.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.00011780954793617752 +2023-11-22 00:23:39 - name: blocks.16.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.16.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.16.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.16.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.03186448128906251 +2023-11-22 00:23:39 - name: blocks.19.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.19.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.19.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.19.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.11602906250000002 +2023-11-22 00:23:39 - name: blocks.0.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.0.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.0.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.0.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 3.2353447101972754e-05 +2023-11-22 00:23:39 - name: blocks.17.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: blocks.17.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: blocks.17.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: blocks.17.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.049022278906250015 +2023-11-22 00:23:39 - name: blocks.1.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.1.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.1.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.1.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 4.977453400303501e-05 +2023-11-22 00:23:39 - name: blocks.6.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.6.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.6.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.6.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0004289833334043787 +2023-11-22 00:23:39 - name: blocks.11.attention.qkv_linear.weight, lr: 
0.0004, weight_decay: 0.05, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.11.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.11.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.11.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.003697205891018715 +2023-11-22 00:23:39 - name: blocks.14.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.14.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.14.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.14.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.013462743344628911 +2023-11-22 00:23:39 - name: blocks.9.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.9.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.9.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.9.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0015620694889554071 +2023-11-22 00:23:39 - name: blocks.12.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.12.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.12.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.12.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.005688009063105715 +2023-11-22 00:23:39 - name: blocks.8.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: blocks.8.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: blocks.8.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: blocks.8.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.0010153451678210146 +2023-11-22 00:23:39 - name: patch_embedding.conv.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 2.1029740616282293e-05 +2023-11-22 00:23:39 - name: blocks.15.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.15.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.15.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.15.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 0.02071191283789063 +2023-11-22 00:23:39 - name: blocks.2.attention.qkv_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - name: blocks.2.attention.out_linear.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - name: blocks.2.feed_forward.fc1.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - 
name: blocks.2.feed_forward.fc2.weight, lr: 0.0004, weight_decay: 0.05, lr_scale: 7.65762061585154e-05 +2023-11-22 00:23:39 - using torch version:2.0.0+cu118 +2023-11-22 00:23:39 - this torch version support torch.compile function. +2023-11-22 00:23:39 - epoch 001 lr: 0.000400 +2023-11-22 00:25:32 - train: epoch 0001, iter [00010, 00202], lr: 0.000004, loss: 8.5139 +2023-11-22 00:27:23 - train: epoch 0001, iter [00020, 00202], lr: 0.000008, loss: 8.5185 +2023-11-22 00:29:14 - train: epoch 0001, iter [00030, 00202], lr: 0.000012, loss: 8.5083 +2023-11-22 00:31:06 - train: epoch 0001, iter [00040, 00202], lr: 0.000016, loss: 8.5081 +2023-11-22 00:32:57 - train: epoch 0001, iter [00050, 00202], lr: 0.000020, loss: 8.5095 +2023-11-22 00:34:49 - train: epoch 0001, iter [00060, 00202], lr: 0.000024, loss: 8.4955 +2023-11-22 00:36:41 - train: epoch 0001, iter [00070, 00202], lr: 0.000028, loss: 8.5046 +2023-11-22 00:38:33 - train: epoch 0001, iter [00080, 00202], lr: 0.000032, loss: 8.4875 +2023-11-22 00:40:26 - train: epoch 0001, iter [00090, 00202], lr: 0.000036, loss: 8.4717 +2023-11-22 00:42:18 - train: epoch 0001, iter [00100, 00202], lr: 0.000039, loss: 8.4690 +2023-11-22 00:44:11 - train: epoch 0001, iter [00110, 00202], lr: 0.000043, loss: 8.4138 +2023-11-22 00:46:04 - train: epoch 0001, iter [00120, 00202], lr: 0.000047, loss: 8.4159 +2023-11-22 00:47:57 - train: epoch 0001, iter [00130, 00202], lr: 0.000051, loss: 8.3173 +2023-11-22 00:49:51 - train: epoch 0001, iter [00140, 00202], lr: 0.000055, loss: 8.2939 +2023-11-22 00:51:45 - train: epoch 0001, iter [00150, 00202], lr: 0.000059, loss: 8.2953 +2023-11-22 00:53:39 - train: epoch 0001, iter [00160, 00202], lr: 0.000063, loss: 8.2684 +2023-11-22 00:55:34 - train: epoch 0001, iter [00170, 00202], lr: 0.000067, loss: 8.1518 +2023-11-22 00:57:27 - train: epoch 0001, iter [00180, 00202], lr: 0.000071, loss: 8.1959 +2023-11-22 00:59:22 - train: epoch 0001, iter [00190, 00202], lr: 0.000075, loss: 7.9393 +2023-11-22 01:01:15 - train: epoch 0001, iter [00200, 00202], lr: 0.000079, loss: 8.0458 +2023-11-22 01:01:45 - train: epoch 001, train_loss: 8.3725 +2023-11-22 01:35:57 - eval: epoch: 001, acc1: 2.859%, acc5: 8.497%, test_loss: 7.2764, per_image_load_time: 0.237ms, per_image_inference_time: 3.844ms +2023-11-22 01:36:00 - until epoch: 001, best_acc1: 2.859% +2023-11-22 01:36:00 - epoch 002 lr: 0.000080 +2023-11-22 01:37:57 - train: epoch 0002, iter [00010, 00202], lr: 0.000084, loss: 7.9056 +2023-11-22 01:39:49 - train: epoch 0002, iter [00020, 00202], lr: 0.000088, loss: 7.7283 +2023-11-22 01:41:41 - train: epoch 0002, iter [00030, 00202], lr: 0.000092, loss: 7.5235 +2023-11-22 01:43:34 - train: epoch 0002, iter [00040, 00202], lr: 0.000096, loss: 7.6062 +2023-11-22 01:45:27 - train: epoch 0002, iter [00050, 00202], lr: 0.000100, loss: 7.8096 +2023-11-22 01:47:21 - train: epoch 0002, iter [00060, 00202], lr: 0.000104, loss: 7.4754 +2023-11-22 01:49:15 - train: epoch 0002, iter [00070, 00202], lr: 0.000108, loss: 7.1288 +2023-11-22 01:51:09 - train: epoch 0002, iter [00080, 00202], lr: 0.000112, loss: 7.3625 +2023-11-22 01:53:04 - train: epoch 0002, iter [00090, 00202], lr: 0.000116, loss: 7.3162 +2023-11-22 01:54:58 - train: epoch 0002, iter [00100, 00202], lr: 0.000119, loss: 7.2600 +2023-11-22 01:56:52 - train: epoch 0002, iter [00110, 00202], lr: 0.000123, loss: 7.4018 +2023-11-22 01:58:47 - train: epoch 0002, iter [00120, 00202], lr: 0.000127, loss: 7.3347 +2023-11-22 02:00:43 - train: epoch 0002, iter [00130, 00202], lr: 0.000131, 
loss: 7.4319 +2023-11-22 02:02:39 - train: epoch 0002, iter [00140, 00202], lr: 0.000135, loss: 7.3568 +2023-11-22 02:04:35 - train: epoch 0002, iter [00150, 00202], lr: 0.000139, loss: 6.8640 +2023-11-22 02:06:30 - train: epoch 0002, iter [00160, 00202], lr: 0.000143, loss: 7.6547 +2023-11-22 02:08:24 - train: epoch 0002, iter [00170, 00202], lr: 0.000147, loss: 7.5305 +2023-11-22 02:10:18 - train: epoch 0002, iter [00180, 00202], lr: 0.000151, loss: 7.3790 +2023-11-22 02:12:10 - train: epoch 0002, iter [00190, 00202], lr: 0.000155, loss: 7.3000 +2023-11-22 02:14:04 - train: epoch 0002, iter [00200, 00202], lr: 0.000159, loss: 7.3424 +2023-11-22 02:14:34 - train: epoch 002, train_loss: 7.4025 +2023-11-22 02:48:36 - eval: epoch: 002, acc1: 23.932%, acc5: 47.316%, test_loss: 4.4567, per_image_load_time: 0.248ms, per_image_inference_time: 3.842ms +2023-11-22 02:48:40 - until epoch: 002, best_acc1: 23.932% +2023-11-22 02:48:40 - epoch 003 lr: 0.000160 +2023-11-22 02:50:38 - train: epoch 0003, iter [00010, 00202], lr: 0.000164, loss: 7.2199 +2023-11-22 02:52:32 - train: epoch 0003, iter [00020, 00202], lr: 0.000168, loss: 7.2028 +2023-11-22 02:54:27 - train: epoch 0003, iter [00030, 00202], lr: 0.000172, loss: 5.9884 +2023-11-22 02:56:22 - train: epoch 0003, iter [00040, 00202], lr: 0.000176, loss: 6.7885 +2023-11-22 02:58:17 - train: epoch 0003, iter [00050, 00202], lr: 0.000180, loss: 6.6061 +2023-11-22 03:00:13 - train: epoch 0003, iter [00060, 00202], lr: 0.000184, loss: 6.7674 +2023-11-22 03:02:08 - train: epoch 0003, iter [00070, 00202], lr: 0.000188, loss: 7.3376 +2023-11-22 03:04:04 - train: epoch 0003, iter [00080, 00202], lr: 0.000192, loss: 6.7487 +2023-11-22 03:05:59 - train: epoch 0003, iter [00090, 00202], lr: 0.000196, loss: 7.0715 +2023-11-22 03:07:54 - train: epoch 0003, iter [00100, 00202], lr: 0.000199, loss: 6.9972 +2023-11-22 03:09:48 - train: epoch 0003, iter [00110, 00202], lr: 0.000203, loss: 6.5105 +2023-11-22 03:11:42 - train: epoch 0003, iter [00120, 00202], lr: 0.000207, loss: 6.1062 +2023-11-22 03:13:36 - train: epoch 0003, iter [00130, 00202], lr: 0.000211, loss: 6.7863 +2023-11-22 03:15:29 - train: epoch 0003, iter [00140, 00202], lr: 0.000215, loss: 6.3836 +2023-11-22 03:17:23 - train: epoch 0003, iter [00150, 00202], lr: 0.000219, loss: 6.3548 +2023-11-22 03:19:16 - train: epoch 0003, iter [00160, 00202], lr: 0.000223, loss: 6.4462 +2023-11-22 03:21:08 - train: epoch 0003, iter [00170, 00202], lr: 0.000227, loss: 5.7527 +2023-11-22 03:23:01 - train: epoch 0003, iter [00180, 00202], lr: 0.000231, loss: 6.0434 +2023-11-22 03:24:53 - train: epoch 0003, iter [00190, 00202], lr: 0.000235, loss: 5.5239 +2023-11-22 03:26:45 - train: epoch 0003, iter [00200, 00202], lr: 0.000239, loss: 6.1274 +2023-11-22 03:27:15 - train: epoch 003, train_loss: 6.5518 +2023-11-22 04:02:07 - eval: epoch: 003, acc1: 41.468%, acc5: 64.364%, test_loss: 3.2420, per_image_load_time: 0.222ms, per_image_inference_time: 3.840ms +2023-11-22 04:02:11 - until epoch: 003, best_acc1: 41.468% +2023-11-22 04:02:11 - epoch 004 lr: 0.000240 +2023-11-22 04:04:12 - train: epoch 0004, iter [00010, 00202], lr: 0.000244, loss: 5.7700 +2023-11-22 04:06:07 - train: epoch 0004, iter [00020, 00202], lr: 0.000248, loss: 6.8284 +2023-11-22 04:08:02 - train: epoch 0004, iter [00030, 00202], lr: 0.000252, loss: 5.2753 +2023-11-22 04:09:57 - train: epoch 0004, iter [00040, 00202], lr: 0.000256, loss: 6.3002 +2023-11-22 04:11:51 - train: epoch 0004, iter [00050, 00202], lr: 0.000260, loss: 5.9941 +2023-11-22 04:13:45 
- train: epoch 0004, iter [00060, 00202], lr: 0.000264, loss: 5.9394 +2023-11-22 04:15:39 - train: epoch 0004, iter [00070, 00202], lr: 0.000268, loss: 5.9078 +2023-11-22 04:17:33 - train: epoch 0004, iter [00080, 00202], lr: 0.000272, loss: 4.8347 +2023-11-22 04:19:25 - train: epoch 0004, iter [00090, 00202], lr: 0.000276, loss: 5.3105 +2023-11-22 04:21:16 - train: epoch 0004, iter [00100, 00202], lr: 0.000279, loss: 6.4600 +2023-11-22 04:23:09 - train: epoch 0004, iter [00110, 00202], lr: 0.000283, loss: 5.9367 +2023-11-22 04:25:01 - train: epoch 0004, iter [00120, 00202], lr: 0.000287, loss: 5.9288 +2023-11-22 04:26:53 - train: epoch 0004, iter [00130, 00202], lr: 0.000291, loss: 6.4361 +2023-11-22 04:28:45 - train: epoch 0004, iter [00140, 00202], lr: 0.000295, loss: 5.0852 +2023-11-22 04:30:38 - train: epoch 0004, iter [00150, 00202], lr: 0.000299, loss: 6.2430 +2023-11-22 04:32:31 - train: epoch 0004, iter [00160, 00202], lr: 0.000303, loss: 6.8462 +2023-11-22 04:34:24 - train: epoch 0004, iter [00170, 00202], lr: 0.000307, loss: 6.3935 +2023-11-22 04:36:17 - train: epoch 0004, iter [00180, 00202], lr: 0.000311, loss: 6.4045 +2023-11-22 04:38:10 - train: epoch 0004, iter [00190, 00202], lr: 0.000315, loss: 5.6665 +2023-11-22 04:40:03 - train: epoch 0004, iter [00200, 00202], lr: 0.000319, loss: 5.2982 +2023-11-22 04:40:33 - train: epoch 004, train_loss: 6.1392 +2023-11-22 05:15:50 - eval: epoch: 004, acc1: 49.476%, acc5: 70.500%, test_loss: 2.7890, per_image_load_time: 0.231ms, per_image_inference_time: 3.840ms +2023-11-22 05:15:53 - until epoch: 004, best_acc1: 49.476% +2023-11-22 05:15:53 - epoch 005 lr: 0.000320 +2023-11-22 05:17:51 - train: epoch 0005, iter [00010, 00202], lr: 0.000324, loss: 6.4831 +2023-11-22 05:19:42 - train: epoch 0005, iter [00020, 00202], lr: 0.000328, loss: 5.3985 +2023-11-22 05:21:35 - train: epoch 0005, iter [00030, 00202], lr: 0.000332, loss: 5.4984 +2023-11-22 05:23:27 - train: epoch 0005, iter [00040, 00202], lr: 0.000336, loss: 5.9760 +2023-11-22 05:25:20 - train: epoch 0005, iter [00050, 00202], lr: 0.000340, loss: 6.3219 +2023-11-22 05:27:12 - train: epoch 0005, iter [00060, 00202], lr: 0.000344, loss: 5.4029 +2023-11-22 05:29:04 - train: epoch 0005, iter [00070, 00202], lr: 0.000348, loss: 6.0922 +2023-11-22 05:30:56 - train: epoch 0005, iter [00080, 00202], lr: 0.000352, loss: 5.8990 +2023-11-22 05:32:49 - train: epoch 0005, iter [00090, 00202], lr: 0.000356, loss: 5.3834 +2023-11-22 05:34:42 - train: epoch 0005, iter [00100, 00202], lr: 0.000359, loss: 4.8855 +2023-11-22 05:36:34 - train: epoch 0005, iter [00110, 00202], lr: 0.000363, loss: 6.3994 +2023-11-22 05:38:27 - train: epoch 0005, iter [00120, 00202], lr: 0.000367, loss: 6.1085 +2023-11-22 05:40:20 - train: epoch 0005, iter [00130, 00202], lr: 0.000371, loss: 6.7030 +2023-11-22 05:42:14 - train: epoch 0005, iter [00140, 00202], lr: 0.000375, loss: 5.5463 +2023-11-22 05:44:07 - train: epoch 0005, iter [00150, 00202], lr: 0.000379, loss: 5.8565 +2023-11-22 05:46:00 - train: epoch 0005, iter [00160, 00202], lr: 0.000383, loss: 5.4590 +2023-11-22 05:47:54 - train: epoch 0005, iter [00170, 00202], lr: 0.000387, loss: 6.0318 +2023-11-22 05:49:48 - train: epoch 0005, iter [00180, 00202], lr: 0.000391, loss: 5.8297 +2023-11-22 05:51:44 - train: epoch 0005, iter [00190, 00202], lr: 0.000395, loss: 6.1780 +2023-11-22 05:53:40 - train: epoch 0005, iter [00200, 00202], lr: 0.000399, loss: 5.9561 +2023-11-22 05:54:10 - train: epoch 005, train_loss: 5.8243 +2023-11-22 06:29:16 - eval: epoch: 005, 
acc1: 53.772%, acc5: 73.786%, test_loss: 2.5398, per_image_load_time: 0.231ms, per_image_inference_time: 3.836ms +2023-11-22 06:29:20 - until epoch: 005, best_acc1: 53.772% +2023-11-22 06:29:20 - epoch 006 lr: 0.000400 +2023-11-22 06:31:19 - train: epoch 0006, iter [00010, 00202], lr: 0.000400, loss: 4.7087 +2023-11-22 06:33:12 - train: epoch 0006, iter [00020, 00202], lr: 0.000400, loss: 6.4674 +2023-11-22 06:35:05 - train: epoch 0006, iter [00030, 00202], lr: 0.000400, loss: 6.5018 +2023-11-22 06:36:58 - train: epoch 0006, iter [00040, 00202], lr: 0.000400, loss: 5.6416 +2023-11-22 06:38:51 - train: epoch 0006, iter [00050, 00202], lr: 0.000400, loss: 6.5711 +2023-11-22 06:40:44 - train: epoch 0006, iter [00060, 00202], lr: 0.000400, loss: 6.1916 +2023-11-22 06:42:38 - train: epoch 0006, iter [00070, 00202], lr: 0.000400, loss: 6.0642 +2023-11-22 06:44:33 - train: epoch 0006, iter [00080, 00202], lr: 0.000400, loss: 5.5042 +2023-11-22 06:46:28 - train: epoch 0006, iter [00090, 00202], lr: 0.000400, loss: 4.7713 +2023-11-22 06:48:24 - train: epoch 0006, iter [00100, 00202], lr: 0.000400, loss: 5.9013 +2023-11-22 06:50:22 - train: epoch 0006, iter [00110, 00202], lr: 0.000400, loss: 6.0658 +2023-11-22 06:52:18 - train: epoch 0006, iter [00120, 00202], lr: 0.000400, loss: 4.8170 +2023-11-22 06:54:15 - train: epoch 0006, iter [00130, 00202], lr: 0.000400, loss: 6.3195 +2023-11-22 06:56:10 - train: epoch 0006, iter [00140, 00202], lr: 0.000400, loss: 6.4043 +2023-11-22 06:58:07 - train: epoch 0006, iter [00150, 00202], lr: 0.000400, loss: 6.0920 +2023-11-22 07:00:02 - train: epoch 0006, iter [00160, 00202], lr: 0.000400, loss: 5.1400 +2023-11-22 07:01:57 - train: epoch 0006, iter [00170, 00202], lr: 0.000400, loss: 5.4553 +2023-11-22 07:03:51 - train: epoch 0006, iter [00180, 00202], lr: 0.000400, loss: 5.8642 +2023-11-22 07:05:46 - train: epoch 0006, iter [00190, 00202], lr: 0.000400, loss: 5.3315 +2023-11-22 07:07:39 - train: epoch 0006, iter [00200, 00202], lr: 0.000400, loss: 5.7905 +2023-11-22 07:08:08 - train: epoch 006, train_loss: 5.6553 +2023-11-22 07:42:27 - eval: epoch: 006, acc1: 57.038%, acc5: 76.188%, test_loss: 2.3831, per_image_load_time: 0.235ms, per_image_inference_time: 3.839ms +2023-11-22 07:42:31 - until epoch: 006, best_acc1: 57.038% +2023-11-22 07:42:31 - epoch 007 lr: 0.000400 +2023-11-22 07:44:32 - train: epoch 0007, iter [00010, 00202], lr: 0.000400, loss: 5.5840 +2023-11-22 07:46:27 - train: epoch 0007, iter [00020, 00202], lr: 0.000400, loss: 5.0067 +2023-11-22 07:48:22 - train: epoch 0007, iter [00030, 00202], lr: 0.000400, loss: 5.1261 +2023-11-22 07:50:18 - train: epoch 0007, iter [00040, 00202], lr: 0.000400, loss: 5.7727 +2023-11-22 07:52:15 - train: epoch 0007, iter [00050, 00202], lr: 0.000400, loss: 5.4935 +2023-11-22 07:54:12 - train: epoch 0007, iter [00060, 00202], lr: 0.000400, loss: 4.5301 +2023-11-22 07:56:09 - train: epoch 0007, iter [00070, 00202], lr: 0.000400, loss: 5.5287 +2023-11-22 07:58:05 - train: epoch 0007, iter [00080, 00202], lr: 0.000400, loss: 4.4336 +2023-11-22 08:00:00 - train: epoch 0007, iter [00090, 00202], lr: 0.000400, loss: 5.9335 +2023-11-22 08:01:54 - train: epoch 0007, iter [00100, 00202], lr: 0.000400, loss: 5.7896 +2023-11-22 08:03:46 - train: epoch 0007, iter [00110, 00202], lr: 0.000400, loss: 5.4567 +2023-11-22 08:05:40 - train: epoch 0007, iter [00120, 00202], lr: 0.000400, loss: 5.8425 +2023-11-22 08:07:32 - train: epoch 0007, iter [00130, 00202], lr: 0.000400, loss: 5.5218 +2023-11-22 08:09:25 - train: epoch 0007, 
iter [00140, 00202], lr: 0.000400, loss: 6.1714 +2023-11-22 08:11:17 - train: epoch 0007, iter [00150, 00202], lr: 0.000400, loss: 4.4765 +2023-11-22 08:13:09 - train: epoch 0007, iter [00160, 00202], lr: 0.000400, loss: 6.0776 +2023-11-22 08:15:00 - train: epoch 0007, iter [00170, 00202], lr: 0.000400, loss: 5.9434 +2023-11-22 08:16:53 - train: epoch 0007, iter [00180, 00202], lr: 0.000400, loss: 5.4001 +2023-11-22 08:18:45 - train: epoch 0007, iter [00190, 00202], lr: 0.000400, loss: 5.8092 +2023-11-22 08:20:37 - train: epoch 0007, iter [00200, 00202], lr: 0.000400, loss: 4.0949 +2023-11-22 08:21:06 - train: epoch 007, train_loss: 5.4294 +2023-11-22 08:56:27 - eval: epoch: 007, acc1: 59.578%, acc5: 77.909%, test_loss: 2.2129, per_image_load_time: 0.217ms, per_image_inference_time: 3.838ms +2023-11-22 08:56:31 - until epoch: 007, best_acc1: 59.578% +2023-11-22 08:56:31 - epoch 008 lr: 0.000400 +2023-11-22 08:58:29 - train: epoch 0008, iter [00010, 00202], lr: 0.000400, loss: 5.3741 +2023-11-22 09:00:21 - train: epoch 0008, iter [00020, 00202], lr: 0.000400, loss: 5.8767 +2023-11-22 09:02:12 - train: epoch 0008, iter [00030, 00202], lr: 0.000399, loss: 5.0664 +2023-11-22 09:04:02 - train: epoch 0008, iter [00040, 00202], lr: 0.000399, loss: 4.6950 +2023-11-22 09:05:52 - train: epoch 0008, iter [00050, 00202], lr: 0.000399, loss: 4.9891 +2023-11-22 09:07:41 - train: epoch 0008, iter [00060, 00202], lr: 0.000399, loss: 4.7282 +2023-11-22 09:09:32 - train: epoch 0008, iter [00070, 00202], lr: 0.000399, loss: 5.8740 +2023-11-22 09:11:24 - train: epoch 0008, iter [00080, 00202], lr: 0.000399, loss: 5.6255 +2023-11-22 09:13:16 - train: epoch 0008, iter [00090, 00202], lr: 0.000399, loss: 5.4814 +2023-11-22 09:15:08 - train: epoch 0008, iter [00100, 00202], lr: 0.000399, loss: 5.8431 +2023-11-22 09:17:00 - train: epoch 0008, iter [00110, 00202], lr: 0.000399, loss: 4.7934 +2023-11-22 09:18:52 - train: epoch 0008, iter [00120, 00202], lr: 0.000399, loss: 5.8079 +2023-11-22 09:20:44 - train: epoch 0008, iter [00130, 00202], lr: 0.000399, loss: 5.7833 +2023-11-22 09:22:37 - train: epoch 0008, iter [00140, 00202], lr: 0.000399, loss: 5.4198 +2023-11-22 09:24:30 - train: epoch 0008, iter [00150, 00202], lr: 0.000399, loss: 5.2513 +2023-11-22 09:26:23 - train: epoch 0008, iter [00160, 00202], lr: 0.000399, loss: 5.1587 +2023-11-22 09:28:16 - train: epoch 0008, iter [00170, 00202], lr: 0.000399, loss: 5.5381 +2023-11-22 09:30:10 - train: epoch 0008, iter [00180, 00202], lr: 0.000399, loss: 5.4459 +2023-11-22 09:32:05 - train: epoch 0008, iter [00190, 00202], lr: 0.000399, loss: 5.3854 +2023-11-22 09:34:00 - train: epoch 0008, iter [00200, 00202], lr: 0.000399, loss: 5.3052 +2023-11-22 09:34:31 - train: epoch 008, train_loss: 5.4115 +2023-11-22 10:09:27 - eval: epoch: 008, acc1: 61.435%, acc5: 79.327%, test_loss: 2.1330, per_image_load_time: 0.222ms, per_image_inference_time: 3.838ms +2023-11-22 10:09:31 - until epoch: 008, best_acc1: 61.435% +2023-11-22 10:09:31 - epoch 009 lr: 0.000399 +2023-11-22 10:11:27 - train: epoch 0009, iter [00010, 00202], lr: 0.000399, loss: 4.9355 +2023-11-22 10:13:20 - train: epoch 0009, iter [00020, 00202], lr: 0.000399, loss: 4.5243 +2023-11-22 10:15:13 - train: epoch 0009, iter [00030, 00202], lr: 0.000399, loss: 5.9823 +2023-11-22 10:17:06 - train: epoch 0009, iter [00040, 00202], lr: 0.000399, loss: 4.6618 +2023-11-22 10:18:58 - train: epoch 0009, iter [00050, 00202], lr: 0.000399, loss: 5.6253 +2023-11-22 10:20:51 - train: epoch 0009, iter [00060, 00202], lr: 0.000399, 
loss: 4.9729 +2023-11-22 10:22:43 - train: epoch 0009, iter [00070, 00202], lr: 0.000399, loss: 5.9947 +2023-11-22 10:24:36 - train: epoch 0009, iter [00080, 00202], lr: 0.000399, loss: 6.2859 +2023-11-22 10:26:29 - train: epoch 0009, iter [00090, 00202], lr: 0.000399, loss: 5.7094 +2023-11-22 10:28:22 - train: epoch 0009, iter [00100, 00202], lr: 0.000399, loss: 5.1029 +2023-11-22 10:30:16 - train: epoch 0009, iter [00110, 00202], lr: 0.000399, loss: 5.7691 +2023-11-22 10:32:09 - train: epoch 0009, iter [00120, 00202], lr: 0.000399, loss: 5.7786 +2023-11-22 10:34:02 - train: epoch 0009, iter [00130, 00202], lr: 0.000399, loss: 5.6283 +2023-11-22 10:35:55 - train: epoch 0009, iter [00140, 00202], lr: 0.000399, loss: 5.4064 +2023-11-22 10:37:49 - train: epoch 0009, iter [00150, 00202], lr: 0.000398, loss: 5.0185 +2023-11-22 10:39:42 - train: epoch 0009, iter [00160, 00202], lr: 0.000398, loss: 5.0533 +2023-11-22 10:41:37 - train: epoch 0009, iter [00170, 00202], lr: 0.000398, loss: 5.9021 +2023-11-22 10:43:31 - train: epoch 0009, iter [00180, 00202], lr: 0.000398, loss: 5.7846 +2023-11-22 10:45:25 - train: epoch 0009, iter [00190, 00202], lr: 0.000398, loss: 5.0452 +2023-11-22 10:47:20 - train: epoch 0009, iter [00200, 00202], lr: 0.000398, loss: 5.2134 +2023-11-22 10:47:50 - train: epoch 009, train_loss: 5.3003 +2023-11-22 11:21:47 - eval: epoch: 009, acc1: 63.027%, acc5: 80.445%, test_loss: 1.9973, per_image_load_time: 0.215ms, per_image_inference_time: 3.839ms +2023-11-22 11:21:51 - until epoch: 009, best_acc1: 63.027% +2023-11-22 11:21:51 - epoch 010 lr: 0.000398 +2023-11-22 11:23:48 - train: epoch 0010, iter [00010, 00202], lr: 0.000398, loss: 4.7494 +2023-11-22 11:25:42 - train: epoch 0010, iter [00020, 00202], lr: 0.000398, loss: 4.8285 +2023-11-22 11:27:35 - train: epoch 0010, iter [00030, 00202], lr: 0.000398, loss: 4.7519 +2023-11-22 11:29:28 - train: epoch 0010, iter [00040, 00202], lr: 0.000398, loss: 4.9805 +2023-11-22 11:31:21 - train: epoch 0010, iter [00050, 00202], lr: 0.000398, loss: 4.8305 +2023-11-22 11:33:15 - train: epoch 0010, iter [00060, 00202], lr: 0.000398, loss: 4.8917 +2023-11-22 11:35:09 - train: epoch 0010, iter [00070, 00202], lr: 0.000398, loss: 4.5798 +2023-11-22 11:37:04 - train: epoch 0010, iter [00080, 00202], lr: 0.000398, loss: 5.2824 +2023-11-22 11:38:58 - train: epoch 0010, iter [00090, 00202], lr: 0.000398, loss: 4.2967 +2023-11-22 11:40:54 - train: epoch 0010, iter [00100, 00202], lr: 0.000398, loss: 5.8225 +2023-11-22 11:42:50 - train: epoch 0010, iter [00110, 00202], lr: 0.000398, loss: 5.5025 +2023-11-22 11:44:45 - train: epoch 0010, iter [00120, 00202], lr: 0.000398, loss: 5.6224 +2023-11-22 11:46:40 - train: epoch 0010, iter [00130, 00202], lr: 0.000398, loss: 5.3614 +2023-11-22 11:48:34 - train: epoch 0010, iter [00140, 00202], lr: 0.000398, loss: 5.2868 +2023-11-22 11:50:29 - train: epoch 0010, iter [00150, 00202], lr: 0.000398, loss: 4.2504 +2023-11-22 11:52:23 - train: epoch 0010, iter [00160, 00202], lr: 0.000398, loss: 4.6900 +2023-11-22 11:54:16 - train: epoch 0010, iter [00170, 00202], lr: 0.000397, loss: 4.6653 +2023-11-22 11:56:10 - train: epoch 0010, iter [00180, 00202], lr: 0.000397, loss: 4.6096 +2023-11-22 11:58:02 - train: epoch 0010, iter [00190, 00202], lr: 0.000397, loss: 5.0165 +2023-11-22 11:59:55 - train: epoch 0010, iter [00200, 00202], lr: 0.000397, loss: 5.2549 +2023-11-22 12:00:25 - train: epoch 010, train_loss: 5.0848 +2023-11-22 12:34:55 - eval: epoch: 010, acc1: 64.293%, acc5: 81.404%, test_loss: 1.8947, 
per_image_load_time: 0.211ms, per_image_inference_time: 3.839ms +2023-11-22 12:34:58 - until epoch: 010, best_acc1: 64.293% +2023-11-22 12:34:58 - epoch 011 lr: 0.000397 +2023-11-22 12:36:58 - train: epoch 0011, iter [00010, 00202], lr: 0.000397, loss: 4.9921 +2023-11-22 12:38:52 - train: epoch 0011, iter [00020, 00202], lr: 0.000397, loss: 5.6256 +2023-11-22 12:40:46 - train: epoch 0011, iter [00030, 00202], lr: 0.000397, loss: 5.0443 +2023-11-22 12:42:39 - train: epoch 0011, iter [00040, 00202], lr: 0.000397, loss: 4.8049 +2023-11-22 12:44:33 - train: epoch 0011, iter [00050, 00202], lr: 0.000397, loss: 4.4654 +2023-11-22 12:46:27 - train: epoch 0011, iter [00060, 00202], lr: 0.000397, loss: 4.7788 +2023-11-22 12:48:20 - train: epoch 0011, iter [00070, 00202], lr: 0.000397, loss: 4.5020 +2023-11-22 12:50:14 - train: epoch 0011, iter [00080, 00202], lr: 0.000397, loss: 5.5528 +2023-11-22 12:52:08 - train: epoch 0011, iter [00090, 00202], lr: 0.000397, loss: 5.9098 +2023-11-22 12:54:01 - train: epoch 0011, iter [00100, 00202], lr: 0.000397, loss: 5.3338 +2023-11-22 12:55:54 - train: epoch 0011, iter [00110, 00202], lr: 0.000397, loss: 5.0094 +2023-11-22 12:57:48 - train: epoch 0011, iter [00120, 00202], lr: 0.000397, loss: 4.8536 +2023-11-22 12:59:41 - train: epoch 0011, iter [00130, 00202], lr: 0.000397, loss: 4.6733 +2023-11-22 13:01:35 - train: epoch 0011, iter [00140, 00202], lr: 0.000396, loss: 5.2641 +2023-11-22 13:03:29 - train: epoch 0011, iter [00150, 00202], lr: 0.000396, loss: 4.8896 +2023-11-22 13:05:22 - train: epoch 0011, iter [00160, 00202], lr: 0.000396, loss: 4.9011 +2023-11-22 13:07:16 - train: epoch 0011, iter [00170, 00202], lr: 0.000396, loss: 6.2786 +2023-11-22 13:09:10 - train: epoch 0011, iter [00180, 00202], lr: 0.000396, loss: 5.2551 +2023-11-22 13:11:03 - train: epoch 0011, iter [00190, 00202], lr: 0.000396, loss: 4.9868 +2023-11-22 13:12:57 - train: epoch 0011, iter [00200, 00202], lr: 0.000396, loss: 5.2337 +2023-11-22 13:13:27 - train: epoch 011, train_loss: 5.0719 +2023-11-22 13:47:28 - eval: epoch: 011, acc1: 65.395%, acc5: 82.342%, test_loss: 1.8692, per_image_load_time: 0.218ms, per_image_inference_time: 3.842ms +2023-11-22 13:47:32 - until epoch: 011, best_acc1: 65.395% +2023-11-22 13:47:32 - epoch 012 lr: 0.000396 +2023-11-22 13:49:27 - train: epoch 0012, iter [00010, 00202], lr: 0.000396, loss: 4.7258 +2023-11-22 13:51:17 - train: epoch 0012, iter [00020, 00202], lr: 0.000396, loss: 5.7316 +2023-11-22 13:53:07 - train: epoch 0012, iter [00030, 00202], lr: 0.000396, loss: 5.3367 +2023-11-22 13:54:59 - train: epoch 0012, iter [00040, 00202], lr: 0.000396, loss: 5.0926 +2023-11-22 13:56:48 - train: epoch 0012, iter [00050, 00202], lr: 0.000396, loss: 4.9713 +2023-11-22 13:58:38 - train: epoch 0012, iter [00060, 00202], lr: 0.000396, loss: 4.7966 +2023-11-22 14:00:27 - train: epoch 0012, iter [00070, 00202], lr: 0.000396, loss: 5.2652 +2023-11-22 14:02:16 - train: epoch 0012, iter [00080, 00202], lr: 0.000396, loss: 5.1390 +2023-11-22 14:04:06 - train: epoch 0012, iter [00090, 00202], lr: 0.000395, loss: 5.1741 +2023-11-22 14:05:55 - train: epoch 0012, iter [00100, 00202], lr: 0.000395, loss: 5.1086 +2023-11-22 14:07:45 - train: epoch 0012, iter [00110, 00202], lr: 0.000395, loss: 4.1675 +2023-11-22 14:09:35 - train: epoch 0012, iter [00120, 00202], lr: 0.000395, loss: 5.3634 +2023-11-22 14:11:26 - train: epoch 0012, iter [00130, 00202], lr: 0.000395, loss: 4.6445 +2023-11-22 14:13:16 - train: epoch 0012, iter [00140, 00202], lr: 0.000395, loss: 5.5690 
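The per-parameter lr_scale values recorded at startup follow a geometric layer-wise decay: each transformer block's scale is a constant factor of the next block's, and patch_embedding sits one step below blocks.0. A minimal sketch that reproduces the logged numbers, assuming scale = decay ** (depth - block_index) with decay = 0.65 and depth = 24 (both constants inferred from the logged values, not read from the training code):

```python
# Check that the lr_scale values logged at startup are consistent with
# scale = decay ** (depth - block_index); patch_embedding gets one extra
# decay step because it sits below blocks.0. Constants are inferred from
# the logged numbers themselves.
decay, depth = 0.65, 24

def block_scale(block_index: int) -> float:
    return decay ** (depth - block_index)

# Values copied verbatim from the startup section of this log.
logged = {
    0: 3.2353447101972754e-05,
    3: 0.00011780954793617752,
    16: 0.03186448128906251,
    19: 0.11602906250000002,
}
for index, value in logged.items():
    assert abs(block_scale(index) - value) < 1e-12, index

# patch_embedding.conv.weight is one level deeper than blocks.0.
assert abs(decay ** (depth + 1) - 2.1029740616282293e-05) < 1e-12
print("logged lr_scale values match decay ** (depth - block_index)")
```

Running it confirms, for example, blocks.19 -> 0.65 ** 5 ≈ 0.1160290625 and patch_embedding -> 0.65 ** 25 ≈ 2.10297e-05, matching the logged values.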
+2023-11-22 14:15:07 - train: epoch 0012, iter [00150, 00202], lr: 0.000395, loss: 5.4362 +2023-11-22 14:16:58 - train: epoch 0012, iter [00160, 00202], lr: 0.000395, loss: 4.2610 +2023-11-22 14:18:48 - train: epoch 0012, iter [00170, 00202], lr: 0.000395, loss: 4.8138 +2023-11-22 14:20:39 - train: epoch 0012, iter [00180, 00202], lr: 0.000395, loss: 4.1058 +2023-11-22 14:22:29 - train: epoch 0012, iter [00190, 00202], lr: 0.000395, loss: 4.1798 +2023-11-22 14:24:20 - train: epoch 0012, iter [00200, 00202], lr: 0.000395, loss: 4.0498 +2023-11-22 14:24:49 - train: epoch 012, train_loss: 4.9684 +2023-11-22 14:58:34 - eval: epoch: 012, acc1: 66.404%, acc5: 83.116%, test_loss: 1.7896, per_image_load_time: 0.224ms, per_image_inference_time: 3.844ms +2023-11-22 14:58:38 - until epoch: 012, best_acc1: 66.404% +2023-11-22 14:58:38 - epoch 013 lr: 0.000395 +2023-11-22 15:00:34 - train: epoch 0013, iter [00010, 00202], lr: 0.000395, loss: 4.5613 +2023-11-22 15:02:25 - train: epoch 0013, iter [00020, 00202], lr: 0.000395, loss: 4.9647 +2023-11-22 15:04:15 - train: epoch 0013, iter [00030, 00202], lr: 0.000394, loss: 4.0863 +2023-11-22 15:06:06 - train: epoch 0013, iter [00040, 00202], lr: 0.000394, loss: 4.9203 +2023-11-22 15:07:56 - train: epoch 0013, iter [00050, 00202], lr: 0.000394, loss: 5.1837 +2023-11-22 15:09:46 - train: epoch 0013, iter [00060, 00202], lr: 0.000394, loss: 5.1055 +2023-11-22 15:11:37 - train: epoch 0013, iter [00070, 00202], lr: 0.000394, loss: 4.5601 +2023-11-22 15:13:29 - train: epoch 0013, iter [00080, 00202], lr: 0.000394, loss: 5.2450 +2023-11-22 15:15:21 - train: epoch 0013, iter [00090, 00202], lr: 0.000394, loss: 5.1812 +2023-11-22 15:17:13 - train: epoch 0013, iter [00100, 00202], lr: 0.000394, loss: 4.9697 +2023-11-22 15:19:05 - train: epoch 0013, iter [00110, 00202], lr: 0.000394, loss: 4.6937 +2023-11-22 15:20:58 - train: epoch 0013, iter [00120, 00202], lr: 0.000394, loss: 5.0795 +2023-11-22 15:22:50 - train: epoch 0013, iter [00130, 00202], lr: 0.000394, loss: 4.8294 +2023-11-22 15:24:42 - train: epoch 0013, iter [00140, 00202], lr: 0.000394, loss: 4.7139 +2023-11-22 15:26:33 - train: epoch 0013, iter [00150, 00202], lr: 0.000394, loss: 5.5100 +2023-11-22 15:28:24 - train: epoch 0013, iter [00160, 00202], lr: 0.000393, loss: 5.2752 +2023-11-22 15:30:15 - train: epoch 0013, iter [00170, 00202], lr: 0.000393, loss: 5.0098 +2023-11-22 15:32:06 - train: epoch 0013, iter [00180, 00202], lr: 0.000393, loss: 4.6072 +2023-11-22 15:33:58 - train: epoch 0013, iter [00190, 00202], lr: 0.000393, loss: 4.5703 +2023-11-22 15:35:49 - train: epoch 0013, iter [00200, 00202], lr: 0.000393, loss: 4.2398 +2023-11-22 15:36:19 - train: epoch 013, train_loss: 4.8910 +2023-11-22 16:10:26 - eval: epoch: 013, acc1: 67.460%, acc5: 83.885%, test_loss: 1.7206, per_image_load_time: 0.225ms, per_image_inference_time: 3.845ms +2023-11-22 16:10:30 - until epoch: 013, best_acc1: 67.460% +2023-11-22 16:10:30 - epoch 014 lr: 0.000393 +2023-11-22 16:12:27 - train: epoch 0014, iter [00010, 00202], lr: 0.000393, loss: 5.3703 +2023-11-22 16:14:19 - train: epoch 0014, iter [00020, 00202], lr: 0.000393, loss: 5.2729 +2023-11-22 16:16:11 - train: epoch 0014, iter [00030, 00202], lr: 0.000393, loss: 5.9220 +2023-11-22 16:18:02 - train: epoch 0014, iter [00040, 00202], lr: 0.000393, loss: 3.9813 +2023-11-22 16:19:54 - train: epoch 0014, iter [00050, 00202], lr: 0.000393, loss: 3.9215 +2023-11-22 16:21:45 - train: epoch 0014, iter [00060, 00202], lr: 0.000393, loss: 4.6232 +2023-11-22 16:23:39 - train: 
epoch 0014, iter [00070, 00202], lr: 0.000392, loss: 3.9958 +2023-11-22 16:25:31 - train: epoch 0014, iter [00080, 00202], lr: 0.000392, loss: 4.9685 +2023-11-22 16:27:23 - train: epoch 0014, iter [00090, 00202], lr: 0.000392, loss: 5.0720 +2023-11-22 16:29:17 - train: epoch 0014, iter [00100, 00202], lr: 0.000392, loss: 5.0289 +2023-11-22 16:31:09 - train: epoch 0014, iter [00110, 00202], lr: 0.000392, loss: 5.4278 +2023-11-22 16:33:02 - train: epoch 0014, iter [00120, 00202], lr: 0.000392, loss: 5.0794 +2023-11-22 16:34:54 - train: epoch 0014, iter [00130, 00202], lr: 0.000392, loss: 5.6947 +2023-11-22 16:36:47 - train: epoch 0014, iter [00140, 00202], lr: 0.000392, loss: 4.5258 +2023-11-22 16:38:39 - train: epoch 0014, iter [00150, 00202], lr: 0.000392, loss: 5.3323 +2023-11-22 16:40:32 - train: epoch 0014, iter [00160, 00202], lr: 0.000392, loss: 4.3599 +2023-11-22 16:42:24 - train: epoch 0014, iter [00170, 00202], lr: 0.000392, loss: 5.3066 +2023-11-22 16:44:16 - train: epoch 0014, iter [00180, 00202], lr: 0.000391, loss: 5.4494 +2023-11-22 16:46:08 - train: epoch 0014, iter [00190, 00202], lr: 0.000391, loss: 5.4058 +2023-11-22 16:47:58 - train: epoch 0014, iter [00200, 00202], lr: 0.000391, loss: 5.5932 +2023-11-22 16:48:27 - train: epoch 014, train_loss: 4.9956 +2023-11-22 17:22:44 - eval: epoch: 014, acc1: 68.175%, acc5: 84.629%, test_loss: 1.6762, per_image_load_time: 0.213ms, per_image_inference_time: 3.844ms +2023-11-22 17:22:47 - until epoch: 014, best_acc1: 68.175% +2023-11-22 17:22:47 - epoch 015 lr: 0.000391 +2023-11-22 17:24:43 - train: epoch 0015, iter [00010, 00202], lr: 0.000391, loss: 5.3656 +2023-11-22 17:26:33 - train: epoch 0015, iter [00020, 00202], lr: 0.000391, loss: 5.2800 +2023-11-22 17:28:23 - train: epoch 0015, iter [00030, 00202], lr: 0.000391, loss: 4.8936 +2023-11-22 17:30:13 - train: epoch 0015, iter [00040, 00202], lr: 0.000391, loss: 5.2550 +2023-11-22 17:32:04 - train: epoch 0015, iter [00050, 00202], lr: 0.000391, loss: 5.0746 +2023-11-22 17:33:55 - train: epoch 0015, iter [00060, 00202], lr: 0.000391, loss: 5.0758 +2023-11-22 17:35:47 - train: epoch 0015, iter [00070, 00202], lr: 0.000391, loss: 5.4085 +2023-11-22 17:37:41 - train: epoch 0015, iter [00080, 00202], lr: 0.000390, loss: 5.2197 +2023-11-22 17:39:35 - train: epoch 0015, iter [00090, 00202], lr: 0.000390, loss: 4.8465 +2023-11-22 17:41:28 - train: epoch 0015, iter [00100, 00202], lr: 0.000390, loss: 4.6368 +2023-11-22 17:43:22 - train: epoch 0015, iter [00110, 00202], lr: 0.000390, loss: 4.1355 +2023-11-22 17:45:15 - train: epoch 0015, iter [00120, 00202], lr: 0.000390, loss: 4.5754 +2023-11-22 17:47:08 - train: epoch 0015, iter [00130, 00202], lr: 0.000390, loss: 4.8489 +2023-11-22 17:49:00 - train: epoch 0015, iter [00140, 00202], lr: 0.000390, loss: 4.7010 +2023-11-22 17:50:53 - train: epoch 0015, iter [00150, 00202], lr: 0.000390, loss: 4.6939 +2023-11-22 17:52:46 - train: epoch 0015, iter [00160, 00202], lr: 0.000390, loss: 5.0458 +2023-11-22 17:54:38 - train: epoch 0015, iter [00170, 00202], lr: 0.000390, loss: 5.7891 +2023-11-22 17:56:31 - train: epoch 0015, iter [00180, 00202], lr: 0.000389, loss: 4.8810 +2023-11-22 17:58:23 - train: epoch 0015, iter [00190, 00202], lr: 0.000389, loss: 4.7764 +2023-11-22 18:00:15 - train: epoch 0015, iter [00200, 00202], lr: 0.000389, loss: 3.9495 +2023-11-22 18:00:44 - train: epoch 015, train_loss: 4.8338 +2023-11-22 18:35:11 - eval: epoch: 015, acc1: 68.995%, acc5: 85.301%, test_loss: 1.5995, per_image_load_time: 0.222ms, 
per_image_inference_time: 3.845ms +2023-11-22 18:35:14 - until epoch: 015, best_acc1: 68.995% +2023-11-22 18:35:14 - epoch 016 lr: 0.000389 +2023-11-22 18:37:06 - train: epoch 0016, iter [00010, 00202], lr: 0.000389, loss: 5.0282 +2023-11-22 18:38:54 - train: epoch 0016, iter [00020, 00202], lr: 0.000389, loss: 5.0883 +2023-11-22 18:40:43 - train: epoch 0016, iter [00030, 00202], lr: 0.000389, loss: 4.3314 +2023-11-22 18:42:32 - train: epoch 0016, iter [00040, 00202], lr: 0.000389, loss: 5.2194 +2023-11-22 18:44:22 - train: epoch 0016, iter [00050, 00202], lr: 0.000389, loss: 4.9467 +2023-11-22 18:46:12 - train: epoch 0016, iter [00060, 00202], lr: 0.000389, loss: 5.3656 +2023-11-22 18:48:04 - train: epoch 0016, iter [00070, 00202], lr: 0.000388, loss: 5.0079 +2023-11-22 18:49:55 - train: epoch 0016, iter [00080, 00202], lr: 0.000388, loss: 4.7928 +2023-11-22 18:51:47 - train: epoch 0016, iter [00090, 00202], lr: 0.000388, loss: 4.8531 +2023-11-22 18:53:40 - train: epoch 0016, iter [00100, 00202], lr: 0.000388, loss: 5.1956 +2023-11-22 18:55:34 - train: epoch 0016, iter [00110, 00202], lr: 0.000388, loss: 5.0961 +2023-11-22 18:57:28 - train: epoch 0016, iter [00120, 00202], lr: 0.000388, loss: 4.6802 +2023-11-22 18:59:22 - train: epoch 0016, iter [00130, 00202], lr: 0.000388, loss: 5.0232 +2023-11-22 19:01:16 - train: epoch 0016, iter [00140, 00202], lr: 0.000388, loss: 5.0244 +2023-11-22 19:03:10 - train: epoch 0016, iter [00150, 00202], lr: 0.000388, loss: 4.6463 +2023-11-22 19:05:02 - train: epoch 0016, iter [00160, 00202], lr: 0.000387, loss: 4.5146 +2023-11-22 19:06:53 - train: epoch 0016, iter [00170, 00202], lr: 0.000387, loss: 4.7957 +2023-11-22 19:08:44 - train: epoch 0016, iter [00180, 00202], lr: 0.000387, loss: 4.4575 +2023-11-22 19:10:35 - train: epoch 0016, iter [00190, 00202], lr: 0.000387, loss: 4.8960 +2023-11-22 19:12:25 - train: epoch 0016, iter [00200, 00202], lr: 0.000387, loss: 5.4729 +2023-11-22 19:12:54 - train: epoch 016, train_loss: 4.7963 +2023-11-22 19:46:41 - eval: epoch: 016, acc1: 69.785%, acc5: 85.905%, test_loss: 1.5614, per_image_load_time: 0.235ms, per_image_inference_time: 3.845ms +2023-11-22 19:46:45 - until epoch: 016, best_acc1: 69.785% +2023-11-22 19:46:45 - epoch 017 lr: 0.000387 +2023-11-22 19:48:41 - train: epoch 0017, iter [00010, 00202], lr: 0.000387, loss: 4.9941 +2023-11-22 19:50:33 - train: epoch 0017, iter [00020, 00202], lr: 0.000387, loss: 4.6029 +2023-11-22 19:52:24 - train: epoch 0017, iter [00030, 00202], lr: 0.000387, loss: 4.9656 +2023-11-22 19:54:14 - train: epoch 0017, iter [00040, 00202], lr: 0.000386, loss: 4.7698 +2023-11-22 19:56:04 - train: epoch 0017, iter [00050, 00202], lr: 0.000386, loss: 4.7384 +2023-11-22 19:57:52 - train: epoch 0017, iter [00060, 00202], lr: 0.000386, loss: 5.1314 +2023-11-22 19:59:42 - train: epoch 0017, iter [00070, 00202], lr: 0.000386, loss: 5.0753 +2023-11-22 20:01:31 - train: epoch 0017, iter [00080, 00202], lr: 0.000386, loss: 4.8157 +2023-11-22 20:03:21 - train: epoch 0017, iter [00090, 00202], lr: 0.000386, loss: 5.3395 +2023-11-22 20:05:11 - train: epoch 0017, iter [00100, 00202], lr: 0.000386, loss: 5.1190 +2023-11-22 20:07:01 - train: epoch 0017, iter [00110, 00202], lr: 0.000386, loss: 5.5174 +2023-11-22 20:08:50 - train: epoch 0017, iter [00120, 00202], lr: 0.000386, loss: 4.4824 +2023-11-22 20:10:40 - train: epoch 0017, iter [00130, 00202], lr: 0.000385, loss: 4.5973 +2023-11-22 20:12:31 - train: epoch 0017, iter [00140, 00202], lr: 0.000385, loss: 4.9188 +2023-11-22 20:14:22 - train: 
epoch 0017, iter [00150, 00202], lr: 0.000385, loss: 5.0271 +2023-11-22 20:16:15 - train: epoch 0017, iter [00160, 00202], lr: 0.000385, loss: 4.0096 +2023-11-22 20:18:09 - train: epoch 0017, iter [00170, 00202], lr: 0.000385, loss: 4.5300 +2023-11-22 20:20:03 - train: epoch 0017, iter [00180, 00202], lr: 0.000385, loss: 4.3315 +2023-11-22 20:21:56 - train: epoch 0017, iter [00190, 00202], lr: 0.000385, loss: 3.4605 +2023-11-22 20:23:49 - train: epoch 0017, iter [00200, 00202], lr: 0.000385, loss: 4.9901 +2023-11-22 20:24:19 - train: epoch 017, train_loss: 4.7853 +2023-11-22 20:57:42 - eval: epoch: 017, acc1: 70.493%, acc5: 86.551%, test_loss: 1.5091, per_image_load_time: 0.229ms, per_image_inference_time: 3.844ms +2023-11-22 20:57:46 - until epoch: 017, best_acc1: 70.493% +2023-11-22 20:57:46 - epoch 018 lr: 0.000385 +2023-11-22 20:59:41 - train: epoch 0018, iter [00010, 00202], lr: 0.000384, loss: 4.5861 +2023-11-22 21:01:32 - train: epoch 0018, iter [00020, 00202], lr: 0.000384, loss: 4.8335 +2023-11-22 21:03:25 - train: epoch 0018, iter [00030, 00202], lr: 0.000384, loss: 3.7454 +2023-11-22 21:05:18 - train: epoch 0018, iter [00040, 00202], lr: 0.000384, loss: 3.9247 +2023-11-22 21:07:11 - train: epoch 0018, iter [00050, 00202], lr: 0.000384, loss: 5.2106 +2023-11-22 21:09:05 - train: epoch 0018, iter [00060, 00202], lr: 0.000384, loss: 4.8730 +2023-11-22 21:10:58 - train: epoch 0018, iter [00070, 00202], lr: 0.000384, loss: 4.9660 +2023-11-22 21:12:51 - train: epoch 0018, iter [00080, 00202], lr: 0.000383, loss: 4.7375 +2023-11-22 21:14:44 - train: epoch 0018, iter [00090, 00202], lr: 0.000383, loss: 4.6112 +2023-11-22 21:16:36 - train: epoch 0018, iter [00100, 00202], lr: 0.000383, loss: 4.8674 +2023-11-22 21:18:28 - train: epoch 0018, iter [00110, 00202], lr: 0.000383, loss: 3.9316 +2023-11-22 21:20:20 - train: epoch 0018, iter [00120, 00202], lr: 0.000383, loss: 4.7810 +2023-11-22 21:22:13 - train: epoch 0018, iter [00130, 00202], lr: 0.000383, loss: 4.3179 +2023-11-22 21:24:05 - train: epoch 0018, iter [00140, 00202], lr: 0.000383, loss: 4.4323 +2023-11-22 21:25:57 - train: epoch 0018, iter [00150, 00202], lr: 0.000383, loss: 4.7283 +2023-11-22 21:27:49 - train: epoch 0018, iter [00160, 00202], lr: 0.000382, loss: 4.6010 +2023-11-22 21:29:40 - train: epoch 0018, iter [00170, 00202], lr: 0.000382, loss: 4.9855 +2023-11-22 21:31:32 - train: epoch 0018, iter [00180, 00202], lr: 0.000382, loss: 4.9723 +2023-11-22 21:33:24 - train: epoch 0018, iter [00190, 00202], lr: 0.000382, loss: 4.2612 +2023-11-22 21:35:16 - train: epoch 0018, iter [00200, 00202], lr: 0.000382, loss: 4.5527 +2023-11-22 21:35:45 - train: epoch 018, train_loss: 4.6909 +2023-11-22 22:08:55 - eval: epoch: 018, acc1: 71.119%, acc5: 87.144%, test_loss: 1.4624, per_image_load_time: 0.240ms, per_image_inference_time: 3.842ms +2023-11-22 22:08:59 - until epoch: 018, best_acc1: 71.119% +2023-11-22 22:08:59 - epoch 019 lr: 0.000382 +2023-11-22 22:10:58 - train: epoch 0019, iter [00010, 00202], lr: 0.000382, loss: 4.4245 +2023-11-22 22:12:52 - train: epoch 0019, iter [00020, 00202], lr: 0.000382, loss: 4.9057 +2023-11-22 22:14:48 - train: epoch 0019, iter [00030, 00202], lr: 0.000381, loss: 5.6750 +2023-11-22 22:16:42 - train: epoch 0019, iter [00040, 00202], lr: 0.000381, loss: 4.3242 +2023-11-22 22:18:37 - train: epoch 0019, iter [00050, 00202], lr: 0.000381, loss: 4.8419 +2023-11-22 22:20:31 - train: epoch 0019, iter [00060, 00202], lr: 0.000381, loss: 4.5894 +2023-11-22 22:22:25 - train: epoch 0019, iter [00070, 00202], 
lr: 0.000381, loss: 4.7908 +2023-11-22 22:24:19 - train: epoch 0019, iter [00080, 00202], lr: 0.000381, loss: 4.7345 +2023-11-22 22:26:12 - train: epoch 0019, iter [00090, 00202], lr: 0.000381, loss: 4.1553 +2023-11-22 22:28:05 - train: epoch 0019, iter [00100, 00202], lr: 0.000380, loss: 4.8031 +2023-11-22 22:29:56 - train: epoch 0019, iter [00110, 00202], lr: 0.000380, loss: 4.4443 +2023-11-22 22:31:48 - train: epoch 0019, iter [00120, 00202], lr: 0.000380, loss: 4.1385 +2023-11-22 22:33:39 - train: epoch 0019, iter [00130, 00202], lr: 0.000380, loss: 4.8681 +2023-11-22 22:35:29 - train: epoch 0019, iter [00140, 00202], lr: 0.000380, loss: 4.8243 +2023-11-22 22:37:18 - train: epoch 0019, iter [00150, 00202], lr: 0.000380, loss: 5.4185 +2023-11-22 22:39:09 - train: epoch 0019, iter [00160, 00202], lr: 0.000380, loss: 4.7980 +2023-11-22 22:40:58 - train: epoch 0019, iter [00170, 00202], lr: 0.000379, loss: 4.5984 +2023-11-22 22:42:48 - train: epoch 0019, iter [00180, 00202], lr: 0.000379, loss: 4.9139 +2023-11-22 22:44:38 - train: epoch 0019, iter [00190, 00202], lr: 0.000379, loss: 3.8700 +2023-11-22 22:46:29 - train: epoch 0019, iter [00200, 00202], lr: 0.000379, loss: 4.9952 +2023-11-22 22:46:58 - train: epoch 019, train_loss: 4.7268 +2023-11-22 23:21:09 - eval: epoch: 019, acc1: 71.881%, acc5: 87.760%, test_loss: 1.4336, per_image_load_time: 0.234ms, per_image_inference_time: 3.842ms +2023-11-22 23:21:12 - until epoch: 019, best_acc1: 71.881% +2023-11-22 23:21:12 - epoch 020 lr: 0.000379 +2023-11-22 23:23:09 - train: epoch 0020, iter [00010, 00202], lr: 0.000379, loss: 4.1262 +2023-11-22 23:25:00 - train: epoch 0020, iter [00020, 00202], lr: 0.000379, loss: 4.5174 +2023-11-22 23:26:52 - train: epoch 0020, iter [00030, 00202], lr: 0.000379, loss: 4.4122 +2023-11-22 23:28:43 - train: epoch 0020, iter [00040, 00202], lr: 0.000378, loss: 5.3884 +2023-11-22 23:30:34 - train: epoch 0020, iter [00050, 00202], lr: 0.000378, loss: 4.5550 +2023-11-22 23:32:25 - train: epoch 0020, iter [00060, 00202], lr: 0.000378, loss: 3.9343 +2023-11-22 23:34:16 - train: epoch 0020, iter [00070, 00202], lr: 0.000378, loss: 4.1234 +2023-11-22 23:36:06 - train: epoch 0020, iter [00080, 00202], lr: 0.000378, loss: 4.3258 +2023-11-22 23:37:57 - train: epoch 0020, iter [00090, 00202], lr: 0.000378, loss: 4.4263 +2023-11-22 23:39:49 - train: epoch 0020, iter [00100, 00202], lr: 0.000378, loss: 3.4887 +2023-11-22 23:41:41 - train: epoch 0020, iter [00110, 00202], lr: 0.000377, loss: 4.8291 +2023-11-22 23:43:33 - train: epoch 0020, iter [00120, 00202], lr: 0.000377, loss: 4.8350 +2023-11-22 23:45:24 - train: epoch 0020, iter [00130, 00202], lr: 0.000377, loss: 4.6617 +2023-11-22 23:47:15 - train: epoch 0020, iter [00140, 00202], lr: 0.000377, loss: 4.8820 +2023-11-22 23:49:05 - train: epoch 0020, iter [00150, 00202], lr: 0.000377, loss: 4.8075 +2023-11-22 23:50:56 - train: epoch 0020, iter [00160, 00202], lr: 0.000377, loss: 4.4725 +2023-11-22 23:52:47 - train: epoch 0020, iter [00170, 00202], lr: 0.000376, loss: 4.6922 +2023-11-22 23:54:37 - train: epoch 0020, iter [00180, 00202], lr: 0.000376, loss: 4.6932 +2023-11-22 23:56:27 - train: epoch 0020, iter [00190, 00202], lr: 0.000376, loss: 4.4182 +2023-11-22 23:58:19 - train: epoch 0020, iter [00200, 00202], lr: 0.000376, loss: 4.9975 +2023-11-22 23:58:48 - train: epoch 020, train_loss: 4.5548 +2023-11-23 00:31:49 - eval: epoch: 020, acc1: 72.460%, acc5: 88.284%, test_loss: 1.3814, per_image_load_time: 0.244ms, per_image_inference_time: 3.843ms +2023-11-23 00:31:53 - 
until epoch: 020, best_acc1: 72.460% +2023-11-23 00:31:53 - epoch 021 lr: 0.000376 +2023-11-23 00:33:48 - train: epoch 0021, iter [00010, 00202], lr: 0.000376, loss: 5.2661 +2023-11-23 00:35:40 - train: epoch 0021, iter [00020, 00202], lr: 0.000376, loss: 5.2245 +2023-11-23 00:37:31 - train: epoch 0021, iter [00030, 00202], lr: 0.000375, loss: 4.5684 +2023-11-23 00:39:21 - train: epoch 0021, iter [00040, 00202], lr: 0.000375, loss: 3.7394 +2023-11-23 00:41:11 - train: epoch 0021, iter [00050, 00202], lr: 0.000375, loss: 4.4467 +2023-11-23 00:43:02 - train: epoch 0021, iter [00060, 00202], lr: 0.000375, loss: 3.6855 +2023-11-23 00:44:53 - train: epoch 0021, iter [00070, 00202], lr: 0.000375, loss: 4.6090 +2023-11-23 00:46:44 - train: epoch 0021, iter [00080, 00202], lr: 0.000375, loss: 4.9590 +2023-11-23 00:48:35 - train: epoch 0021, iter [00090, 00202], lr: 0.000375, loss: 3.6787 +2023-11-23 00:50:26 - train: epoch 0021, iter [00100, 00202], lr: 0.000374, loss: 4.5188 +2023-11-23 00:52:16 - train: epoch 0021, iter [00110, 00202], lr: 0.000374, loss: 4.6228 +2023-11-23 00:54:07 - train: epoch 0021, iter [00120, 00202], lr: 0.000374, loss: 4.5623 +2023-11-23 00:55:57 - train: epoch 0021, iter [00130, 00202], lr: 0.000374, loss: 4.2376 +2023-11-23 00:57:47 - train: epoch 0021, iter [00140, 00202], lr: 0.000374, loss: 4.0465 +2023-11-23 00:59:37 - train: epoch 0021, iter [00150, 00202], lr: 0.000374, loss: 4.4260 +2023-11-23 01:01:28 - train: epoch 0021, iter [00160, 00202], lr: 0.000373, loss: 5.2502 +2023-11-23 01:03:19 - train: epoch 0021, iter [00170, 00202], lr: 0.000373, loss: 4.9786 +2023-11-23 01:05:10 - train: epoch 0021, iter [00180, 00202], lr: 0.000373, loss: 4.2707 +2023-11-23 01:07:00 - train: epoch 0021, iter [00190, 00202], lr: 0.000373, loss: 4.8019 +2023-11-23 01:08:51 - train: epoch 0021, iter [00200, 00202], lr: 0.000373, loss: 5.0567 +2023-11-23 01:09:20 - train: epoch 021, train_loss: 4.5316 +2023-11-23 01:42:04 - eval: epoch: 021, acc1: 73.149%, acc5: 88.853%, test_loss: 1.3301, per_image_load_time: 0.260ms, per_image_inference_time: 3.845ms +2023-11-23 01:42:08 - until epoch: 021, best_acc1: 73.149% +2023-11-23 01:42:08 - epoch 022 lr: 0.000373 +2023-11-23 01:44:02 - train: epoch 0022, iter [00010, 00202], lr: 0.000373, loss: 4.5453 +2023-11-23 01:45:53 - train: epoch 0022, iter [00020, 00202], lr: 0.000372, loss: 4.1979 +2023-11-23 01:47:44 - train: epoch 0022, iter [00030, 00202], lr: 0.000372, loss: 4.1731 +2023-11-23 01:49:34 - train: epoch 0022, iter [00040, 00202], lr: 0.000372, loss: 5.0176 +2023-11-23 01:51:25 - train: epoch 0022, iter [00050, 00202], lr: 0.000372, loss: 4.9011 +2023-11-23 01:53:15 - train: epoch 0022, iter [00060, 00202], lr: 0.000372, loss: 4.4091 +2023-11-23 01:55:06 - train: epoch 0022, iter [00070, 00202], lr: 0.000372, loss: 4.5202 +2023-11-23 01:56:55 - train: epoch 0022, iter [00080, 00202], lr: 0.000371, loss: 5.3700 +2023-11-23 01:58:45 - train: epoch 0022, iter [00090, 00202], lr: 0.000371, loss: 4.9235 +2023-11-23 02:00:35 - train: epoch 0022, iter [00100, 00202], lr: 0.000371, loss: 3.3750 +2023-11-23 02:02:26 - train: epoch 0022, iter [00110, 00202], lr: 0.000371, loss: 4.4208 +2023-11-23 02:04:16 - train: epoch 0022, iter [00120, 00202], lr: 0.000371, loss: 4.3016 +2023-11-23 02:06:06 - train: epoch 0022, iter [00130, 00202], lr: 0.000371, loss: 5.5236 +2023-11-23 02:07:56 - train: epoch 0022, iter [00140, 00202], lr: 0.000370, loss: 4.5857 +2023-11-23 02:09:47 - train: epoch 0022, iter [00150, 00202], lr: 0.000370, loss: 4.1732 
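The learning-rate trace is consistent with per-iteration linear warmup over the first five epochs (the per-iteration lr climbs from 0.000004 at epoch 0001, iter 10 to the base 0.000400 by epoch 006) followed by cosine decay (0.000399, 0.000398, ..., 0.000376 by the epoch 021 header). A rough sketch under those assumptions; 202 iterations/epoch, 100 total epochs and the 4e-4 base are taken from the log itself, while the min_lr floor of 1e-6 is an assumption:

```python
import math

# Warmup + cosine schedule sketch that reproduces the lr values printed in
# this log; min_lr is an assumed small floor, everything else comes from
# the logged numbers.
base_lr, min_lr = 4e-4, 1e-6
iters_per_epoch, warmup_epochs, total_epochs = 202, 5, 100

def lr_at(global_iter: int) -> float:
    warmup_iters = warmup_epochs * iters_per_epoch
    total_iters = total_epochs * iters_per_epoch
    if global_iter < warmup_iters:
        # Linear per-iteration warmup over the first 5 epochs.
        return base_lr * global_iter / warmup_iters
    # Cosine decay from base_lr down to min_lr over the remaining iterations.
    progress = (global_iter - warmup_iters) / (total_iters - warmup_iters)
    return min_lr + 0.5 * (base_lr - min_lr) * (1.0 + math.cos(math.pi * progress))

# Spot checks against the logged values: epoch 0001 iter 10, and the epoch
# headers for epochs 020 and 021.
print(f"{lr_at(10):.6f} {lr_at(19 * 202):.6f} {lr_at(20 * 202):.6f}")
# prints: 0.000004 0.000379 0.000376
```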
+2023-11-23 02:11:37 - train: epoch 0022, iter [00160, 00202], lr: 0.000370, loss: 4.9649 +2023-11-23 02:13:27 - train: epoch 0022, iter [00170, 00202], lr: 0.000370, loss: 4.4104 +2023-11-23 02:15:17 - train: epoch 0022, iter [00180, 00202], lr: 0.000370, loss: 4.8065 +2023-11-23 02:17:08 - train: epoch 0022, iter [00190, 00202], lr: 0.000370, loss: 4.7726 +2023-11-23 02:18:58 - train: epoch 0022, iter [00200, 00202], lr: 0.000369, loss: 4.4884 +2023-11-23 02:19:27 - train: epoch 022, train_loss: 4.6210 +2023-11-23 02:52:12 - eval: epoch: 022, acc1: 73.804%, acc5: 89.427%, test_loss: 1.2882, per_image_load_time: 0.255ms, per_image_inference_time: 3.843ms +2023-11-23 02:52:16 - until epoch: 022, best_acc1: 73.804% +2023-11-23 02:52:16 - epoch 023 lr: 0.000369 +2023-11-23 02:54:10 - train: epoch 0023, iter [00010, 00202], lr: 0.000369, loss: 4.2295 +2023-11-23 02:56:00 - train: epoch 0023, iter [00020, 00202], lr: 0.000369, loss: 4.8226 +2023-11-23 02:57:51 - train: epoch 0023, iter [00030, 00202], lr: 0.000369, loss: 4.8048 +2023-11-23 02:59:41 - train: epoch 0023, iter [00040, 00202], lr: 0.000369, loss: 4.0462 +2023-11-23 03:01:31 - train: epoch 0023, iter [00050, 00202], lr: 0.000368, loss: 4.9337 +2023-11-23 03:03:21 - train: epoch 0023, iter [00060, 00202], lr: 0.000368, loss: 4.0046 +2023-11-23 03:05:10 - train: epoch 0023, iter [00070, 00202], lr: 0.000368, loss: 4.3241 +2023-11-23 03:07:00 - train: epoch 0023, iter [00080, 00202], lr: 0.000368, loss: 4.1298 +2023-11-23 03:08:50 - train: epoch 0023, iter [00090, 00202], lr: 0.000368, loss: 4.3649 +2023-11-23 03:10:40 - train: epoch 0023, iter [00100, 00202], lr: 0.000368, loss: 3.5455 +2023-11-23 03:12:30 - train: epoch 0023, iter [00110, 00202], lr: 0.000367, loss: 3.8798 +2023-11-23 03:14:20 - train: epoch 0023, iter [00120, 00202], lr: 0.000367, loss: 4.2447 +2023-11-23 03:16:10 - train: epoch 0023, iter [00130, 00202], lr: 0.000367, loss: 4.8211 +2023-11-23 03:18:00 - train: epoch 0023, iter [00140, 00202], lr: 0.000367, loss: 4.3882 +2023-11-23 03:19:49 - train: epoch 0023, iter [00150, 00202], lr: 0.000367, loss: 4.9402 +2023-11-23 03:21:39 - train: epoch 0023, iter [00160, 00202], lr: 0.000366, loss: 4.0417 +2023-11-23 03:23:30 - train: epoch 0023, iter [00170, 00202], lr: 0.000366, loss: 4.8773 +2023-11-23 03:25:20 - train: epoch 0023, iter [00180, 00202], lr: 0.000366, loss: 4.5586 +2023-11-23 03:27:10 - train: epoch 0023, iter [00190, 00202], lr: 0.000366, loss: 4.9256 +2023-11-23 03:28:59 - train: epoch 0023, iter [00200, 00202], lr: 0.000366, loss: 4.9823 +2023-11-23 03:29:28 - train: epoch 023, train_loss: 4.5475 +2023-11-23 04:02:00 - eval: epoch: 023, acc1: 74.361%, acc5: 89.960%, test_loss: 1.2606, per_image_load_time: 0.264ms, per_image_inference_time: 3.842ms +2023-11-23 04:02:04 - until epoch: 023, best_acc1: 74.361% +2023-11-23 04:02:04 - epoch 024 lr: 0.000366 +2023-11-23 04:03:58 - train: epoch 0024, iter [00010, 00202], lr: 0.000366, loss: 4.2771 +2023-11-23 04:05:48 - train: epoch 0024, iter [00020, 00202], lr: 0.000365, loss: 4.6225 +2023-11-23 04:07:38 - train: epoch 0024, iter [00030, 00202], lr: 0.000365, loss: 3.6579 +2023-11-23 04:09:28 - train: epoch 0024, iter [00040, 00202], lr: 0.000365, loss: 4.6029 +2023-11-23 04:11:18 - train: epoch 0024, iter [00050, 00202], lr: 0.000365, loss: 3.7838 +2023-11-23 04:13:07 - train: epoch 0024, iter [00060, 00202], lr: 0.000365, loss: 4.6955 +2023-11-23 04:14:58 - train: epoch 0024, iter [00070, 00202], lr: 0.000364, loss: 4.2876 +2023-11-23 04:16:48 - train: 
epoch 0024, iter [00080, 00202], lr: 0.000364, loss: 4.2431 +2023-11-23 04:18:38 - train: epoch 0024, iter [00090, 00202], lr: 0.000364, loss: 4.1516 +2023-11-23 04:20:29 - train: epoch 0024, iter [00100, 00202], lr: 0.000364, loss: 4.0249 +2023-11-23 04:22:19 - train: epoch 0024, iter [00110, 00202], lr: 0.000364, loss: 5.3371 +2023-11-23 04:24:09 - train: epoch 0024, iter [00120, 00202], lr: 0.000363, loss: 4.8107 +2023-11-23 04:26:00 - train: epoch 0024, iter [00130, 00202], lr: 0.000363, loss: 3.9937 +2023-11-23 04:27:51 - train: epoch 0024, iter [00140, 00202], lr: 0.000363, loss: 4.3841 +2023-11-23 04:29:43 - train: epoch 0024, iter [00150, 00202], lr: 0.000363, loss: 4.5197 +2023-11-23 04:31:35 - train: epoch 0024, iter [00160, 00202], lr: 0.000363, loss: 3.8946 +2023-11-23 04:33:27 - train: epoch 0024, iter [00170, 00202], lr: 0.000363, loss: 3.8266 +2023-11-23 04:35:20 - train: epoch 0024, iter [00180, 00202], lr: 0.000362, loss: 4.5511 +2023-11-23 04:37:13 - train: epoch 0024, iter [00190, 00202], lr: 0.000362, loss: 4.1910 +2023-11-23 04:39:05 - train: epoch 0024, iter [00200, 00202], lr: 0.000362, loss: 4.8734 +2023-11-23 04:39:35 - train: epoch 024, train_loss: 4.4322 +2023-11-23 05:13:40 - eval: epoch: 024, acc1: 74.917%, acc5: 90.466%, test_loss: 1.2124, per_image_load_time: 0.238ms, per_image_inference_time: 3.841ms +2023-11-23 05:13:44 - until epoch: 024, best_acc1: 74.917% +2023-11-23 05:13:44 - epoch 025 lr: 0.000362 +2023-11-23 05:15:39 - train: epoch 0025, iter [00010, 00202], lr: 0.000362, loss: 4.3603 +2023-11-23 05:17:29 - train: epoch 0025, iter [00020, 00202], lr: 0.000362, loss: 4.3301 +2023-11-23 05:19:20 - train: epoch 0025, iter [00030, 00202], lr: 0.000361, loss: 5.0883 +2023-11-23 05:21:10 - train: epoch 0025, iter [00040, 00202], lr: 0.000361, loss: 4.8601 +2023-11-23 05:22:59 - train: epoch 0025, iter [00050, 00202], lr: 0.000361, loss: 5.1335 +2023-11-23 05:24:49 - train: epoch 0025, iter [00060, 00202], lr: 0.000361, loss: 4.9011 +2023-11-23 05:26:39 - train: epoch 0025, iter [00070, 00202], lr: 0.000361, loss: 4.7655 +2023-11-23 05:28:29 - train: epoch 0025, iter [00080, 00202], lr: 0.000360, loss: 4.2779 +2023-11-23 05:30:20 - train: epoch 0025, iter [00090, 00202], lr: 0.000360, loss: 3.9170 +2023-11-23 05:32:10 - train: epoch 0025, iter [00100, 00202], lr: 0.000360, loss: 4.5198 +2023-11-23 05:34:00 - train: epoch 0025, iter [00110, 00202], lr: 0.000360, loss: 4.4051 +2023-11-23 05:35:50 - train: epoch 0025, iter [00120, 00202], lr: 0.000360, loss: 4.6274 +2023-11-23 05:37:39 - train: epoch 0025, iter [00130, 00202], lr: 0.000359, loss: 4.1948 +2023-11-23 05:39:29 - train: epoch 0025, iter [00140, 00202], lr: 0.000359, loss: 3.6050 +2023-11-23 05:41:20 - train: epoch 0025, iter [00150, 00202], lr: 0.000359, loss: 4.3045 +2023-11-23 05:43:10 - train: epoch 0025, iter [00160, 00202], lr: 0.000359, loss: 4.5020 +2023-11-23 05:45:00 - train: epoch 0025, iter [00170, 00202], lr: 0.000359, loss: 4.4772 +2023-11-23 05:46:50 - train: epoch 0025, iter [00180, 00202], lr: 0.000358, loss: 4.7231 +2023-11-23 05:48:39 - train: epoch 0025, iter [00190, 00202], lr: 0.000358, loss: 4.1962 +2023-11-23 05:50:28 - train: epoch 0025, iter [00200, 00202], lr: 0.000358, loss: 5.0723 +2023-11-23 05:50:57 - train: epoch 025, train_loss: 4.4424 +2023-11-23 06:23:22 - eval: epoch: 025, acc1: 75.519%, acc5: 90.902%, test_loss: 1.1867, per_image_load_time: 0.251ms, per_image_inference_time: 3.843ms +2023-11-23 06:23:26 - until epoch: 025, best_acc1: 75.519% +2023-11-23 06:23:26 
- epoch 026 lr: 0.000358 +2023-11-23 06:25:19 - train: epoch 0026, iter [00010, 00202], lr: 0.000358, loss: 4.5337 +2023-11-23 06:27:08 - train: epoch 0026, iter [00020, 00202], lr: 0.000358, loss: 4.8026 +2023-11-23 06:28:57 - train: epoch 0026, iter [00030, 00202], lr: 0.000357, loss: 4.2272 +2023-11-23 06:30:46 - train: epoch 0026, iter [00040, 00202], lr: 0.000357, loss: 4.8397 +2023-11-23 06:32:35 - train: epoch 0026, iter [00050, 00202], lr: 0.000357, loss: 4.2269 +2023-11-23 06:34:25 - train: epoch 0026, iter [00060, 00202], lr: 0.000357, loss: 3.7845 +2023-11-23 06:36:15 - train: epoch 0026, iter [00070, 00202], lr: 0.000357, loss: 4.5395 +2023-11-23 06:38:04 - train: epoch 0026, iter [00080, 00202], lr: 0.000356, loss: 4.3822 +2023-11-23 06:39:53 - train: epoch 0026, iter [00090, 00202], lr: 0.000356, loss: 4.2208 +2023-11-23 06:41:42 - train: epoch 0026, iter [00100, 00202], lr: 0.000356, loss: 4.4012 +2023-11-23 06:43:32 - train: epoch 0026, iter [00110, 00202], lr: 0.000356, loss: 4.3154 +2023-11-23 06:45:22 - train: epoch 0026, iter [00120, 00202], lr: 0.000356, loss: 3.8768 +2023-11-23 06:47:12 - train: epoch 0026, iter [00130, 00202], lr: 0.000355, loss: 4.8435 +2023-11-23 06:49:01 - train: epoch 0026, iter [00140, 00202], lr: 0.000355, loss: 3.0162 +2023-11-23 06:50:50 - train: epoch 0026, iter [00150, 00202], lr: 0.000355, loss: 3.8218 +2023-11-23 06:52:40 - train: epoch 0026, iter [00160, 00202], lr: 0.000355, loss: 5.1443 +2023-11-23 06:54:29 - train: epoch 0026, iter [00170, 00202], lr: 0.000354, loss: 4.5830 +2023-11-23 06:56:19 - train: epoch 0026, iter [00180, 00202], lr: 0.000354, loss: 4.3828 +2023-11-23 06:58:09 - train: epoch 0026, iter [00190, 00202], lr: 0.000354, loss: 3.9823 +2023-11-23 06:59:58 - train: epoch 0026, iter [00200, 00202], lr: 0.000354, loss: 4.5483 +2023-11-23 07:00:28 - train: epoch 026, train_loss: 4.3796 +2023-11-23 07:32:41 - eval: epoch: 026, acc1: 76.092%, acc5: 91.410%, test_loss: 1.1555, per_image_load_time: 0.265ms, per_image_inference_time: 3.844ms +2023-11-23 07:32:45 - until epoch: 026, best_acc1: 76.092% +2023-11-23 07:32:45 - epoch 027 lr: 0.000354 +2023-11-23 07:34:38 - train: epoch 0027, iter [00010, 00202], lr: 0.000354, loss: 4.2024 +2023-11-23 07:36:28 - train: epoch 0027, iter [00020, 00202], lr: 0.000353, loss: 3.8304 +2023-11-23 07:38:17 - train: epoch 0027, iter [00030, 00202], lr: 0.000353, loss: 4.3948 +2023-11-23 07:40:07 - train: epoch 0027, iter [00040, 00202], lr: 0.000353, loss: 5.0418 +2023-11-23 07:41:56 - train: epoch 0027, iter [00050, 00202], lr: 0.000353, loss: 4.1339 +2023-11-23 07:43:45 - train: epoch 0027, iter [00060, 00202], lr: 0.000353, loss: 4.1481 +2023-11-23 07:45:35 - train: epoch 0027, iter [00070, 00202], lr: 0.000352, loss: 3.4623 +2023-11-23 07:47:25 - train: epoch 0027, iter [00080, 00202], lr: 0.000352, loss: 4.4207 +2023-11-23 07:49:14 - train: epoch 0027, iter [00090, 00202], lr: 0.000352, loss: 4.8965 +2023-11-23 07:51:03 - train: epoch 0027, iter [00100, 00202], lr: 0.000352, loss: 4.2912 +2023-11-23 07:52:52 - train: epoch 0027, iter [00110, 00202], lr: 0.000351, loss: 5.0749 +2023-11-23 07:54:42 - train: epoch 0027, iter [00120, 00202], lr: 0.000351, loss: 4.8563 +2023-11-23 07:56:31 - train: epoch 0027, iter [00130, 00202], lr: 0.000351, loss: 3.8003 +2023-11-23 07:58:21 - train: epoch 0027, iter [00140, 00202], lr: 0.000351, loss: 4.3823 +2023-11-23 08:00:10 - train: epoch 0027, iter [00150, 00202], lr: 0.000351, loss: 4.6835 +2023-11-23 08:02:00 - train: epoch 0027, iter [00160, 
00202], lr: 0.000350, loss: 5.2409 +2023-11-23 08:03:49 - train: epoch 0027, iter [00170, 00202], lr: 0.000350, loss: 4.9988 +2023-11-23 08:05:38 - train: epoch 0027, iter [00180, 00202], lr: 0.000350, loss: 5.2662 +2023-11-23 08:07:27 - train: epoch 0027, iter [00190, 00202], lr: 0.000350, loss: 4.5722 +2023-11-23 08:09:16 - train: epoch 0027, iter [00200, 00202], lr: 0.000350, loss: 4.6273 +2023-11-23 08:09:45 - train: epoch 027, train_loss: 4.4318 +2023-11-23 08:41:59 - eval: epoch: 027, acc1: 76.624%, acc5: 91.837%, test_loss: 1.1040, per_image_load_time: 0.257ms, per_image_inference_time: 3.841ms +2023-11-23 08:42:03 - until epoch: 027, best_acc1: 76.624% +2023-11-23 08:42:03 - epoch 028 lr: 0.000350 +2023-11-23 08:43:56 - train: epoch 0028, iter [00010, 00202], lr: 0.000349, loss: 3.1009 +2023-11-23 08:45:46 - train: epoch 0028, iter [00020, 00202], lr: 0.000349, loss: 4.7948 +2023-11-23 08:47:36 - train: epoch 0028, iter [00030, 00202], lr: 0.000349, loss: 3.9493 +2023-11-23 08:49:25 - train: epoch 0028, iter [00040, 00202], lr: 0.000349, loss: 4.4175 +2023-11-23 08:51:15 - train: epoch 0028, iter [00050, 00202], lr: 0.000348, loss: 5.0352 +2023-11-23 08:53:04 - train: epoch 0028, iter [00060, 00202], lr: 0.000348, loss: 4.7267 +2023-11-23 08:54:54 - train: epoch 0028, iter [00070, 00202], lr: 0.000348, loss: 3.5476 +2023-11-23 08:56:43 - train: epoch 0028, iter [00080, 00202], lr: 0.000348, loss: 4.1358 +2023-11-23 08:58:33 - train: epoch 0028, iter [00090, 00202], lr: 0.000348, loss: 4.4629 +2023-11-23 09:00:22 - train: epoch 0028, iter [00100, 00202], lr: 0.000347, loss: 5.2368 +2023-11-23 09:02:12 - train: epoch 0028, iter [00110, 00202], lr: 0.000347, loss: 5.0497 +2023-11-23 09:04:02 - train: epoch 0028, iter [00120, 00202], lr: 0.000347, loss: 4.9968 +2023-11-23 09:05:51 - train: epoch 0028, iter [00130, 00202], lr: 0.000347, loss: 4.4663 +2023-11-23 09:07:41 - train: epoch 0028, iter [00140, 00202], lr: 0.000346, loss: 4.0678 +2023-11-23 09:09:31 - train: epoch 0028, iter [00150, 00202], lr: 0.000346, loss: 4.6968 +2023-11-23 09:11:22 - train: epoch 0028, iter [00160, 00202], lr: 0.000346, loss: 4.8194 +2023-11-23 09:13:15 - train: epoch 0028, iter [00170, 00202], lr: 0.000346, loss: 4.3955 +2023-11-23 09:15:06 - train: epoch 0028, iter [00180, 00202], lr: 0.000346, loss: 4.6581 +2023-11-23 09:16:57 - train: epoch 0028, iter [00190, 00202], lr: 0.000345, loss: 4.5771 +2023-11-23 09:18:48 - train: epoch 0028, iter [00200, 00202], lr: 0.000345, loss: 4.5054 +2023-11-23 09:19:17 - train: epoch 028, train_loss: 4.3901 +2023-11-23 09:52:07 - eval: epoch: 028, acc1: 77.209%, acc5: 92.294%, test_loss: 1.0939, per_image_load_time: 0.239ms, per_image_inference_time: 3.841ms +2023-11-23 09:52:11 - until epoch: 028, best_acc1: 77.209% +2023-11-23 09:52:11 - epoch 029 lr: 0.000345 +2023-11-23 09:54:06 - train: epoch 0029, iter [00010, 00202], lr: 0.000345, loss: 5.0974 +2023-11-23 09:55:57 - train: epoch 0029, iter [00020, 00202], lr: 0.000345, loss: 4.7845 +2023-11-23 09:57:47 - train: epoch 0029, iter [00030, 00202], lr: 0.000344, loss: 3.4396 +2023-11-23 09:59:38 - train: epoch 0029, iter [00040, 00202], lr: 0.000344, loss: 4.9072 +2023-11-23 10:01:28 - train: epoch 0029, iter [00050, 00202], lr: 0.000344, loss: 4.6107 +2023-11-23 10:03:18 - train: epoch 0029, iter [00060, 00202], lr: 0.000344, loss: 5.0741 +2023-11-23 10:05:08 - train: epoch 0029, iter [00070, 00202], lr: 0.000343, loss: 4.2334 +2023-11-23 10:07:00 - train: epoch 0029, iter [00080, 00202], lr: 0.000343, loss: 4.0859 
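The eval lines share a fixed format ("eval: epoch: NNN, acc1: X%, acc5: Y%, test_loss: Z, ..."), so the accuracy curve in this log can be extracted with a short script. A sketch of such a helper (hypothetical, not part of the training code):

```python
import re

# Pull (epoch, acc1, acc5, test_loss) out of eval lines formatted like the
# ones in this log, e.g.
# "... - eval: epoch: 028, acc1: 77.209%, acc5: 92.294%, test_loss: 1.0939, ..."
EVAL_PATTERN = re.compile(
    r"eval: epoch: (\d+), acc1: ([\d.]+)%, acc5: ([\d.]+)%, test_loss: ([\d.]+)"
)

def parse_eval_lines(lines):
    """Yield (epoch, acc1, acc5, test_loss) tuples from matching log lines."""
    for line in lines:
        match = EVAL_PATTERN.search(line)
        if match:
            epoch, acc1, acc5, loss = match.groups()
            yield int(epoch), float(acc1), float(acc5), float(loss)

sample = ("2023-11-23 09:52:07 - eval: epoch: 028, acc1: 77.209%, "
          "acc5: 92.294%, test_loss: 1.0939")
print(list(parse_eval_lines([sample])))  # [(28, 77.209, 92.294, 1.0939)]
```

Applied to the entries so far, this traces acc1 from 2.859% at epoch 001 up to 77.209% at epoch 028.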
+2023-11-23 10:08:51 - train: epoch 0029, iter [00090, 00202], lr: 0.000343, loss: 3.9802 +2023-11-23 10:10:42 - train: epoch 0029, iter [00100, 00202], lr: 0.000343, loss: 3.5032 +2023-11-23 10:12:33 - train: epoch 0029, iter [00110, 00202], lr: 0.000343, loss: 4.6029 +2023-11-23 10:14:24 - train: epoch 0029, iter [00120, 00202], lr: 0.000342, loss: 4.6448 +2023-11-23 10:16:13 - train: epoch 0029, iter [00130, 00202], lr: 0.000342, loss: 4.3541 +2023-11-23 10:18:04 - train: epoch 0029, iter [00140, 00202], lr: 0.000342, loss: 4.7566 +2023-11-23 10:19:54 - train: epoch 0029, iter [00150, 00202], lr: 0.000342, loss: 4.1239 +2023-11-23 10:21:42 - train: epoch 0029, iter [00160, 00202], lr: 0.000341, loss: 4.6268 +2023-11-23 10:23:31 - train: epoch 0029, iter [00170, 00202], lr: 0.000341, loss: 3.9257 +2023-11-23 10:25:20 - train: epoch 0029, iter [00180, 00202], lr: 0.000341, loss: 4.7252 +2023-11-23 10:27:08 - train: epoch 0029, iter [00190, 00202], lr: 0.000341, loss: 4.5378 +2023-11-23 10:28:56 - train: epoch 0029, iter [00200, 00202], lr: 0.000340, loss: 4.4123 +2023-11-23 10:29:25 - train: epoch 029, train_loss: 4.3759 +2023-11-23 11:02:24 - eval: epoch: 029, acc1: 77.715%, acc5: 92.718%, test_loss: 1.0499, per_image_load_time: 0.235ms, per_image_inference_time: 3.840ms +2023-11-23 11:02:28 - until epoch: 029, best_acc1: 77.715% +2023-11-23 11:02:28 - epoch 030 lr: 0.000340 +2023-11-23 11:04:23 - train: epoch 0030, iter [00010, 00202], lr: 0.000340, loss: 4.3495 +2023-11-23 11:06:14 - train: epoch 0030, iter [00020, 00202], lr: 0.000340, loss: 3.2490 +2023-11-23 11:08:06 - train: epoch 0030, iter [00030, 00202], lr: 0.000340, loss: 4.6745 +2023-11-23 11:09:57 - train: epoch 0030, iter [00040, 00202], lr: 0.000339, loss: 4.4848 +2023-11-23 11:11:49 - train: epoch 0030, iter [00050, 00202], lr: 0.000339, loss: 4.5961 +2023-11-23 11:13:40 - train: epoch 0030, iter [00060, 00202], lr: 0.000339, loss: 3.9764 +2023-11-23 11:15:30 - train: epoch 0030, iter [00070, 00202], lr: 0.000339, loss: 4.8506 +2023-11-23 11:17:22 - train: epoch 0030, iter [00080, 00202], lr: 0.000339, loss: 3.9076 +2023-11-23 11:19:13 - train: epoch 0030, iter [00090, 00202], lr: 0.000338, loss: 4.2301 +2023-11-23 11:21:05 - train: epoch 0030, iter [00100, 00202], lr: 0.000338, loss: 4.0884 +2023-11-23 11:22:56 - train: epoch 0030, iter [00110, 00202], lr: 0.000338, loss: 4.8416 +2023-11-23 11:24:48 - train: epoch 0030, iter [00120, 00202], lr: 0.000338, loss: 5.0163 +2023-11-23 11:26:39 - train: epoch 0030, iter [00130, 00202], lr: 0.000337, loss: 4.6390 +2023-11-23 11:28:30 - train: epoch 0030, iter [00140, 00202], lr: 0.000337, loss: 3.8370 +2023-11-23 11:30:21 - train: epoch 0030, iter [00150, 00202], lr: 0.000337, loss: 4.4102 +2023-11-23 11:32:13 - train: epoch 0030, iter [00160, 00202], lr: 0.000337, loss: 5.4379 +2023-11-23 11:34:05 - train: epoch 0030, iter [00170, 00202], lr: 0.000336, loss: 4.5507 +2023-11-23 11:35:58 - train: epoch 0030, iter [00180, 00202], lr: 0.000336, loss: 3.1451 +2023-11-23 11:37:49 - train: epoch 0030, iter [00190, 00202], lr: 0.000336, loss: 3.7142 +2023-11-23 11:39:41 - train: epoch 0030, iter [00200, 00202], lr: 0.000336, loss: 4.9758 +2023-11-23 11:40:11 - train: epoch 030, train_loss: 4.3572 +2023-11-23 12:13:50 - eval: epoch: 030, acc1: 78.239%, acc5: 93.114%, test_loss: 1.0225, per_image_load_time: 0.225ms, per_image_inference_time: 3.838ms +2023-11-23 12:13:54 - until epoch: 030, best_acc1: 78.239% +2023-11-23 12:13:54 - epoch 031 lr: 0.000336 +2023-11-23 12:15:51 - train: 
epoch 0031, iter [00010, 00202], lr: 0.000335, loss: 4.6776 +2023-11-23 12:17:43 - train: epoch 0031, iter [00020, 00202], lr: 0.000335, loss: 4.9682 +2023-11-23 12:19:35 - train: epoch 0031, iter [00030, 00202], lr: 0.000335, loss: 4.1499 +2023-11-23 12:21:27 - train: epoch 0031, iter [00040, 00202], lr: 0.000335, loss: 4.6858 +2023-11-23 12:23:19 - train: epoch 0031, iter [00050, 00202], lr: 0.000334, loss: 4.6044 +2023-11-23 12:25:11 - train: epoch 0031, iter [00060, 00202], lr: 0.000334, loss: 4.6502 +2023-11-23 12:27:03 - train: epoch 0031, iter [00070, 00202], lr: 0.000334, loss: 4.6297 +2023-11-23 12:28:56 - train: epoch 0031, iter [00080, 00202], lr: 0.000334, loss: 4.7005 +2023-11-23 12:30:49 - train: epoch 0031, iter [00090, 00202], lr: 0.000333, loss: 4.6123 +2023-11-23 12:32:41 - train: epoch 0031, iter [00100, 00202], lr: 0.000333, loss: 4.5985 +2023-11-23 12:34:34 - train: epoch 0031, iter [00110, 00202], lr: 0.000333, loss: 4.7057 +2023-11-23 12:36:25 - train: epoch 0031, iter [00120, 00202], lr: 0.000333, loss: 4.7182 +2023-11-23 12:38:18 - train: epoch 0031, iter [00130, 00202], lr: 0.000332, loss: 3.7572 +2023-11-23 12:40:10 - train: epoch 0031, iter [00140, 00202], lr: 0.000332, loss: 4.0500 +2023-11-23 12:42:02 - train: epoch 0031, iter [00150, 00202], lr: 0.000332, loss: 2.8729 +2023-11-23 12:43:55 - train: epoch 0031, iter [00160, 00202], lr: 0.000332, loss: 4.8554 +2023-11-23 12:45:48 - train: epoch 0031, iter [00170, 00202], lr: 0.000331, loss: 4.0333 +2023-11-23 12:47:41 - train: epoch 0031, iter [00180, 00202], lr: 0.000331, loss: 4.3654 +2023-11-23 12:49:33 - train: epoch 0031, iter [00190, 00202], lr: 0.000331, loss: 3.9889 +2023-11-23 12:51:25 - train: epoch 0031, iter [00200, 00202], lr: 0.000331, loss: 4.1908 +2023-11-23 12:51:55 - train: epoch 031, train_loss: 4.2384 +2023-11-23 13:25:52 - eval: epoch: 031, acc1: 78.776%, acc5: 93.500%, test_loss: 0.9793, per_image_load_time: 0.220ms, per_image_inference_time: 3.839ms +2023-11-23 13:25:56 - until epoch: 031, best_acc1: 78.776% +2023-11-23 13:25:56 - epoch 032 lr: 0.000331 +2023-11-23 13:27:54 - train: epoch 0032, iter [00010, 00202], lr: 0.000330, loss: 3.1276 +2023-11-23 13:29:46 - train: epoch 0032, iter [00020, 00202], lr: 0.000330, loss: 4.7563 +2023-11-23 13:31:38 - train: epoch 0032, iter [00030, 00202], lr: 0.000330, loss: 4.0999 +2023-11-23 13:33:30 - train: epoch 0032, iter [00040, 00202], lr: 0.000330, loss: 3.9905 +2023-11-23 13:35:22 - train: epoch 0032, iter [00050, 00202], lr: 0.000329, loss: 4.1841 +2023-11-23 13:37:15 - train: epoch 0032, iter [00060, 00202], lr: 0.000329, loss: 4.9117 +2023-11-23 13:39:08 - train: epoch 0032, iter [00070, 00202], lr: 0.000329, loss: 4.4131 +2023-11-23 13:41:01 - train: epoch 0032, iter [00080, 00202], lr: 0.000329, loss: 4.3871 +2023-11-23 13:42:53 - train: epoch 0032, iter [00090, 00202], lr: 0.000328, loss: 4.5691 +2023-11-23 13:44:45 - train: epoch 0032, iter [00100, 00202], lr: 0.000328, loss: 3.8520 +2023-11-23 13:46:38 - train: epoch 0032, iter [00110, 00202], lr: 0.000328, loss: 4.7205 +2023-11-23 13:48:30 - train: epoch 0032, iter [00120, 00202], lr: 0.000328, loss: 4.8186 +2023-11-23 13:50:22 - train: epoch 0032, iter [00130, 00202], lr: 0.000327, loss: 3.7528 +2023-11-23 13:52:14 - train: epoch 0032, iter [00140, 00202], lr: 0.000327, loss: 4.4780 +2023-11-23 13:54:06 - train: epoch 0032, iter [00150, 00202], lr: 0.000327, loss: 3.2666 +2023-11-23 13:55:59 - train: epoch 0032, iter [00160, 00202], lr: 0.000327, loss: 4.4403 +2023-11-23 13:57:51 - 
train: epoch 0032, iter [00170, 00202], lr: 0.000326, loss: 3.8864 +2023-11-23 13:59:41 - train: epoch 0032, iter [00180, 00202], lr: 0.000326, loss: 4.8881 +2023-11-23 14:01:30 - train: epoch 0032, iter [00190, 00202], lr: 0.000326, loss: 4.6833 +2023-11-23 14:03:20 - train: epoch 0032, iter [00200, 00202], lr: 0.000326, loss: 5.0243 +2023-11-23 14:03:49 - train: epoch 032, train_loss: 4.2680 +2023-11-23 14:37:25 - eval: epoch: 032, acc1: 79.275%, acc5: 93.877%, test_loss: 0.9805, per_image_load_time: 0.230ms, per_image_inference_time: 3.841ms +2023-11-23 14:37:29 - until epoch: 032, best_acc1: 79.275% +2023-11-23 14:37:29 - epoch 033 lr: 0.000326 +2023-11-23 14:39:23 - train: epoch 0033, iter [00010, 00202], lr: 0.000325, loss: 3.7767 +2023-11-23 14:41:13 - train: epoch 0033, iter [00020, 00202], lr: 0.000325, loss: 3.5798 +2023-11-23 14:43:04 - train: epoch 0033, iter [00030, 00202], lr: 0.000325, loss: 4.2773 +2023-11-23 14:44:54 - train: epoch 0033, iter [00040, 00202], lr: 0.000325, loss: 4.5582 +2023-11-23 14:46:44 - train: epoch 0033, iter [00050, 00202], lr: 0.000324, loss: 2.5267 +2023-11-23 14:48:35 - train: epoch 0033, iter [00060, 00202], lr: 0.000324, loss: 4.1794 +2023-11-23 14:50:26 - train: epoch 0033, iter [00070, 00202], lr: 0.000324, loss: 4.3872 +2023-11-23 14:52:18 - train: epoch 0033, iter [00080, 00202], lr: 0.000324, loss: 4.5204 +2023-11-23 14:54:08 - train: epoch 0033, iter [00090, 00202], lr: 0.000323, loss: 4.1382 +2023-11-23 14:55:59 - train: epoch 0033, iter [00100, 00202], lr: 0.000323, loss: 3.1884 +2023-11-23 14:57:51 - train: epoch 0033, iter [00110, 00202], lr: 0.000323, loss: 4.4813 +2023-11-23 14:59:42 - train: epoch 0033, iter [00120, 00202], lr: 0.000323, loss: 3.1596 +2023-11-23 15:01:33 - train: epoch 0033, iter [00130, 00202], lr: 0.000322, loss: 4.2024 +2023-11-23 15:03:24 - train: epoch 0033, iter [00140, 00202], lr: 0.000322, loss: 3.6167 +2023-11-23 15:05:15 - train: epoch 0033, iter [00150, 00202], lr: 0.000322, loss: 5.0768 +2023-11-23 15:07:07 - train: epoch 0033, iter [00160, 00202], lr: 0.000322, loss: 3.8097 +2023-11-23 15:08:59 - train: epoch 0033, iter [00170, 00202], lr: 0.000321, loss: 4.8404 +2023-11-23 15:10:50 - train: epoch 0033, iter [00180, 00202], lr: 0.000321, loss: 3.6937 +2023-11-23 15:12:42 - train: epoch 0033, iter [00190, 00202], lr: 0.000321, loss: 4.6377 +2023-11-23 15:14:33 - train: epoch 0033, iter [00200, 00202], lr: 0.000320, loss: 4.1159 +2023-11-23 15:15:02 - train: epoch 033, train_loss: 4.2726 +2023-11-23 15:48:47 - eval: epoch: 033, acc1: 79.791%, acc5: 94.213%, test_loss: 0.9334, per_image_load_time: 0.230ms, per_image_inference_time: 3.841ms +2023-11-23 15:48:51 - until epoch: 033, best_acc1: 79.791% +2023-11-23 15:48:51 - epoch 034 lr: 0.000320 +2023-11-23 15:50:48 - train: epoch 0034, iter [00010, 00202], lr: 0.000320, loss: 4.5547 +2023-11-23 15:52:39 - train: epoch 0034, iter [00020, 00202], lr: 0.000320, loss: 3.8692 +2023-11-23 15:54:30 - train: epoch 0034, iter [00030, 00202], lr: 0.000320, loss: 3.8610 +2023-11-23 15:56:22 - train: epoch 0034, iter [00040, 00202], lr: 0.000319, loss: 4.1815 +2023-11-23 15:58:13 - train: epoch 0034, iter [00050, 00202], lr: 0.000319, loss: 4.0347 +2023-11-23 16:00:05 - train: epoch 0034, iter [00060, 00202], lr: 0.000319, loss: 4.0736 +2023-11-23 16:01:56 - train: epoch 0034, iter [00070, 00202], lr: 0.000319, loss: 3.2192 +2023-11-23 16:03:47 - train: epoch 0034, iter [00080, 00202], lr: 0.000318, loss: 4.1188 +2023-11-23 16:05:38 - train: epoch 0034, iter [00090, 
00202], lr: 0.000318, loss: 4.6246 +2023-11-23 16:07:30 - train: epoch 0034, iter [00100, 00202], lr: 0.000318, loss: 4.2458 +2023-11-23 16:09:22 - train: epoch 0034, iter [00110, 00202], lr: 0.000318, loss: 4.5249 +2023-11-23 16:11:14 - train: epoch 0034, iter [00120, 00202], lr: 0.000317, loss: 4.8449 +2023-11-23 16:13:05 - train: epoch 0034, iter [00130, 00202], lr: 0.000317, loss: 4.4806 +2023-11-23 16:14:57 - train: epoch 0034, iter [00140, 00202], lr: 0.000317, loss: 4.4488 +2023-11-23 16:16:48 - train: epoch 0034, iter [00150, 00202], lr: 0.000316, loss: 4.6702 +2023-11-23 16:18:39 - train: epoch 0034, iter [00160, 00202], lr: 0.000316, loss: 4.2731 +2023-11-23 16:20:31 - train: epoch 0034, iter [00170, 00202], lr: 0.000316, loss: 2.9809 +2023-11-23 16:22:23 - train: epoch 0034, iter [00180, 00202], lr: 0.000316, loss: 3.8319 +2023-11-23 16:24:14 - train: epoch 0034, iter [00190, 00202], lr: 0.000315, loss: 4.3713 +2023-11-23 16:26:05 - train: epoch 0034, iter [00200, 00202], lr: 0.000315, loss: 4.5279 +2023-11-23 16:26:34 - train: epoch 034, train_loss: 4.1973 +2023-11-23 17:00:34 - eval: epoch: 034, acc1: 80.264%, acc5: 94.551%, test_loss: 0.9260, per_image_load_time: 0.215ms, per_image_inference_time: 3.840ms +2023-11-23 17:00:38 - until epoch: 034, best_acc1: 80.264% +2023-11-23 17:00:38 - epoch 035 lr: 0.000315 +2023-11-23 17:02:31 - train: epoch 0035, iter [00010, 00202], lr: 0.000315, loss: 4.3228 +2023-11-23 17:04:21 - train: epoch 0035, iter [00020, 00202], lr: 0.000315, loss: 3.9905 +2023-11-23 17:06:10 - train: epoch 0035, iter [00030, 00202], lr: 0.000314, loss: 3.8709 +2023-11-23 17:08:00 - train: epoch 0035, iter [00040, 00202], lr: 0.000314, loss: 4.0487 +2023-11-23 17:09:52 - train: epoch 0035, iter [00050, 00202], lr: 0.000314, loss: 4.3395 +2023-11-23 17:11:46 - train: epoch 0035, iter [00060, 00202], lr: 0.000313, loss: 4.1219 +2023-11-23 17:13:40 - train: epoch 0035, iter [00070, 00202], lr: 0.000313, loss: 4.2516 +2023-11-23 17:15:33 - train: epoch 0035, iter [00080, 00202], lr: 0.000313, loss: 4.8673 +2023-11-23 17:17:25 - train: epoch 0035, iter [00090, 00202], lr: 0.000313, loss: 4.1555 +2023-11-23 17:19:18 - train: epoch 0035, iter [00100, 00202], lr: 0.000312, loss: 4.6449 +2023-11-23 17:21:10 - train: epoch 0035, iter [00110, 00202], lr: 0.000312, loss: 4.0665 +2023-11-23 17:23:02 - train: epoch 0035, iter [00120, 00202], lr: 0.000312, loss: 4.5608 +2023-11-23 17:24:54 - train: epoch 0035, iter [00130, 00202], lr: 0.000312, loss: 3.3217 +2023-11-23 17:26:46 - train: epoch 0035, iter [00140, 00202], lr: 0.000311, loss: 4.5838 +2023-11-23 17:28:38 - train: epoch 0035, iter [00150, 00202], lr: 0.000311, loss: 4.7598 +2023-11-23 17:30:31 - train: epoch 0035, iter [00160, 00202], lr: 0.000311, loss: 4.2578 +2023-11-23 17:32:24 - train: epoch 0035, iter [00170, 00202], lr: 0.000311, loss: 4.3317 +2023-11-23 17:34:16 - train: epoch 0035, iter [00180, 00202], lr: 0.000310, loss: 4.1132 +2023-11-23 17:36:08 - train: epoch 0035, iter [00190, 00202], lr: 0.000310, loss: 4.5898 +2023-11-23 17:38:00 - train: epoch 0035, iter [00200, 00202], lr: 0.000310, loss: 4.5029 +2023-11-23 17:38:30 - train: epoch 035, train_loss: 4.2031 +2023-11-23 18:13:08 - eval: epoch: 035, acc1: 80.707%, acc5: 94.867%, test_loss: 0.8832, per_image_load_time: 0.196ms, per_image_inference_time: 3.839ms +2023-11-23 18:13:12 - until epoch: 035, best_acc1: 80.707% +2023-11-23 18:13:12 - epoch 036 lr: 0.000310 +2023-11-23 18:15:07 - train: epoch 0036, iter [00010, 00202], lr: 0.000309, loss: 4.6571 
+2023-11-23 18:16:58 - train: epoch 0036, iter [00020, 00202], lr: 0.000309, loss: 4.0628 +2023-11-23 18:18:48 - train: epoch 0036, iter [00030, 00202], lr: 0.000309, loss: 4.1868 +2023-11-23 18:20:40 - train: epoch 0036, iter [00040, 00202], lr: 0.000309, loss: 4.0192 +2023-11-23 18:22:32 - train: epoch 0036, iter [00050, 00202], lr: 0.000308, loss: 4.3831 +2023-11-23 18:24:24 - train: epoch 0036, iter [00060, 00202], lr: 0.000308, loss: 4.6827 +2023-11-23 18:26:17 - train: epoch 0036, iter [00070, 00202], lr: 0.000308, loss: 4.0600 +2023-11-23 18:28:10 - train: epoch 0036, iter [00080, 00202], lr: 0.000307, loss: 4.8047 +2023-11-23 18:30:02 - train: epoch 0036, iter [00090, 00202], lr: 0.000307, loss: 3.8618 +2023-11-23 18:31:54 - train: epoch 0036, iter [00100, 00202], lr: 0.000307, loss: 3.9654 +2023-11-23 18:33:46 - train: epoch 0036, iter [00110, 00202], lr: 0.000307, loss: 4.3248 +2023-11-23 18:35:39 - train: epoch 0036, iter [00120, 00202], lr: 0.000306, loss: 3.8751 +2023-11-23 18:37:32 - train: epoch 0036, iter [00130, 00202], lr: 0.000306, loss: 4.8349 +2023-11-23 18:39:25 - train: epoch 0036, iter [00140, 00202], lr: 0.000306, loss: 3.4857 +2023-11-23 18:41:18 - train: epoch 0036, iter [00150, 00202], lr: 0.000305, loss: 5.0098 +2023-11-23 18:43:11 - train: epoch 0036, iter [00160, 00202], lr: 0.000305, loss: 4.6630 +2023-11-23 18:45:03 - train: epoch 0036, iter [00170, 00202], lr: 0.000305, loss: 4.2429 +2023-11-23 18:46:55 - train: epoch 0036, iter [00180, 00202], lr: 0.000305, loss: 4.2766 +2023-11-23 18:48:47 - train: epoch 0036, iter [00190, 00202], lr: 0.000304, loss: 3.6444 +2023-11-23 18:50:40 - train: epoch 0036, iter [00200, 00202], lr: 0.000304, loss: 4.7511 +2023-11-23 18:51:09 - train: epoch 036, train_loss: 4.1616 +2023-11-23 19:24:47 - eval: epoch: 036, acc1: 81.150%, acc5: 95.189%, test_loss: 0.8727, per_image_load_time: 0.222ms, per_image_inference_time: 3.841ms +2023-11-23 19:24:50 - until epoch: 036, best_acc1: 81.150% +2023-11-23 19:24:50 - epoch 037 lr: 0.000304 +2023-11-23 19:26:44 - train: epoch 0037, iter [00010, 00202], lr: 0.000304, loss: 3.6035 +2023-11-23 19:28:34 - train: epoch 0037, iter [00020, 00202], lr: 0.000303, loss: 3.9293 +2023-11-23 19:30:25 - train: epoch 0037, iter [00030, 00202], lr: 0.000303, loss: 4.1017 +2023-11-23 19:32:16 - train: epoch 0037, iter [00040, 00202], lr: 0.000303, loss: 3.9704 +2023-11-23 19:34:08 - train: epoch 0037, iter [00050, 00202], lr: 0.000303, loss: 4.7033 +2023-11-23 19:35:59 - train: epoch 0037, iter [00060, 00202], lr: 0.000302, loss: 3.9115 +2023-11-23 19:37:50 - train: epoch 0037, iter [00070, 00202], lr: 0.000302, loss: 3.9667 +2023-11-23 19:39:41 - train: epoch 0037, iter [00080, 00202], lr: 0.000302, loss: 4.7955 +2023-11-23 19:41:32 - train: epoch 0037, iter [00090, 00202], lr: 0.000302, loss: 4.5627 +2023-11-23 19:43:23 - train: epoch 0037, iter [00100, 00202], lr: 0.000301, loss: 4.5765 +2023-11-23 19:45:14 - train: epoch 0037, iter [00110, 00202], lr: 0.000301, loss: 4.1604 +2023-11-23 19:47:05 - train: epoch 0037, iter [00120, 00202], lr: 0.000301, loss: 4.9707 +2023-11-23 19:48:56 - train: epoch 0037, iter [00130, 00202], lr: 0.000300, loss: 4.3969 +2023-11-23 19:50:48 - train: epoch 0037, iter [00140, 00202], lr: 0.000300, loss: 3.5318 +2023-11-23 19:52:39 - train: epoch 0037, iter [00150, 00202], lr: 0.000300, loss: 3.8545 +2023-11-23 19:54:30 - train: epoch 0037, iter [00160, 00202], lr: 0.000300, loss: 3.2419 +2023-11-23 19:56:20 - train: epoch 0037, iter [00170, 00202], lr: 0.000299, loss: 
4.2938 +2023-11-23 19:58:11 - train: epoch 0037, iter [00180, 00202], lr: 0.000299, loss: 4.4719 +2023-11-23 20:00:02 - train: epoch 0037, iter [00190, 00202], lr: 0.000299, loss: 3.5113 +2023-11-23 20:01:54 - train: epoch 0037, iter [00200, 00202], lr: 0.000298, loss: 4.1754 +2023-11-23 20:02:24 - train: epoch 037, train_loss: 4.0853 +2023-11-23 20:36:00 - eval: epoch: 037, acc1: 81.551%, acc5: 95.459%, test_loss: 0.8515, per_image_load_time: 0.240ms, per_image_inference_time: 3.841ms +2023-11-23 20:36:04 - until epoch: 037, best_acc1: 81.551% +2023-11-23 20:36:04 - epoch 038 lr: 0.000298 +2023-11-23 20:37:59 - train: epoch 0038, iter [00010, 00202], lr: 0.000298, loss: 4.7357 +2023-11-23 20:39:50 - train: epoch 0038, iter [00020, 00202], lr: 0.000298, loss: 4.6756 +2023-11-23 20:41:42 - train: epoch 0038, iter [00030, 00202], lr: 0.000297, loss: 4.0625 +2023-11-23 20:43:34 - train: epoch 0038, iter [00040, 00202], lr: 0.000297, loss: 4.0707 +2023-11-23 20:45:27 - train: epoch 0038, iter [00050, 00202], lr: 0.000297, loss: 3.3280 +2023-11-23 20:47:19 - train: epoch 0038, iter [00060, 00202], lr: 0.000297, loss: 4.8663 +2023-11-23 20:49:11 - train: epoch 0038, iter [00070, 00202], lr: 0.000296, loss: 3.7127 +2023-11-23 20:51:03 - train: epoch 0038, iter [00080, 00202], lr: 0.000296, loss: 4.1610 +2023-11-23 20:52:55 - train: epoch 0038, iter [00090, 00202], lr: 0.000296, loss: 4.0058 +2023-11-23 20:54:47 - train: epoch 0038, iter [00100, 00202], lr: 0.000295, loss: 3.0785 +2023-11-23 20:56:39 - train: epoch 0038, iter [00110, 00202], lr: 0.000295, loss: 4.1840 +2023-11-23 20:58:31 - train: epoch 0038, iter [00120, 00202], lr: 0.000295, loss: 4.6807 +2023-11-23 21:00:23 - train: epoch 0038, iter [00130, 00202], lr: 0.000295, loss: 3.8735 +2023-11-23 21:02:15 - train: epoch 0038, iter [00140, 00202], lr: 0.000294, loss: 3.9526 +2023-11-23 21:04:08 - train: epoch 0038, iter [00150, 00202], lr: 0.000294, loss: 4.7721 +2023-11-23 21:05:59 - train: epoch 0038, iter [00160, 00202], lr: 0.000294, loss: 4.7881 +2023-11-23 21:07:51 - train: epoch 0038, iter [00170, 00202], lr: 0.000293, loss: 4.1661 +2023-11-23 21:09:43 - train: epoch 0038, iter [00180, 00202], lr: 0.000293, loss: 4.2793 +2023-11-23 21:11:36 - train: epoch 0038, iter [00190, 00202], lr: 0.000293, loss: 4.3958 +2023-11-23 21:13:28 - train: epoch 0038, iter [00200, 00202], lr: 0.000293, loss: 4.9744 +2023-11-23 21:13:58 - train: epoch 038, train_loss: 4.1388 +2023-11-23 21:47:46 - eval: epoch: 038, acc1: 81.956%, acc5: 95.716%, test_loss: 0.8133, per_image_load_time: 0.230ms, per_image_inference_time: 3.841ms +2023-11-23 21:47:49 - until epoch: 038, best_acc1: 81.956% +2023-11-23 21:47:49 - epoch 039 lr: 0.000293 +2023-11-23 21:49:46 - train: epoch 0039, iter [00010, 00202], lr: 0.000292, loss: 3.8192 +2023-11-23 21:51:37 - train: epoch 0039, iter [00020, 00202], lr: 0.000292, loss: 4.5323 +2023-11-23 21:53:28 - train: epoch 0039, iter [00030, 00202], lr: 0.000292, loss: 4.2739 +2023-11-23 21:55:21 - train: epoch 0039, iter [00040, 00202], lr: 0.000291, loss: 4.1928 +2023-11-23 21:57:13 - train: epoch 0039, iter [00050, 00202], lr: 0.000291, loss: 4.3940 +2023-11-23 21:59:05 - train: epoch 0039, iter [00060, 00202], lr: 0.000291, loss: 3.9295 +2023-11-23 22:00:57 - train: epoch 0039, iter [00070, 00202], lr: 0.000291, loss: 4.5440 +2023-11-23 22:02:49 - train: epoch 0039, iter [00080, 00202], lr: 0.000290, loss: 4.5971 +2023-11-23 22:04:41 - train: epoch 0039, iter [00090, 00202], lr: 0.000290, loss: 4.3717 +2023-11-23 22:06:33 - 
train: epoch 0039, iter [00100, 00202], lr: 0.000290, loss: 4.5061 +2023-11-23 22:08:26 - train: epoch 0039, iter [00110, 00202], lr: 0.000289, loss: 3.7552 +2023-11-23 22:10:18 - train: epoch 0039, iter [00120, 00202], lr: 0.000289, loss: 4.5375 +2023-11-23 22:12:11 - train: epoch 0039, iter [00130, 00202], lr: 0.000289, loss: 4.1162 +2023-11-23 22:14:03 - train: epoch 0039, iter [00140, 00202], lr: 0.000288, loss: 4.6637 +2023-11-23 22:15:55 - train: epoch 0039, iter [00150, 00202], lr: 0.000288, loss: 4.2175 +2023-11-23 22:17:47 - train: epoch 0039, iter [00160, 00202], lr: 0.000288, loss: 3.6087 +2023-11-23 22:19:39 - train: epoch 0039, iter [00170, 00202], lr: 0.000288, loss: 3.8071 +2023-11-23 22:21:31 - train: epoch 0039, iter [00180, 00202], lr: 0.000287, loss: 4.0957 +2023-11-23 22:23:24 - train: epoch 0039, iter [00190, 00202], lr: 0.000287, loss: 3.8200 +2023-11-23 22:25:17 - train: epoch 0039, iter [00200, 00202], lr: 0.000287, loss: 4.7400 +2023-11-23 22:25:47 - train: epoch 039, train_loss: 4.1115 +2023-11-23 22:59:35 - eval: epoch: 039, acc1: 82.454%, acc5: 95.993%, test_loss: 0.7915, per_image_load_time: 0.234ms, per_image_inference_time: 3.838ms +2023-11-23 22:59:39 - until epoch: 039, best_acc1: 82.454% +2023-11-23 22:59:39 - epoch 040 lr: 0.000287 +2023-11-23 23:01:35 - train: epoch 0040, iter [00010, 00202], lr: 0.000286, loss: 3.0029 +2023-11-23 23:03:26 - train: epoch 0040, iter [00020, 00202], lr: 0.000286, loss: 4.6013 +2023-11-23 23:05:19 - train: epoch 0040, iter [00030, 00202], lr: 0.000286, loss: 4.0283 +2023-11-23 23:07:12 - train: epoch 0040, iter [00040, 00202], lr: 0.000285, loss: 4.1423 +2023-11-23 23:09:04 - train: epoch 0040, iter [00050, 00202], lr: 0.000285, loss: 4.3069 +2023-11-23 23:10:56 - train: epoch 0040, iter [00060, 00202], lr: 0.000285, loss: 4.4794 +2023-11-23 23:12:48 - train: epoch 0040, iter [00070, 00202], lr: 0.000285, loss: 4.4430 +2023-11-23 23:14:39 - train: epoch 0040, iter [00080, 00202], lr: 0.000284, loss: 4.1808 +2023-11-23 23:16:31 - train: epoch 0040, iter [00090, 00202], lr: 0.000284, loss: 4.2754 +2023-11-23 23:18:22 - train: epoch 0040, iter [00100, 00202], lr: 0.000284, loss: 3.8111 +2023-11-23 23:20:15 - train: epoch 0040, iter [00110, 00202], lr: 0.000283, loss: 4.5150 +2023-11-23 23:22:08 - train: epoch 0040, iter [00120, 00202], lr: 0.000283, loss: 3.2836 +2023-11-23 23:24:00 - train: epoch 0040, iter [00130, 00202], lr: 0.000283, loss: 4.4463 +2023-11-23 23:25:52 - train: epoch 0040, iter [00140, 00202], lr: 0.000283, loss: 3.5000 +2023-11-23 23:27:44 - train: epoch 0040, iter [00150, 00202], lr: 0.000282, loss: 4.1694 +2023-11-23 23:29:35 - train: epoch 0040, iter [00160, 00202], lr: 0.000282, loss: 2.9043 +2023-11-23 23:31:27 - train: epoch 0040, iter [00170, 00202], lr: 0.000282, loss: 4.4387 +2023-11-23 23:33:20 - train: epoch 0040, iter [00180, 00202], lr: 0.000281, loss: 3.8660 +2023-11-23 23:35:13 - train: epoch 0040, iter [00190, 00202], lr: 0.000281, loss: 4.5130 +2023-11-23 23:37:06 - train: epoch 0040, iter [00200, 00202], lr: 0.000281, loss: 4.4536 +2023-11-23 23:37:35 - train: epoch 040, train_loss: 4.0821 +2023-11-24 00:11:22 - eval: epoch: 040, acc1: 82.827%, acc5: 96.228%, test_loss: 0.7784, per_image_load_time: 0.238ms, per_image_inference_time: 3.838ms +2023-11-24 00:11:26 - until epoch: 040, best_acc1: 82.827% +2023-11-24 00:11:26 - epoch 041 lr: 0.000281 +2023-11-24 00:13:21 - train: epoch 0041, iter [00010, 00202], lr: 0.000280, loss: 4.0745 +2023-11-24 00:15:14 - train: epoch 0041, iter [00020, 
00202], lr: 0.000280, loss: 4.1137 +2023-11-24 00:17:07 - train: epoch 0041, iter [00030, 00202], lr: 0.000280, loss: 4.1235 +2023-11-24 00:19:01 - train: epoch 0041, iter [00040, 00202], lr: 0.000279, loss: 3.6177 +2023-11-24 00:20:53 - train: epoch 0041, iter [00050, 00202], lr: 0.000279, loss: 4.5902 +2023-11-24 00:22:46 - train: epoch 0041, iter [00060, 00202], lr: 0.000279, loss: 4.5619 +2023-11-24 00:24:38 - train: epoch 0041, iter [00070, 00202], lr: 0.000279, loss: 3.5900 +2023-11-24 00:26:30 - train: epoch 0041, iter [00080, 00202], lr: 0.000278, loss: 4.4604 +2023-11-24 00:28:23 - train: epoch 0041, iter [00090, 00202], lr: 0.000278, loss: 4.3456 +2023-11-24 00:30:16 - train: epoch 0041, iter [00100, 00202], lr: 0.000278, loss: 3.3640 +2023-11-24 00:32:08 - train: epoch 0041, iter [00110, 00202], lr: 0.000277, loss: 3.9183 +2023-11-24 00:34:01 - train: epoch 0041, iter [00120, 00202], lr: 0.000277, loss: 4.1306 +2023-11-24 00:35:54 - train: epoch 0041, iter [00130, 00202], lr: 0.000277, loss: 4.5879 +2023-11-24 00:37:46 - train: epoch 0041, iter [00140, 00202], lr: 0.000276, loss: 4.5922 +2023-11-24 00:39:38 - train: epoch 0041, iter [00150, 00202], lr: 0.000276, loss: 4.7476 +2023-11-24 00:41:31 - train: epoch 0041, iter [00160, 00202], lr: 0.000276, loss: 3.0154 +2023-11-24 00:43:24 - train: epoch 0041, iter [00170, 00202], lr: 0.000276, loss: 4.9466 +2023-11-24 00:45:17 - train: epoch 0041, iter [00180, 00202], lr: 0.000275, loss: 3.4442 +2023-11-24 00:47:09 - train: epoch 0041, iter [00190, 00202], lr: 0.000275, loss: 4.1254 +2023-11-24 00:49:01 - train: epoch 0041, iter [00200, 00202], lr: 0.000275, loss: 3.7958 +2023-11-24 00:49:31 - train: epoch 041, train_loss: 4.0379 +2023-11-24 01:23:30 - eval: epoch: 041, acc1: 83.138%, acc5: 96.402%, test_loss: 0.7709, per_image_load_time: 0.222ms, per_image_inference_time: 3.838ms +2023-11-24 01:23:34 - until epoch: 041, best_acc1: 83.138% +2023-11-24 01:23:34 - epoch 042 lr: 0.000275 +2023-11-24 01:25:31 - train: epoch 0042, iter [00010, 00202], lr: 0.000274, loss: 4.2142 +2023-11-24 01:27:22 - train: epoch 0042, iter [00020, 00202], lr: 0.000274, loss: 3.8414 +2023-11-24 01:29:13 - train: epoch 0042, iter [00030, 00202], lr: 0.000274, loss: 4.0388 +2023-11-24 01:31:04 - train: epoch 0042, iter [00040, 00202], lr: 0.000273, loss: 4.3543 +2023-11-24 01:32:55 - train: epoch 0042, iter [00050, 00202], lr: 0.000273, loss: 4.4088 +2023-11-24 01:34:46 - train: epoch 0042, iter [00060, 00202], lr: 0.000273, loss: 4.4528 +2023-11-24 01:36:38 - train: epoch 0042, iter [00070, 00202], lr: 0.000272, loss: 4.3963 +2023-11-24 01:38:30 - train: epoch 0042, iter [00080, 00202], lr: 0.000272, loss: 4.7185 +2023-11-24 01:40:22 - train: epoch 0042, iter [00090, 00202], lr: 0.000272, loss: 4.5081 +2023-11-24 01:42:13 - train: epoch 0042, iter [00100, 00202], lr: 0.000272, loss: 4.5388 +2023-11-24 01:44:04 - train: epoch 0042, iter [00110, 00202], lr: 0.000271, loss: 4.0111 +2023-11-24 01:45:55 - train: epoch 0042, iter [00120, 00202], lr: 0.000271, loss: 4.1446 +2023-11-24 01:47:46 - train: epoch 0042, iter [00130, 00202], lr: 0.000271, loss: 3.7203 +2023-11-24 01:49:38 - train: epoch 0042, iter [00140, 00202], lr: 0.000270, loss: 4.7364 +2023-11-24 01:51:29 - train: epoch 0042, iter [00150, 00202], lr: 0.000270, loss: 4.2652 +2023-11-24 01:53:20 - train: epoch 0042, iter [00160, 00202], lr: 0.000270, loss: 4.3078 +2023-11-24 01:55:11 - train: epoch 0042, iter [00170, 00202], lr: 0.000269, loss: 4.3051 +2023-11-24 01:57:02 - train: epoch 0042, iter 
[00180, 00202], lr: 0.000269, loss: 3.5459 +2023-11-24 01:58:52 - train: epoch 0042, iter [00190, 00202], lr: 0.000269, loss: 4.1766 +2023-11-24 02:00:44 - train: epoch 0042, iter [00200, 00202], lr: 0.000268, loss: 3.8036 +2023-11-24 02:01:13 - train: epoch 042, train_loss: 4.1256 +2023-11-24 02:34:53 - eval: epoch: 042, acc1: 83.566%, acc5: 96.645%, test_loss: 0.7479, per_image_load_time: 0.222ms, per_image_inference_time: 3.837ms +2023-11-24 02:34:57 - until epoch: 042, best_acc1: 83.566% +2023-11-24 02:34:57 - epoch 043 lr: 0.000268 +2023-11-24 02:36:52 - train: epoch 0043, iter [00010, 00202], lr: 0.000268, loss: 4.3247 +2023-11-24 02:38:44 - train: epoch 0043, iter [00020, 00202], lr: 0.000268, loss: 3.5576 +2023-11-24 02:40:35 - train: epoch 0043, iter [00030, 00202], lr: 0.000267, loss: 4.2366 +2023-11-24 02:42:26 - train: epoch 0043, iter [00040, 00202], lr: 0.000267, loss: 3.7095 +2023-11-24 02:44:16 - train: epoch 0043, iter [00050, 00202], lr: 0.000267, loss: 4.3548 +2023-11-24 02:46:07 - train: epoch 0043, iter [00060, 00202], lr: 0.000267, loss: 3.8465 +2023-11-24 02:47:58 - train: epoch 0043, iter [00070, 00202], lr: 0.000266, loss: 4.2740 +2023-11-24 02:49:49 - train: epoch 0043, iter [00080, 00202], lr: 0.000266, loss: 4.2646 +2023-11-24 02:51:40 - train: epoch 0043, iter [00090, 00202], lr: 0.000266, loss: 3.3715 +2023-11-24 02:53:30 - train: epoch 0043, iter [00100, 00202], lr: 0.000265, loss: 4.3493 +2023-11-24 02:55:21 - train: epoch 0043, iter [00110, 00202], lr: 0.000265, loss: 3.9825 +2023-11-24 02:57:12 - train: epoch 0043, iter [00120, 00202], lr: 0.000265, loss: 3.8512 +2023-11-24 02:59:02 - train: epoch 0043, iter [00130, 00202], lr: 0.000264, loss: 3.9787 +2023-11-24 03:00:53 - train: epoch 0043, iter [00140, 00202], lr: 0.000264, loss: 4.4748 +2023-11-24 03:02:44 - train: epoch 0043, iter [00150, 00202], lr: 0.000264, loss: 3.8968 +2023-11-24 03:04:36 - train: epoch 0043, iter [00160, 00202], lr: 0.000263, loss: 3.4602 +2023-11-24 03:06:27 - train: epoch 0043, iter [00170, 00202], lr: 0.000263, loss: 4.0783 +2023-11-24 03:08:16 - train: epoch 0043, iter [00180, 00202], lr: 0.000263, loss: 4.7808 +2023-11-24 03:10:07 - train: epoch 0043, iter [00190, 00202], lr: 0.000263, loss: 4.1080 +2023-11-24 03:11:57 - train: epoch 0043, iter [00200, 00202], lr: 0.000262, loss: 3.8404 +2023-11-24 03:12:26 - train: epoch 043, train_loss: 4.0373 +2023-11-24 03:45:12 - eval: epoch: 043, acc1: 83.849%, acc5: 96.831%, test_loss: 0.7340, per_image_load_time: 0.243ms, per_image_inference_time: 3.838ms +2023-11-24 03:45:15 - until epoch: 043, best_acc1: 83.849% +2023-11-24 03:45:15 - epoch 044 lr: 0.000262 +2023-11-24 03:47:09 - train: epoch 0044, iter [00010, 00202], lr: 0.000262, loss: 2.8023 +2023-11-24 03:49:00 - train: epoch 0044, iter [00020, 00202], lr: 0.000262, loss: 3.4624 +2023-11-24 03:50:50 - train: epoch 0044, iter [00030, 00202], lr: 0.000261, loss: 3.0626 +2023-11-24 03:52:41 - train: epoch 0044, iter [00040, 00202], lr: 0.000261, loss: 4.1680 +2023-11-24 03:54:31 - train: epoch 0044, iter [00050, 00202], lr: 0.000261, loss: 3.9602 +2023-11-24 03:56:21 - train: epoch 0044, iter [00060, 00202], lr: 0.000260, loss: 4.1070 +2023-11-24 03:58:10 - train: epoch 0044, iter [00070, 00202], lr: 0.000260, loss: 4.1393 +2023-11-24 04:00:00 - train: epoch 0044, iter [00080, 00202], lr: 0.000260, loss: 3.3897 +2023-11-24 04:01:50 - train: epoch 0044, iter [00090, 00202], lr: 0.000259, loss: 3.9114 +2023-11-24 04:03:40 - train: epoch 0044, iter [00100, 00202], lr: 0.000259, 
loss: 4.0763 +2023-11-24 04:05:30 - train: epoch 0044, iter [00110, 00202], lr: 0.000259, loss: 4.0496 +2023-11-24 04:07:21 - train: epoch 0044, iter [00120, 00202], lr: 0.000258, loss: 4.0528 +2023-11-24 04:09:11 - train: epoch 0044, iter [00130, 00202], lr: 0.000258, loss: 3.5346 +2023-11-24 04:11:01 - train: epoch 0044, iter [00140, 00202], lr: 0.000258, loss: 4.4027 +2023-11-24 04:12:51 - train: epoch 0044, iter [00150, 00202], lr: 0.000257, loss: 3.9200 +2023-11-24 04:14:41 - train: epoch 0044, iter [00160, 00202], lr: 0.000257, loss: 4.3822 +2023-11-24 04:16:32 - train: epoch 0044, iter [00170, 00202], lr: 0.000257, loss: 4.4616 +2023-11-24 04:18:22 - train: epoch 0044, iter [00180, 00202], lr: 0.000257, loss: 4.0712 +2023-11-24 04:20:12 - train: epoch 0044, iter [00190, 00202], lr: 0.000256, loss: 3.5886 +2023-11-24 04:22:02 - train: epoch 0044, iter [00200, 00202], lr: 0.000256, loss: 3.6359 +2023-11-24 04:22:31 - train: epoch 044, train_loss: 4.0094 +2023-11-24 04:55:05 - eval: epoch: 044, acc1: 84.171%, acc5: 96.998%, test_loss: 0.7087, per_image_load_time: 0.242ms, per_image_inference_time: 3.838ms +2023-11-24 04:55:08 - until epoch: 044, best_acc1: 84.171% +2023-11-24 04:55:08 - epoch 045 lr: 0.000256 +2023-11-24 04:57:02 - train: epoch 0045, iter [00010, 00202], lr: 0.000256, loss: 3.8061 +2023-11-24 04:58:52 - train: epoch 0045, iter [00020, 00202], lr: 0.000255, loss: 4.4994 +2023-11-24 05:00:42 - train: epoch 0045, iter [00030, 00202], lr: 0.000255, loss: 3.0112 +2023-11-24 05:02:33 - train: epoch 0045, iter [00040, 00202], lr: 0.000255, loss: 3.6814 +2023-11-24 05:04:23 - train: epoch 0045, iter [00050, 00202], lr: 0.000254, loss: 4.3550 +2023-11-24 05:06:13 - train: epoch 0045, iter [00060, 00202], lr: 0.000254, loss: 3.8076 +2023-11-24 05:08:03 - train: epoch 0045, iter [00070, 00202], lr: 0.000254, loss: 4.4424 +2023-11-24 05:09:52 - train: epoch 0045, iter [00080, 00202], lr: 0.000253, loss: 4.2219 +2023-11-24 05:11:42 - train: epoch 0045, iter [00090, 00202], lr: 0.000253, loss: 4.3600 +2023-11-24 05:13:32 - train: epoch 0045, iter [00100, 00202], lr: 0.000253, loss: 4.0374 +2023-11-24 05:15:21 - train: epoch 0045, iter [00110, 00202], lr: 0.000252, loss: 3.5984 +2023-11-24 05:17:12 - train: epoch 0045, iter [00120, 00202], lr: 0.000252, loss: 4.8443 +2023-11-24 05:19:02 - train: epoch 0045, iter [00130, 00202], lr: 0.000252, loss: 3.5827 +2023-11-24 05:20:51 - train: epoch 0045, iter [00140, 00202], lr: 0.000251, loss: 4.9931 +2023-11-24 05:22:41 - train: epoch 0045, iter [00150, 00202], lr: 0.000251, loss: 3.8951 +2023-11-24 05:24:31 - train: epoch 0045, iter [00160, 00202], lr: 0.000251, loss: 4.2752 +2023-11-24 05:26:20 - train: epoch 0045, iter [00170, 00202], lr: 0.000251, loss: 2.7407 +2023-11-24 05:28:11 - train: epoch 0045, iter [00180, 00202], lr: 0.000250, loss: 3.6075 +2023-11-24 05:30:00 - train: epoch 0045, iter [00190, 00202], lr: 0.000250, loss: 3.7410 +2023-11-24 05:31:50 - train: epoch 0045, iter [00200, 00202], lr: 0.000250, loss: 3.2634 +2023-11-24 05:32:19 - train: epoch 045, train_loss: 4.0264 +2023-11-24 06:04:37 - eval: epoch: 045, acc1: 84.556%, acc5: 97.172%, test_loss: 0.6869, per_image_load_time: 0.262ms, per_image_inference_time: 3.838ms +2023-11-24 06:04:41 - until epoch: 045, best_acc1: 84.556% +2023-11-24 06:04:41 - epoch 046 lr: 0.000249 +2023-11-24 06:06:35 - train: epoch 0046, iter [00010, 00202], lr: 0.000249, loss: 4.0770 +2023-11-24 06:08:24 - train: epoch 0046, iter [00020, 00202], lr: 0.000249, loss: 4.1091 +2023-11-24 06:10:13 
- train: epoch 0046, iter [00030, 00202], lr: 0.000249, loss: 4.3324 +2023-11-24 06:12:02 - train: epoch 0046, iter [00040, 00202], lr: 0.000248, loss: 3.6962 +2023-11-24 06:13:52 - train: epoch 0046, iter [00050, 00202], lr: 0.000248, loss: 3.8016 +2023-11-24 06:15:42 - train: epoch 0046, iter [00060, 00202], lr: 0.000248, loss: 3.5765 +2023-11-24 06:17:32 - train: epoch 0046, iter [00070, 00202], lr: 0.000247, loss: 4.0325 +2023-11-24 06:19:21 - train: epoch 0046, iter [00080, 00202], lr: 0.000247, loss: 4.1742 +2023-11-24 06:21:11 - train: epoch 0046, iter [00090, 00202], lr: 0.000247, loss: 4.7966 +2023-11-24 06:23:00 - train: epoch 0046, iter [00100, 00202], lr: 0.000246, loss: 4.1859 +2023-11-24 06:24:49 - train: epoch 0046, iter [00110, 00202], lr: 0.000246, loss: 3.9367 +2023-11-24 06:26:39 - train: epoch 0046, iter [00120, 00202], lr: 0.000246, loss: 4.3433 +2023-11-24 06:28:29 - train: epoch 0046, iter [00130, 00202], lr: 0.000245, loss: 2.8095 +2023-11-24 06:30:19 - train: epoch 0046, iter [00140, 00202], lr: 0.000245, loss: 3.9239 +2023-11-24 06:32:08 - train: epoch 0046, iter [00150, 00202], lr: 0.000245, loss: 3.5116 +2023-11-24 06:33:58 - train: epoch 0046, iter [00160, 00202], lr: 0.000244, loss: 3.7697 +2023-11-24 06:35:47 - train: epoch 0046, iter [00170, 00202], lr: 0.000244, loss: 4.2378 +2023-11-24 06:37:36 - train: epoch 0046, iter [00180, 00202], lr: 0.000244, loss: 3.4729 +2023-11-24 06:39:27 - train: epoch 0046, iter [00190, 00202], lr: 0.000243, loss: 3.5823 +2023-11-24 06:41:17 - train: epoch 0046, iter [00200, 00202], lr: 0.000243, loss: 4.6280 +2023-11-24 06:41:46 - train: epoch 046, train_loss: 3.9373 +2023-11-24 07:14:04 - eval: epoch: 046, acc1: 84.906%, acc5: 97.355%, test_loss: 0.6747, per_image_load_time: 0.254ms, per_image_inference_time: 3.840ms +2023-11-24 07:14:08 - until epoch: 046, best_acc1: 84.906% +2023-11-24 07:14:08 - epoch 047 lr: 0.000243 +2023-11-24 07:16:01 - train: epoch 0047, iter [00010, 00202], lr: 0.000243, loss: 4.0787 +2023-11-24 07:17:51 - train: epoch 0047, iter [00020, 00202], lr: 0.000242, loss: 4.3852 +2023-11-24 07:19:41 - train: epoch 0047, iter [00030, 00202], lr: 0.000242, loss: 4.3362 +2023-11-24 07:21:30 - train: epoch 0047, iter [00040, 00202], lr: 0.000242, loss: 4.1878 +2023-11-24 07:23:19 - train: epoch 0047, iter [00050, 00202], lr: 0.000241, loss: 4.5392 +2023-11-24 07:25:09 - train: epoch 0047, iter [00060, 00202], lr: 0.000241, loss: 4.4460 +2023-11-24 07:26:59 - train: epoch 0047, iter [00070, 00202], lr: 0.000241, loss: 3.8784 +2023-11-24 07:28:49 - train: epoch 0047, iter [00080, 00202], lr: 0.000241, loss: 4.4498 +2023-11-24 07:30:38 - train: epoch 0047, iter [00090, 00202], lr: 0.000240, loss: 3.8462 +2023-11-24 07:32:27 - train: epoch 0047, iter [00100, 00202], lr: 0.000240, loss: 3.6560 +2023-11-24 07:34:17 - train: epoch 0047, iter [00110, 00202], lr: 0.000240, loss: 4.4284 +2023-11-24 07:36:06 - train: epoch 0047, iter [00120, 00202], lr: 0.000239, loss: 4.2060 +2023-11-24 07:37:56 - train: epoch 0047, iter [00130, 00202], lr: 0.000239, loss: 3.5876 +2023-11-24 07:39:45 - train: epoch 0047, iter [00140, 00202], lr: 0.000239, loss: 4.1038 +2023-11-24 07:41:36 - train: epoch 0047, iter [00150, 00202], lr: 0.000238, loss: 3.4577 +2023-11-24 07:43:25 - train: epoch 0047, iter [00160, 00202], lr: 0.000238, loss: 3.2819 +2023-11-24 07:45:15 - train: epoch 0047, iter [00170, 00202], lr: 0.000238, loss: 3.9600 +2023-11-24 07:47:05 - train: epoch 0047, iter [00180, 00202], lr: 0.000237, loss: 4.4516 +2023-11-24 
07:48:55 - train: epoch 0047, iter [00190, 00202], lr: 0.000237, loss: 3.2529 +2023-11-24 07:50:44 - train: epoch 0047, iter [00200, 00202], lr: 0.000237, loss: 4.0924 +2023-11-24 07:51:13 - train: epoch 047, train_loss: 3.9114 +2023-11-24 08:23:36 - eval: epoch: 047, acc1: 85.198%, acc5: 97.484%, test_loss: 0.6514, per_image_load_time: 0.253ms, per_image_inference_time: 3.842ms +2023-11-24 08:23:39 - until epoch: 047, best_acc1: 85.198% +2023-11-24 08:23:39 - epoch 048 lr: 0.000237 +2023-11-24 08:25:33 - train: epoch 0048, iter [00010, 00202], lr: 0.000236, loss: 3.9238 +2023-11-24 08:27:24 - train: epoch 0048, iter [00020, 00202], lr: 0.000236, loss: 3.7536 +2023-11-24 08:29:14 - train: epoch 0048, iter [00030, 00202], lr: 0.000236, loss: 4.3690 +2023-11-24 08:31:04 - train: epoch 0048, iter [00040, 00202], lr: 0.000235, loss: 3.1629 +2023-11-24 08:32:54 - train: epoch 0048, iter [00050, 00202], lr: 0.000235, loss: 3.6480 +2023-11-24 08:34:43 - train: epoch 0048, iter [00060, 00202], lr: 0.000235, loss: 4.0021 +2023-11-24 08:36:33 - train: epoch 0048, iter [00070, 00202], lr: 0.000234, loss: 4.1293 +2023-11-24 08:38:22 - train: epoch 0048, iter [00080, 00202], lr: 0.000234, loss: 4.2394 +2023-11-24 08:40:12 - train: epoch 0048, iter [00090, 00202], lr: 0.000234, loss: 4.3662 +2023-11-24 08:42:01 - train: epoch 0048, iter [00100, 00202], lr: 0.000233, loss: 4.4811 +2023-11-24 08:43:51 - train: epoch 0048, iter [00110, 00202], lr: 0.000233, loss: 4.3208 +2023-11-24 08:45:41 - train: epoch 0048, iter [00120, 00202], lr: 0.000233, loss: 4.5111 +2023-11-24 08:47:30 - train: epoch 0048, iter [00130, 00202], lr: 0.000232, loss: 3.2241 +2023-11-24 08:49:19 - train: epoch 0048, iter [00140, 00202], lr: 0.000232, loss: 3.3932 +2023-11-24 08:51:09 - train: epoch 0048, iter [00150, 00202], lr: 0.000232, loss: 3.8375 +2023-11-24 08:53:00 - train: epoch 0048, iter [00160, 00202], lr: 0.000231, loss: 4.1279 +2023-11-24 08:54:50 - train: epoch 0048, iter [00170, 00202], lr: 0.000231, loss: 3.3132 +2023-11-24 08:56:39 - train: epoch 0048, iter [00180, 00202], lr: 0.000231, loss: 3.9278 +2023-11-24 08:58:28 - train: epoch 0048, iter [00190, 00202], lr: 0.000230, loss: 3.3806 +2023-11-24 09:00:18 - train: epoch 0048, iter [00200, 00202], lr: 0.000230, loss: 3.7751 +2023-11-24 09:00:47 - train: epoch 048, train_loss: 3.8547 +2023-11-24 09:33:19 - eval: epoch: 048, acc1: 85.498%, acc5: 97.652%, test_loss: 0.6454, per_image_load_time: 0.260ms, per_image_inference_time: 3.842ms +2023-11-24 09:33:23 - until epoch: 048, best_acc1: 85.498% +2023-11-24 09:33:23 - epoch 049 lr: 0.000230 +2023-11-24 09:35:16 - train: epoch 0049, iter [00010, 00202], lr: 0.000230, loss: 4.2396 +2023-11-24 09:37:07 - train: epoch 0049, iter [00020, 00202], lr: 0.000229, loss: 4.0650 +2023-11-24 09:38:58 - train: epoch 0049, iter [00030, 00202], lr: 0.000229, loss: 4.7653 +2023-11-24 09:40:49 - train: epoch 0049, iter [00040, 00202], lr: 0.000229, loss: 3.3058 +2023-11-24 09:42:39 - train: epoch 0049, iter [00050, 00202], lr: 0.000228, loss: 4.1773 +2023-11-24 09:44:29 - train: epoch 0049, iter [00060, 00202], lr: 0.000228, loss: 3.5927 +2023-11-24 09:46:19 - train: epoch 0049, iter [00070, 00202], lr: 0.000228, loss: 4.2503 +2023-11-24 09:48:10 - train: epoch 0049, iter [00080, 00202], lr: 0.000228, loss: 3.9953 +2023-11-24 09:50:01 - train: epoch 0049, iter [00090, 00202], lr: 0.000227, loss: 3.9044 +2023-11-24 09:51:52 - train: epoch 0049, iter [00100, 00202], lr: 0.000227, loss: 3.5079 +2023-11-24 09:53:44 - train: epoch 0049, iter 
[00110, 00202], lr: 0.000227, loss: 4.3665 +2023-11-24 09:55:36 - train: epoch 0049, iter [00120, 00202], lr: 0.000226, loss: 2.5780 +2023-11-24 09:57:27 - train: epoch 0049, iter [00130, 00202], lr: 0.000226, loss: 4.2194 +2023-11-24 09:59:17 - train: epoch 0049, iter [00140, 00202], lr: 0.000226, loss: 4.3275 +2023-11-24 10:01:09 - train: epoch 0049, iter [00150, 00202], lr: 0.000225, loss: 3.7521 +2023-11-24 10:03:01 - train: epoch 0049, iter [00160, 00202], lr: 0.000225, loss: 3.8187 +2023-11-24 10:04:52 - train: epoch 0049, iter [00170, 00202], lr: 0.000225, loss: 4.4214 +2023-11-24 10:06:44 - train: epoch 0049, iter [00180, 00202], lr: 0.000224, loss: 3.6901 +2023-11-24 10:08:35 - train: epoch 0049, iter [00190, 00202], lr: 0.000224, loss: 4.1184 +2023-11-24 10:10:27 - train: epoch 0049, iter [00200, 00202], lr: 0.000224, loss: 4.4074 +2023-11-24 10:10:57 - train: epoch 049, train_loss: 3.8544 +2023-11-24 10:44:54 - eval: epoch: 049, acc1: 85.675%, acc5: 97.740%, test_loss: 0.6284, per_image_load_time: 0.240ms, per_image_inference_time: 3.838ms +2023-11-24 10:44:57 - until epoch: 049, best_acc1: 85.675% +2023-11-24 10:44:57 - epoch 050 lr: 0.000224 +2023-11-24 10:46:57 - train: epoch 0050, iter [00010, 00202], lr: 0.000223, loss: 3.4615 +2023-11-24 10:48:51 - train: epoch 0050, iter [00020, 00202], lr: 0.000223, loss: 3.8883 +2023-11-24 10:50:46 - train: epoch 0050, iter [00030, 00202], lr: 0.000223, loss: 4.0774 +2023-11-24 10:52:40 - train: epoch 0050, iter [00040, 00202], lr: 0.000222, loss: 4.2007 +2023-11-24 10:54:35 - train: epoch 0050, iter [00050, 00202], lr: 0.000222, loss: 4.2407 +2023-11-24 10:56:31 - train: epoch 0050, iter [00060, 00202], lr: 0.000222, loss: 4.3082 +2023-11-24 10:58:27 - train: epoch 0050, iter [00070, 00202], lr: 0.000221, loss: 4.3526 +2023-11-24 11:00:22 - train: epoch 0050, iter [00080, 00202], lr: 0.000221, loss: 4.6650 +2023-11-24 11:02:17 - train: epoch 0050, iter [00090, 00202], lr: 0.000221, loss: 3.2283 +2023-11-24 11:04:12 - train: epoch 0050, iter [00100, 00202], lr: 0.000220, loss: 4.4671 +2023-11-24 11:06:07 - train: epoch 0050, iter [00110, 00202], lr: 0.000220, loss: 4.4751 +2023-11-24 11:08:02 - train: epoch 0050, iter [00120, 00202], lr: 0.000220, loss: 3.4280 +2023-11-24 11:09:58 - train: epoch 0050, iter [00130, 00202], lr: 0.000219, loss: 4.0391 +2023-11-24 11:11:54 - train: epoch 0050, iter [00140, 00202], lr: 0.000219, loss: 4.3932 +2023-11-24 11:13:49 - train: epoch 0050, iter [00150, 00202], lr: 0.000219, loss: 2.7752 +2023-11-24 11:15:46 - train: epoch 0050, iter [00160, 00202], lr: 0.000218, loss: 4.0091 +2023-11-24 11:17:43 - train: epoch 0050, iter [00170, 00202], lr: 0.000218, loss: 3.7456 +2023-11-24 11:19:38 - train: epoch 0050, iter [00180, 00202], lr: 0.000218, loss: 4.3077 +2023-11-24 11:21:35 - train: epoch 0050, iter [00190, 00202], lr: 0.000217, loss: 4.1765 +2023-11-24 11:23:33 - train: epoch 0050, iter [00200, 00202], lr: 0.000217, loss: 3.5041 +2023-11-24 11:24:04 - train: epoch 050, train_loss: 3.8933 +2023-11-24 12:00:50 - eval: epoch: 050, acc1: 86.012%, acc5: 97.877%, test_loss: 0.6078, per_image_load_time: 0.180ms, per_image_inference_time: 3.829ms +2023-11-24 12:00:53 - until epoch: 050, best_acc1: 86.012% +2023-11-24 12:00:53 - epoch 051 lr: 0.000217 +2023-11-24 12:02:57 - train: epoch 0051, iter [00010, 00202], lr: 0.000217, loss: 3.9610 +2023-11-24 12:04:54 - train: epoch 0051, iter [00020, 00202], lr: 0.000216, loss: 3.4886 +2023-11-24 12:06:52 - train: epoch 0051, iter [00030, 00202], lr: 0.000216, 
loss: 4.0538 +2023-11-24 12:08:51 - train: epoch 0051, iter [00040, 00202], lr: 0.000216, loss: 3.3643 +2023-11-24 12:10:50 - train: epoch 0051, iter [00050, 00202], lr: 0.000215, loss: 4.1771 +2023-11-24 12:12:49 - train: epoch 0051, iter [00060, 00202], lr: 0.000215, loss: 3.9486 +2023-11-24 12:14:47 - train: epoch 0051, iter [00070, 00202], lr: 0.000215, loss: 4.4395 +2023-11-24 12:16:45 - train: epoch 0051, iter [00080, 00202], lr: 0.000214, loss: 3.0979 +2023-11-24 12:18:42 - train: epoch 0051, iter [00090, 00202], lr: 0.000214, loss: 4.7318 +2023-11-24 12:20:40 - train: epoch 0051, iter [00100, 00202], lr: 0.000214, loss: 4.6802 +2023-11-24 12:22:40 - train: epoch 0051, iter [00110, 00202], lr: 0.000213, loss: 3.6266 +2023-11-24 12:24:41 - train: epoch 0051, iter [00120, 00202], lr: 0.000213, loss: 4.2302 +2023-11-24 12:26:45 - train: epoch 0051, iter [00130, 00202], lr: 0.000213, loss: 4.4080 +2023-11-24 12:28:49 - train: epoch 0051, iter [00140, 00202], lr: 0.000212, loss: 3.8140 +2023-11-24 12:30:51 - train: epoch 0051, iter [00150, 00202], lr: 0.000212, loss: 3.2645 +2023-11-24 12:32:52 - train: epoch 0051, iter [00160, 00202], lr: 0.000212, loss: 3.4029 +2023-11-24 12:34:54 - train: epoch 0051, iter [00170, 00202], lr: 0.000211, loss: 2.6732 +2023-11-24 12:36:54 - train: epoch 0051, iter [00180, 00202], lr: 0.000211, loss: 4.3569 +2023-11-24 12:38:55 - train: epoch 0051, iter [00190, 00202], lr: 0.000211, loss: 4.4728 +2023-11-24 12:40:51 - train: epoch 0051, iter [00200, 00202], lr: 0.000210, loss: 3.2397 +2023-11-24 12:41:22 - train: epoch 051, train_loss: 3.8901 +2023-11-24 13:17:03 - eval: epoch: 051, acc1: 86.353%, acc5: 98.016%, test_loss: 0.5941, per_image_load_time: 0.206ms, per_image_inference_time: 3.834ms +2023-11-24 13:17:06 - until epoch: 051, best_acc1: 86.353% +2023-11-24 13:17:06 - epoch 052 lr: 0.000210 +2023-11-24 13:19:09 - train: epoch 0052, iter [00010, 00202], lr: 0.000210, loss: 4.3575 +2023-11-24 13:21:06 - train: epoch 0052, iter [00020, 00202], lr: 0.000210, loss: 3.6007 +2023-11-24 13:23:03 - train: epoch 0052, iter [00030, 00202], lr: 0.000209, loss: 4.0775 +2023-11-24 13:25:00 - train: epoch 0052, iter [00040, 00202], lr: 0.000209, loss: 4.2328 +2023-11-24 13:26:57 - train: epoch 0052, iter [00050, 00202], lr: 0.000209, loss: 4.0349 +2023-11-24 13:28:54 - train: epoch 0052, iter [00060, 00202], lr: 0.000208, loss: 3.0254 +2023-11-24 13:30:52 - train: epoch 0052, iter [00070, 00202], lr: 0.000208, loss: 3.8639 +2023-11-24 13:32:50 - train: epoch 0052, iter [00080, 00202], lr: 0.000208, loss: 3.7182 +2023-11-24 13:34:49 - train: epoch 0052, iter [00090, 00202], lr: 0.000207, loss: 4.8298 +2023-11-24 13:36:47 - train: epoch 0052, iter [00100, 00202], lr: 0.000207, loss: 3.6343 +2023-11-24 13:38:45 - train: epoch 0052, iter [00110, 00202], lr: 0.000207, loss: 4.4096 +2023-11-24 13:40:44 - train: epoch 0052, iter [00120, 00202], lr: 0.000206, loss: 3.8044 +2023-11-24 13:42:42 - train: epoch 0052, iter [00130, 00202], lr: 0.000206, loss: 3.6118 +2023-11-24 13:44:43 - train: epoch 0052, iter [00140, 00202], lr: 0.000206, loss: 4.2738 +2023-11-24 13:46:43 - train: epoch 0052, iter [00150, 00202], lr: 0.000206, loss: 3.9573 +2023-11-24 13:48:42 - train: epoch 0052, iter [00160, 00202], lr: 0.000205, loss: 3.2216 +2023-11-24 13:50:42 - train: epoch 0052, iter [00170, 00202], lr: 0.000205, loss: 4.5650 +2023-11-24 13:52:41 - train: epoch 0052, iter [00180, 00202], lr: 0.000205, loss: 3.9176 +2023-11-24 13:54:40 - train: epoch 0052, iter [00190, 00202], lr: 
0.000204, loss: 3.7938 +2023-11-24 13:56:39 - train: epoch 0052, iter [00200, 00202], lr: 0.000204, loss: 3.3985 +2023-11-24 13:57:10 - train: epoch 052, train_loss: 3.8547 +2023-11-24 14:34:24 - eval: epoch: 052, acc1: 86.567%, acc5: 98.105%, test_loss: 0.5900, per_image_load_time: 0.189ms, per_image_inference_time: 3.834ms +2023-11-24 14:34:28 - until epoch: 052, best_acc1: 86.567% +2023-11-24 14:34:28 - epoch 053 lr: 0.000204 +2023-11-24 14:36:26 - train: epoch 0053, iter [00010, 00202], lr: 0.000203, loss: 4.0632 +2023-11-24 14:38:20 - train: epoch 0053, iter [00020, 00202], lr: 0.000203, loss: 3.6710 +2023-11-24 14:40:13 - train: epoch 0053, iter [00030, 00202], lr: 0.000203, loss: 3.7750 +2023-11-24 14:42:08 - train: epoch 0053, iter [00040, 00202], lr: 0.000202, loss: 3.7349 +2023-11-24 14:44:03 - train: epoch 0053, iter [00050, 00202], lr: 0.000202, loss: 3.5541 +2023-11-24 14:46:00 - train: epoch 0053, iter [00060, 00202], lr: 0.000202, loss: 4.3394 +2023-11-24 14:47:59 - train: epoch 0053, iter [00070, 00202], lr: 0.000202, loss: 4.5916 +2023-11-24 14:49:57 - train: epoch 0053, iter [00080, 00202], lr: 0.000201, loss: 3.9923 +2023-11-24 14:51:57 - train: epoch 0053, iter [00090, 00202], lr: 0.000201, loss: 4.1151 +2023-11-24 14:53:54 - train: epoch 0053, iter [00100, 00202], lr: 0.000201, loss: 3.4458 +2023-11-24 14:55:50 - train: epoch 0053, iter [00110, 00202], lr: 0.000200, loss: 3.9202 +2023-11-24 14:57:48 - train: epoch 0053, iter [00120, 00202], lr: 0.000200, loss: 3.7807 +2023-11-24 14:59:47 - train: epoch 0053, iter [00130, 00202], lr: 0.000200, loss: 4.0572 +2023-11-24 15:01:46 - train: epoch 0053, iter [00140, 00202], lr: 0.000199, loss: 3.1795 +2023-11-24 15:03:46 - train: epoch 0053, iter [00150, 00202], lr: 0.000199, loss: 4.2649 +2023-11-24 15:05:47 - train: epoch 0053, iter [00160, 00202], lr: 0.000199, loss: 3.9109 +2023-11-24 15:07:46 - train: epoch 0053, iter [00170, 00202], lr: 0.000198, loss: 3.6765 +2023-11-24 15:09:44 - train: epoch 0053, iter [00180, 00202], lr: 0.000198, loss: 3.1380 +2023-11-24 15:11:41 - train: epoch 0053, iter [00190, 00202], lr: 0.000198, loss: 3.7814 +2023-11-24 15:13:37 - train: epoch 0053, iter [00200, 00202], lr: 0.000197, loss: 4.6151 +2023-11-24 15:14:07 - train: epoch 053, train_loss: 3.8420 +2023-11-24 15:49:58 - eval: epoch: 053, acc1: 86.763%, acc5: 98.178%, test_loss: 0.5682, per_image_load_time: 0.183ms, per_image_inference_time: 3.833ms +2023-11-24 15:50:02 - until epoch: 053, best_acc1: 86.763% +2023-11-24 15:50:02 - epoch 054 lr: 0.000197 +2023-11-24 15:52:10 - train: epoch 0054, iter [00010, 00202], lr: 0.000197, loss: 4.1600 +2023-11-24 15:54:11 - train: epoch 0054, iter [00020, 00202], lr: 0.000197, loss: 3.7383 +2023-11-24 15:56:10 - train: epoch 0054, iter [00030, 00202], lr: 0.000196, loss: 4.3374 +2023-11-24 15:58:04 - train: epoch 0054, iter [00040, 00202], lr: 0.000196, loss: 3.6433 +2023-11-24 15:59:58 - train: epoch 0054, iter [00050, 00202], lr: 0.000196, loss: 4.4405 +2023-11-24 16:01:51 - train: epoch 0054, iter [00060, 00202], lr: 0.000195, loss: 3.6128 +2023-11-24 16:03:45 - train: epoch 0054, iter [00070, 00202], lr: 0.000195, loss: 2.7997 +2023-11-24 16:05:38 - train: epoch 0054, iter [00080, 00202], lr: 0.000195, loss: 4.4569 +2023-11-24 16:07:29 - train: epoch 0054, iter [00090, 00202], lr: 0.000194, loss: 4.3980 +2023-11-24 16:09:19 - train: epoch 0054, iter [00100, 00202], lr: 0.000194, loss: 3.6093 +2023-11-24 16:11:10 - train: epoch 0054, iter [00110, 00202], lr: 0.000194, loss: 4.5148 +2023-11-24 
16:13:00 - train: epoch 0054, iter [00120, 00202], lr: 0.000193, loss: 4.0708 +2023-11-24 16:14:50 - train: epoch 0054, iter [00130, 00202], lr: 0.000193, loss: 3.7894 +2023-11-24 16:16:40 - train: epoch 0054, iter [00140, 00202], lr: 0.000193, loss: 3.7002 +2023-11-24 16:18:30 - train: epoch 0054, iter [00150, 00202], lr: 0.000192, loss: 3.9175 +2023-11-24 16:20:20 - train: epoch 0054, iter [00160, 00202], lr: 0.000192, loss: 3.3790 +2023-11-24 16:22:11 - train: epoch 0054, iter [00170, 00202], lr: 0.000192, loss: 4.3816 +2023-11-24 16:24:02 - train: epoch 0054, iter [00180, 00202], lr: 0.000191, loss: 3.9209 +2023-11-24 16:25:53 - train: epoch 0054, iter [00190, 00202], lr: 0.000191, loss: 3.3029 +2023-11-24 16:27:46 - train: epoch 0054, iter [00200, 00202], lr: 0.000191, loss: 4.2064 +2023-11-24 16:28:16 - train: epoch 054, train_loss: 3.8543 +2023-11-24 17:03:48 - eval: epoch: 054, acc1: 87.058%, acc5: 98.292%, test_loss: 0.5591, per_image_load_time: 0.188ms, per_image_inference_time: 3.835ms +2023-11-24 17:03:52 - until epoch: 054, best_acc1: 87.058% +2023-11-24 17:03:52 - epoch 055 lr: 0.000191 +2023-11-24 17:05:56 - train: epoch 0055, iter [00010, 00202], lr: 0.000190, loss: 3.2878 +2023-11-24 17:07:53 - train: epoch 0055, iter [00020, 00202], lr: 0.000190, loss: 4.3044 +2023-11-24 17:09:49 - train: epoch 0055, iter [00030, 00202], lr: 0.000190, loss: 4.0793 +2023-11-24 17:11:46 - train: epoch 0055, iter [00040, 00202], lr: 0.000189, loss: 4.5468 +2023-11-24 17:13:43 - train: epoch 0055, iter [00050, 00202], lr: 0.000189, loss: 4.1712 +2023-11-24 17:15:41 - train: epoch 0055, iter [00060, 00202], lr: 0.000189, loss: 4.2579 +2023-11-24 17:17:36 - train: epoch 0055, iter [00070, 00202], lr: 0.000188, loss: 3.4231 +2023-11-24 17:19:31 - train: epoch 0055, iter [00080, 00202], lr: 0.000188, loss: 4.4305 +2023-11-24 17:21:25 - train: epoch 0055, iter [00090, 00202], lr: 0.000188, loss: 3.4449 +2023-11-24 17:23:20 - train: epoch 0055, iter [00100, 00202], lr: 0.000187, loss: 3.8594 +2023-11-24 17:25:13 - train: epoch 0055, iter [00110, 00202], lr: 0.000187, loss: 3.9721 +2023-11-24 17:27:07 - train: epoch 0055, iter [00120, 00202], lr: 0.000187, loss: 3.0704 +2023-11-24 17:28:59 - train: epoch 0055, iter [00130, 00202], lr: 0.000186, loss: 4.0033 +2023-11-24 17:30:52 - train: epoch 0055, iter [00140, 00202], lr: 0.000186, loss: 3.6756 +2023-11-24 17:32:45 - train: epoch 0055, iter [00150, 00202], lr: 0.000186, loss: 4.0109 +2023-11-24 17:34:38 - train: epoch 0055, iter [00160, 00202], lr: 0.000185, loss: 4.0811 +2023-11-24 17:36:33 - train: epoch 0055, iter [00170, 00202], lr: 0.000185, loss: 4.0911 +2023-11-24 17:38:28 - train: epoch 0055, iter [00180, 00202], lr: 0.000185, loss: 4.0122 +2023-11-24 17:40:23 - train: epoch 0055, iter [00190, 00202], lr: 0.000184, loss: 3.0823 +2023-11-24 17:42:17 - train: epoch 0055, iter [00200, 00202], lr: 0.000184, loss: 3.0968 +2023-11-24 17:42:47 - train: epoch 055, train_loss: 3.7910 +2023-11-24 18:19:06 - eval: epoch: 055, acc1: 87.284%, acc5: 98.374%, test_loss: 0.5485, per_image_load_time: 0.177ms, per_image_inference_time: 3.833ms +2023-11-24 18:19:10 - until epoch: 055, best_acc1: 87.284% +2023-11-24 18:19:10 - epoch 056 lr: 0.000184 +2023-11-24 18:21:12 - train: epoch 0056, iter [00010, 00202], lr: 0.000184, loss: 3.6349 +2023-11-24 18:23:08 - train: epoch 0056, iter [00020, 00202], lr: 0.000183, loss: 3.1074 +2023-11-24 18:25:03 - train: epoch 0056, iter [00030, 00202], lr: 0.000183, loss: 4.0353 +2023-11-24 18:26:58 - train: epoch 0056, iter 
[00040, 00202], lr: 0.000183, loss: 3.2626 +2023-11-24 18:28:52 - train: epoch 0056, iter [00050, 00202], lr: 0.000182, loss: 3.1946 +2023-11-24 18:30:47 - train: epoch 0056, iter [00060, 00202], lr: 0.000182, loss: 4.2285 +2023-11-24 18:32:40 - train: epoch 0056, iter [00070, 00202], lr: 0.000182, loss: 4.5454 +2023-11-24 18:34:35 - train: epoch 0056, iter [00080, 00202], lr: 0.000181, loss: 3.4545 +2023-11-24 18:36:29 - train: epoch 0056, iter [00090, 00202], lr: 0.000181, loss: 3.7545 +2023-11-24 18:38:22 - train: epoch 0056, iter [00100, 00202], lr: 0.000181, loss: 3.6617 +2023-11-24 18:40:16 - train: epoch 0056, iter [00110, 00202], lr: 0.000180, loss: 3.3126 +2023-11-24 18:42:11 - train: epoch 0056, iter [00120, 00202], lr: 0.000180, loss: 3.2710 +2023-11-24 18:44:06 - train: epoch 0056, iter [00130, 00202], lr: 0.000180, loss: 3.3901 +2023-11-24 18:46:02 - train: epoch 0056, iter [00140, 00202], lr: 0.000179, loss: 3.7894 +2023-11-24 18:47:57 - train: epoch 0056, iter [00150, 00202], lr: 0.000179, loss: 4.1154 +2023-11-24 18:49:52 - train: epoch 0056, iter [00160, 00202], lr: 0.000179, loss: 3.9145 +2023-11-24 18:51:48 - train: epoch 0056, iter [00170, 00202], lr: 0.000179, loss: 3.2603 +2023-11-24 18:53:42 - train: epoch 0056, iter [00180, 00202], lr: 0.000178, loss: 4.4415 +2023-11-24 18:55:37 - train: epoch 0056, iter [00190, 00202], lr: 0.000178, loss: 3.5071 +2023-11-24 18:57:32 - train: epoch 0056, iter [00200, 00202], lr: 0.000178, loss: 2.4282 +2023-11-24 18:58:03 - train: epoch 056, train_loss: 3.7087 +2023-11-24 19:34:22 - eval: epoch: 056, acc1: 87.472%, acc5: 98.454%, test_loss: 0.5373, per_image_load_time: 0.187ms, per_image_inference_time: 3.835ms +2023-11-24 19:34:26 - until epoch: 056, best_acc1: 87.472% +2023-11-24 19:34:26 - epoch 057 lr: 0.000177 +2023-11-24 19:36:21 - train: epoch 0057, iter [00010, 00202], lr: 0.000177, loss: 3.8813 +2023-11-24 19:38:11 - train: epoch 0057, iter [00020, 00202], lr: 0.000177, loss: 3.7837 +2023-11-24 19:40:02 - train: epoch 0057, iter [00030, 00202], lr: 0.000176, loss: 4.1281 +2023-11-24 19:41:53 - train: epoch 0057, iter [00040, 00202], lr: 0.000176, loss: 3.1311 +2023-11-24 19:43:44 - train: epoch 0057, iter [00050, 00202], lr: 0.000176, loss: 2.9805 +2023-11-24 19:45:35 - train: epoch 0057, iter [00060, 00202], lr: 0.000176, loss: 3.9893 +2023-11-24 19:47:25 - train: epoch 0057, iter [00070, 00202], lr: 0.000175, loss: 3.7525 +2023-11-24 19:49:16 - train: epoch 0057, iter [00080, 00202], lr: 0.000175, loss: 3.6820 +2023-11-24 19:51:08 - train: epoch 0057, iter [00090, 00202], lr: 0.000175, loss: 4.0446 +2023-11-24 19:52:59 - train: epoch 0057, iter [00100, 00202], lr: 0.000174, loss: 3.8377 +2023-11-24 19:54:51 - train: epoch 0057, iter [00110, 00202], lr: 0.000174, loss: 3.6691 +2023-11-24 19:56:43 - train: epoch 0057, iter [00120, 00202], lr: 0.000174, loss: 3.0442 +2023-11-24 19:58:34 - train: epoch 0057, iter [00130, 00202], lr: 0.000173, loss: 4.3301 +2023-11-24 20:00:26 - train: epoch 0057, iter [00140, 00202], lr: 0.000173, loss: 4.0423 +2023-11-24 20:02:18 - train: epoch 0057, iter [00150, 00202], lr: 0.000173, loss: 4.6476 +2023-11-24 20:04:10 - train: epoch 0057, iter [00160, 00202], lr: 0.000172, loss: 3.2061 +2023-11-24 20:06:03 - train: epoch 0057, iter [00170, 00202], lr: 0.000172, loss: 3.4314 +2023-11-24 20:07:57 - train: epoch 0057, iter [00180, 00202], lr: 0.000172, loss: 4.3936 +2023-11-24 20:09:51 - train: epoch 0057, iter [00190, 00202], lr: 0.000171, loss: 4.0401 +2023-11-24 20:11:46 - train: epoch 0057, 
iter [00200, 00202], lr: 0.000171, loss: 3.1495 +2023-11-24 20:12:16 - train: epoch 057, train_loss: 3.7763 +2023-11-24 20:46:05 - eval: epoch: 057, acc1: 87.588%, acc5: 98.512%, test_loss: 0.5308, per_image_load_time: 0.216ms, per_image_inference_time: 3.837ms +2023-11-24 20:46:08 - until epoch: 057, best_acc1: 87.588% +2023-11-24 20:46:08 - epoch 058 lr: 0.000171 +2023-11-24 20:48:20 - train: epoch 0058, iter [00010, 00202], lr: 0.000171, loss: 3.3913 +2023-11-24 20:50:29 - train: epoch 0058, iter [00020, 00202], lr: 0.000170, loss: 4.0017 +2023-11-24 20:52:38 - train: epoch 0058, iter [00030, 00202], lr: 0.000170, loss: 4.2840 +2023-11-24 20:54:49 - train: epoch 0058, iter [00040, 00202], lr: 0.000170, loss: 3.7274 +2023-11-24 20:57:02 - train: epoch 0058, iter [00050, 00202], lr: 0.000169, loss: 3.9706 +2023-11-24 20:59:16 - train: epoch 0058, iter [00060, 00202], lr: 0.000169, loss: 3.0741 +2023-11-24 21:01:33 - train: epoch 0058, iter [00070, 00202], lr: 0.000169, loss: 4.1527 +2023-11-24 21:03:49 - train: epoch 0058, iter [00080, 00202], lr: 0.000168, loss: 3.3340 +2023-11-24 21:06:04 - train: epoch 0058, iter [00090, 00202], lr: 0.000168, loss: 3.4120 +2023-11-24 21:08:18 - train: epoch 0058, iter [00100, 00202], lr: 0.000168, loss: 4.7672 +2023-11-24 21:10:31 - train: epoch 0058, iter [00110, 00202], lr: 0.000167, loss: 4.1954 +2023-11-24 21:12:43 - train: epoch 0058, iter [00120, 00202], lr: 0.000167, loss: 3.7292 +2023-11-24 21:14:56 - train: epoch 0058, iter [00130, 00202], lr: 0.000167, loss: 3.6800 +2023-11-24 21:17:09 - train: epoch 0058, iter [00140, 00202], lr: 0.000166, loss: 3.2395 +2023-11-24 21:19:20 - train: epoch 0058, iter [00150, 00202], lr: 0.000166, loss: 3.8519 +2023-11-24 21:21:30 - train: epoch 0058, iter [00160, 00202], lr: 0.000166, loss: 4.3693 +2023-11-24 21:23:40 - train: epoch 0058, iter [00170, 00202], lr: 0.000165, loss: 3.4987 +2023-11-24 21:25:48 - train: epoch 0058, iter [00180, 00202], lr: 0.000165, loss: 4.1517 +2023-11-24 21:27:57 - train: epoch 0058, iter [00190, 00202], lr: 0.000165, loss: 3.6813 +2023-11-24 21:30:05 - train: epoch 0058, iter [00200, 00202], lr: 0.000165, loss: 3.6018 +2023-11-24 21:30:38 - train: epoch 058, train_loss: 3.7674 +2023-11-24 22:05:30 - eval: epoch: 058, acc1: 87.818%, acc5: 98.591%, test_loss: 0.5251, per_image_load_time: 0.205ms, per_image_inference_time: 3.835ms +2023-11-24 22:05:34 - until epoch: 058, best_acc1: 87.818% +2023-11-24 22:05:34 - epoch 059 lr: 0.000164 +2023-11-24 22:07:35 - train: epoch 0059, iter [00010, 00202], lr: 0.000164, loss: 4.2489 +2023-11-24 22:09:46 - train: epoch 0059, iter [00020, 00202], lr: 0.000164, loss: 2.7226 +2023-11-24 22:12:01 - train: epoch 0059, iter [00030, 00202], lr: 0.000163, loss: 3.0666 +2023-11-24 22:13:58 - train: epoch 0059, iter [00040, 00202], lr: 0.000163, loss: 3.7112 +2023-11-24 22:15:51 - train: epoch 0059, iter [00050, 00202], lr: 0.000163, loss: 3.8598 +2023-11-24 22:17:44 - train: epoch 0059, iter [00060, 00202], lr: 0.000162, loss: 3.7205 +2023-11-24 22:19:35 - train: epoch 0059, iter [00070, 00202], lr: 0.000162, loss: 3.8088 +2023-11-24 22:21:27 - train: epoch 0059, iter [00080, 00202], lr: 0.000162, loss: 3.2780 +2023-11-24 22:23:18 - train: epoch 0059, iter [00090, 00202], lr: 0.000162, loss: 3.0658 +2023-11-24 22:25:11 - train: epoch 0059, iter [00100, 00202], lr: 0.000161, loss: 3.7694 +2023-11-24 22:27:14 - train: epoch 0059, iter [00110, 00202], lr: 0.000161, loss: 4.3153 +2023-11-24 22:29:24 - train: epoch 0059, iter [00120, 00202], lr: 0.000161, 
loss: 4.1709 +2023-11-24 22:31:34 - train: epoch 0059, iter [00130, 00202], lr: 0.000160, loss: 3.0189 +2023-11-24 22:33:44 - train: epoch 0059, iter [00140, 00202], lr: 0.000160, loss: 3.8149 +2023-11-24 22:35:53 - train: epoch 0059, iter [00150, 00202], lr: 0.000160, loss: 3.9076 +2023-11-24 22:37:48 - train: epoch 0059, iter [00160, 00202], lr: 0.000159, loss: 3.8118 +2023-11-24 22:39:40 - train: epoch 0059, iter [00170, 00202], lr: 0.000159, loss: 3.3814 +2023-11-24 22:41:31 - train: epoch 0059, iter [00180, 00202], lr: 0.000159, loss: 4.3364 +2023-11-24 22:43:21 - train: epoch 0059, iter [00190, 00202], lr: 0.000158, loss: 4.5320 +2023-11-24 22:45:14 - train: epoch 0059, iter [00200, 00202], lr: 0.000158, loss: 3.4775 +2023-11-24 22:45:44 - train: epoch 059, train_loss: 3.7618 +2023-11-24 23:20:25 - eval: epoch: 059, acc1: 87.974%, acc5: 98.650%, test_loss: 0.5219, per_image_load_time: 0.199ms, per_image_inference_time: 3.835ms +2023-11-24 23:20:29 - until epoch: 059, best_acc1: 87.974% +2023-11-24 23:20:29 - epoch 060 lr: 0.000158 +2023-11-24 23:22:25 - train: epoch 0060, iter [00010, 00202], lr: 0.000158, loss: 4.0385 +2023-11-24 23:24:16 - train: epoch 0060, iter [00020, 00202], lr: 0.000157, loss: 3.9782 +2023-11-24 23:26:07 - train: epoch 0060, iter [00030, 00202], lr: 0.000157, loss: 3.5444 +2023-11-24 23:27:57 - train: epoch 0060, iter [00040, 00202], lr: 0.000157, loss: 3.1752 +2023-11-24 23:29:47 - train: epoch 0060, iter [00050, 00202], lr: 0.000156, loss: 3.4548 +2023-11-24 23:31:37 - train: epoch 0060, iter [00060, 00202], lr: 0.000156, loss: 2.4357 +2023-11-24 23:33:27 - train: epoch 0060, iter [00070, 00202], lr: 0.000156, loss: 3.4203 +2023-11-24 23:35:16 - train: epoch 0060, iter [00080, 00202], lr: 0.000155, loss: 3.7203 +2023-11-24 23:37:07 - train: epoch 0060, iter [00090, 00202], lr: 0.000155, loss: 3.4965 +2023-11-24 23:38:58 - train: epoch 0060, iter [00100, 00202], lr: 0.000155, loss: 4.0172 +2023-11-24 23:40:49 - train: epoch 0060, iter [00110, 00202], lr: 0.000154, loss: 3.4787 +2023-11-24 23:42:41 - train: epoch 0060, iter [00120, 00202], lr: 0.000154, loss: 3.5614 +2023-11-24 23:44:32 - train: epoch 0060, iter [00130, 00202], lr: 0.000154, loss: 3.5876 +2023-11-24 23:46:24 - train: epoch 0060, iter [00140, 00202], lr: 0.000154, loss: 3.8343 +2023-11-24 23:48:16 - train: epoch 0060, iter [00150, 00202], lr: 0.000153, loss: 3.3103 +2023-11-24 23:50:07 - train: epoch 0060, iter [00160, 00202], lr: 0.000153, loss: 3.7626 +2023-11-24 23:51:58 - train: epoch 0060, iter [00170, 00202], lr: 0.000153, loss: 3.4160 +2023-11-24 23:53:51 - train: epoch 0060, iter [00180, 00202], lr: 0.000152, loss: 3.9290 +2023-11-24 23:55:43 - train: epoch 0060, iter [00190, 00202], lr: 0.000152, loss: 3.1757 +2023-11-24 23:57:36 - train: epoch 0060, iter [00200, 00202], lr: 0.000152, loss: 4.0829 +2023-11-24 23:58:06 - train: epoch 060, train_loss: 3.6966 +2023-11-25 00:31:36 - eval: epoch: 060, acc1: 88.216%, acc5: 98.705%, test_loss: 0.4980, per_image_load_time: 0.223ms, per_image_inference_time: 3.838ms +2023-11-25 00:31:40 - until epoch: 060, best_acc1: 88.216% +2023-11-25 00:31:40 - epoch 061 lr: 0.000152 +2023-11-25 00:33:34 - train: epoch 0061, iter [00010, 00202], lr: 0.000151, loss: 3.5454 +2023-11-25 00:35:25 - train: epoch 0061, iter [00020, 00202], lr: 0.000151, loss: 4.0765 +2023-11-25 00:37:16 - train: epoch 0061, iter [00030, 00202], lr: 0.000151, loss: 2.9718 +2023-11-25 00:39:06 - train: epoch 0061, iter [00040, 00202], lr: 0.000150, loss: 4.1279 +2023-11-25 00:41:01 
- train: epoch 0061, iter [00050, 00202], lr: 0.000150, loss: 3.2664 +2023-11-25 00:43:01 - train: epoch 0061, iter [00060, 00202], lr: 0.000150, loss: 3.2711 +2023-11-25 00:44:56 - train: epoch 0061, iter [00070, 00202], lr: 0.000149, loss: 3.4326 +2023-11-25 00:46:53 - train: epoch 0061, iter [00080, 00202], lr: 0.000149, loss: 4.1433 +2023-11-25 00:48:55 - train: epoch 0061, iter [00090, 00202], lr: 0.000149, loss: 4.0204 +2023-11-25 00:50:49 - train: epoch 0061, iter [00100, 00202], lr: 0.000148, loss: 3.4169 +2023-11-25 00:52:42 - train: epoch 0061, iter [00110, 00202], lr: 0.000148, loss: 3.4305 +2023-11-25 00:54:35 - train: epoch 0061, iter [00120, 00202], lr: 0.000148, loss: 3.1796 +2023-11-25 00:56:28 - train: epoch 0061, iter [00130, 00202], lr: 0.000147, loss: 3.1996 +2023-11-25 00:58:21 - train: epoch 0061, iter [00140, 00202], lr: 0.000147, loss: 3.6021 +2023-11-25 01:00:14 - train: epoch 0061, iter [00150, 00202], lr: 0.000147, loss: 3.3001 +2023-11-25 01:02:06 - train: epoch 0061, iter [00160, 00202], lr: 0.000146, loss: 4.3075 +2023-11-25 01:03:58 - train: epoch 0061, iter [00170, 00202], lr: 0.000146, loss: 3.5707 +2023-11-25 01:05:50 - train: epoch 0061, iter [00180, 00202], lr: 0.000146, loss: 4.3244 +2023-11-25 01:07:42 - train: epoch 0061, iter [00190, 00202], lr: 0.000146, loss: 3.4324 +2023-11-25 01:09:34 - train: epoch 0061, iter [00200, 00202], lr: 0.000145, loss: 4.5151 +2023-11-25 01:10:04 - train: epoch 061, train_loss: 3.6599 +2023-11-25 01:43:20 - eval: epoch: 061, acc1: 88.339%, acc5: 98.772%, test_loss: 0.4991, per_image_load_time: 0.227ms, per_image_inference_time: 3.837ms +2023-11-25 01:43:24 - until epoch: 061, best_acc1: 88.339% +2023-11-25 01:43:24 - epoch 062 lr: 0.000145 +2023-11-25 01:45:20 - train: epoch 0062, iter [00010, 00202], lr: 0.000145, loss: 3.3642 +2023-11-25 01:47:11 - train: epoch 0062, iter [00020, 00202], lr: 0.000145, loss: 3.5145 +2023-11-25 01:49:04 - train: epoch 0062, iter [00030, 00202], lr: 0.000144, loss: 4.0954 +2023-11-25 01:50:55 - train: epoch 0062, iter [00040, 00202], lr: 0.000144, loss: 2.9726 +2023-11-25 01:52:47 - train: epoch 0062, iter [00050, 00202], lr: 0.000144, loss: 3.1809 +2023-11-25 01:54:40 - train: epoch 0062, iter [00060, 00202], lr: 0.000143, loss: 3.9711 +2023-11-25 01:56:32 - train: epoch 0062, iter [00070, 00202], lr: 0.000143, loss: 4.0437 +2023-11-25 01:58:25 - train: epoch 0062, iter [00080, 00202], lr: 0.000143, loss: 3.0062 +2023-11-25 02:00:19 - train: epoch 0062, iter [00090, 00202], lr: 0.000142, loss: 2.8396 +2023-11-25 02:02:13 - train: epoch 0062, iter [00100, 00202], lr: 0.000142, loss: 3.9351 +2023-11-25 02:04:05 - train: epoch 0062, iter [00110, 00202], lr: 0.000142, loss: 3.3768 +2023-11-25 02:05:58 - train: epoch 0062, iter [00120, 00202], lr: 0.000141, loss: 3.9907 +2023-11-25 02:07:51 - train: epoch 0062, iter [00130, 00202], lr: 0.000141, loss: 3.2569 +2023-11-25 02:09:44 - train: epoch 0062, iter [00140, 00202], lr: 0.000141, loss: 3.7022 +2023-11-25 02:11:37 - train: epoch 0062, iter [00150, 00202], lr: 0.000140, loss: 3.7940 +2023-11-25 02:13:30 - train: epoch 0062, iter [00160, 00202], lr: 0.000140, loss: 4.3132 +2023-11-25 02:15:23 - train: epoch 0062, iter [00170, 00202], lr: 0.000140, loss: 4.8811 +2023-11-25 02:17:14 - train: epoch 0062, iter [00180, 00202], lr: 0.000140, loss: 4.0024 +2023-11-25 02:19:06 - train: epoch 0062, iter [00190, 00202], lr: 0.000139, loss: 3.8181 +2023-11-25 02:20:57 - train: epoch 0062, iter [00200, 00202], lr: 0.000139, loss: 4.2132 +2023-11-25 
02:21:27 - train: epoch 062, train_loss: 3.7410 +2023-11-25 02:55:05 - eval: epoch: 062, acc1: 88.534%, acc5: 98.822%, test_loss: 0.5029, per_image_load_time: 0.217ms, per_image_inference_time: 3.838ms +2023-11-25 02:55:08 - until epoch: 062, best_acc1: 88.534% +2023-11-25 02:55:08 - epoch 063 lr: 0.000139 +2023-11-25 02:57:07 - train: epoch 0063, iter [00010, 00202], lr: 0.000139, loss: 3.6550 +2023-11-25 02:59:00 - train: epoch 0063, iter [00020, 00202], lr: 0.000138, loss: 4.0377 +2023-11-25 03:00:52 - train: epoch 0063, iter [00030, 00202], lr: 0.000138, loss: 3.1171 +2023-11-25 03:02:45 - train: epoch 0063, iter [00040, 00202], lr: 0.000138, loss: 3.9303 +2023-11-25 03:04:38 - train: epoch 0063, iter [00050, 00202], lr: 0.000137, loss: 3.4307 +2023-11-25 03:06:30 - train: epoch 0063, iter [00060, 00202], lr: 0.000137, loss: 3.6101 +2023-11-25 03:08:22 - train: epoch 0063, iter [00070, 00202], lr: 0.000137, loss: 3.1167 +2023-11-25 03:10:14 - train: epoch 0063, iter [00080, 00202], lr: 0.000136, loss: 4.1517 +2023-11-25 03:12:06 - train: epoch 0063, iter [00090, 00202], lr: 0.000136, loss: 2.8460 +2023-11-25 03:13:57 - train: epoch 0063, iter [00100, 00202], lr: 0.000136, loss: 4.0802 +2023-11-25 03:15:49 - train: epoch 0063, iter [00110, 00202], lr: 0.000135, loss: 3.5780 +2023-11-25 03:17:40 - train: epoch 0063, iter [00120, 00202], lr: 0.000135, loss: 4.0553 +2023-11-25 03:19:31 - train: epoch 0063, iter [00130, 00202], lr: 0.000135, loss: 3.4846 +2023-11-25 03:21:23 - train: epoch 0063, iter [00140, 00202], lr: 0.000135, loss: 3.4432 +2023-11-25 03:23:15 - train: epoch 0063, iter [00150, 00202], lr: 0.000134, loss: 3.5104 +2023-11-25 03:25:06 - train: epoch 0063, iter [00160, 00202], lr: 0.000134, loss: 2.6144 +2023-11-25 03:26:56 - train: epoch 0063, iter [00170, 00202], lr: 0.000134, loss: 4.2189 +2023-11-25 03:28:47 - train: epoch 0063, iter [00180, 00202], lr: 0.000133, loss: 3.9811 +2023-11-25 03:30:39 - train: epoch 0063, iter [00190, 00202], lr: 0.000133, loss: 3.5504 +2023-11-25 03:32:30 - train: epoch 0063, iter [00200, 00202], lr: 0.000133, loss: 3.7319 +2023-11-25 03:33:00 - train: epoch 063, train_loss: 3.7181 +2023-11-25 04:06:47 - eval: epoch: 063, acc1: 88.660%, acc5: 98.865%, test_loss: 0.4877, per_image_load_time: 0.211ms, per_image_inference_time: 3.836ms +2023-11-25 04:06:50 - until epoch: 063, best_acc1: 88.660% +2023-11-25 04:06:50 - epoch 064 lr: 0.000133 +2023-11-25 04:08:47 - train: epoch 0064, iter [00010, 00202], lr: 0.000132, loss: 2.6077 +2023-11-25 04:10:40 - train: epoch 0064, iter [00020, 00202], lr: 0.000132, loss: 3.0094 +2023-11-25 04:12:32 - train: epoch 0064, iter [00030, 00202], lr: 0.000132, loss: 4.1239 +2023-11-25 04:14:24 - train: epoch 0064, iter [00040, 00202], lr: 0.000131, loss: 4.1634 +2023-11-25 04:16:17 - train: epoch 0064, iter [00050, 00202], lr: 0.000131, loss: 4.2419 +2023-11-25 04:18:09 - train: epoch 0064, iter [00060, 00202], lr: 0.000131, loss: 3.6855 +2023-11-25 04:20:01 - train: epoch 0064, iter [00070, 00202], lr: 0.000130, loss: 4.0697 +2023-11-25 04:21:53 - train: epoch 0064, iter [00080, 00202], lr: 0.000130, loss: 4.8205 +2023-11-25 04:23:44 - train: epoch 0064, iter [00090, 00202], lr: 0.000130, loss: 3.1155 +2023-11-25 04:25:36 - train: epoch 0064, iter [00100, 00202], lr: 0.000130, loss: 4.0504 +2023-11-25 04:27:27 - train: epoch 0064, iter [00110, 00202], lr: 0.000129, loss: 3.2499 +2023-11-25 04:29:19 - train: epoch 0064, iter [00120, 00202], lr: 0.000129, loss: 3.6868 +2023-11-25 04:31:10 - train: epoch 0064, iter 
[00130, 00202], lr: 0.000129, loss: 3.7284 +2023-11-25 04:33:02 - train: epoch 0064, iter [00140, 00202], lr: 0.000128, loss: 3.9633 +2023-11-25 04:34:52 - train: epoch 0064, iter [00150, 00202], lr: 0.000128, loss: 3.3221 +2023-11-25 04:36:43 - train: epoch 0064, iter [00160, 00202], lr: 0.000128, loss: 3.2191 +2023-11-25 04:38:34 - train: epoch 0064, iter [00170, 00202], lr: 0.000127, loss: 3.2937 +2023-11-25 04:40:27 - train: epoch 0064, iter [00180, 00202], lr: 0.000127, loss: 3.5766 +2023-11-25 04:42:18 - train: epoch 0064, iter [00190, 00202], lr: 0.000127, loss: 3.4330 +2023-11-25 04:44:10 - train: epoch 0064, iter [00200, 00202], lr: 0.000127, loss: 3.5585 +2023-11-25 04:44:40 - train: epoch 064, train_loss: 3.6700 +2023-11-25 05:18:22 - eval: epoch: 064, acc1: 88.761%, acc5: 98.905%, test_loss: 0.4742, per_image_load_time: 0.220ms, per_image_inference_time: 3.836ms +2023-11-25 05:18:25 - until epoch: 064, best_acc1: 88.761% +2023-11-25 05:18:25 - epoch 065 lr: 0.000126 +2023-11-25 05:20:21 - train: epoch 0065, iter [00010, 00202], lr: 0.000126, loss: 4.3760 +2023-11-25 05:22:13 - train: epoch 0065, iter [00020, 00202], lr: 0.000126, loss: 3.6944 +2023-11-25 05:24:05 - train: epoch 0065, iter [00030, 00202], lr: 0.000126, loss: 3.3543 +2023-11-25 05:25:56 - train: epoch 0065, iter [00040, 00202], lr: 0.000125, loss: 3.9281 +2023-11-25 05:27:48 - train: epoch 0065, iter [00050, 00202], lr: 0.000125, loss: 3.4561 +2023-11-25 05:29:40 - train: epoch 0065, iter [00060, 00202], lr: 0.000125, loss: 4.6145 +2023-11-25 05:31:33 - train: epoch 0065, iter [00070, 00202], lr: 0.000124, loss: 4.1890 +2023-11-25 05:33:24 - train: epoch 0065, iter [00080, 00202], lr: 0.000124, loss: 4.2221 +2023-11-25 05:35:15 - train: epoch 0065, iter [00090, 00202], lr: 0.000124, loss: 3.7218 +2023-11-25 05:37:07 - train: epoch 0065, iter [00100, 00202], lr: 0.000123, loss: 2.9183 +2023-11-25 05:38:57 - train: epoch 0065, iter [00110, 00202], lr: 0.000123, loss: 4.2237 +2023-11-25 05:40:48 - train: epoch 0065, iter [00120, 00202], lr: 0.000123, loss: 3.4009 +2023-11-25 05:42:39 - train: epoch 0065, iter [00130, 00202], lr: 0.000123, loss: 3.9714 +2023-11-25 05:44:30 - train: epoch 0065, iter [00140, 00202], lr: 0.000122, loss: 3.9317 +2023-11-25 05:46:22 - train: epoch 0065, iter [00150, 00202], lr: 0.000122, loss: 3.6276 +2023-11-25 05:48:13 - train: epoch 0065, iter [00160, 00202], lr: 0.000122, loss: 3.8795 +2023-11-25 05:50:05 - train: epoch 0065, iter [00170, 00202], lr: 0.000121, loss: 4.0756 +2023-11-25 05:51:57 - train: epoch 0065, iter [00180, 00202], lr: 0.000121, loss: 3.6519 +2023-11-25 05:53:48 - train: epoch 0065, iter [00190, 00202], lr: 0.000121, loss: 3.9215 +2023-11-25 05:55:39 - train: epoch 0065, iter [00200, 00202], lr: 0.000120, loss: 3.9127 +2023-11-25 05:56:09 - train: epoch 065, train_loss: 3.6849 +2023-11-25 06:30:06 - eval: epoch: 065, acc1: 88.949%, acc5: 98.948%, test_loss: 0.4704, per_image_load_time: 0.215ms, per_image_inference_time: 3.836ms +2023-11-25 06:30:10 - until epoch: 065, best_acc1: 88.949% +2023-11-25 06:30:10 - epoch 066 lr: 0.000120 +2023-11-25 06:32:04 - train: epoch 0066, iter [00010, 00202], lr: 0.000120, loss: 4.4027 +2023-11-25 06:33:56 - train: epoch 0066, iter [00020, 00202], lr: 0.000120, loss: 3.4557 +2023-11-25 06:35:48 - train: epoch 0066, iter [00030, 00202], lr: 0.000119, loss: 3.6521 +2023-11-25 06:37:39 - train: epoch 0066, iter [00040, 00202], lr: 0.000119, loss: 3.5027 +2023-11-25 06:39:30 - train: epoch 0066, iter [00050, 00202], lr: 0.000119, 
loss: 3.9231 +2023-11-25 06:41:21 - train: epoch 0066, iter [00060, 00202], lr: 0.000119, loss: 4.0078 +2023-11-25 06:43:13 - train: epoch 0066, iter [00070, 00202], lr: 0.000118, loss: 4.2015 +2023-11-25 06:45:05 - train: epoch 0066, iter [00080, 00202], lr: 0.000118, loss: 3.2403 +2023-11-25 06:46:58 - train: epoch 0066, iter [00090, 00202], lr: 0.000118, loss: 3.3012 +2023-11-25 06:48:51 - train: epoch 0066, iter [00100, 00202], lr: 0.000117, loss: 4.1124 +2023-11-25 06:50:44 - train: epoch 0066, iter [00110, 00202], lr: 0.000117, loss: 3.8628 +2023-11-25 06:52:37 - train: epoch 0066, iter [00120, 00202], lr: 0.000117, loss: 2.4455 +2023-11-25 06:54:29 - train: epoch 0066, iter [00130, 00202], lr: 0.000117, loss: 3.6929 +2023-11-25 06:56:22 - train: epoch 0066, iter [00140, 00202], lr: 0.000116, loss: 3.5904 +2023-11-25 06:58:14 - train: epoch 0066, iter [00150, 00202], lr: 0.000116, loss: 3.7265 +2023-11-25 07:00:07 - train: epoch 0066, iter [00160, 00202], lr: 0.000116, loss: 3.8523 +2023-11-25 07:02:00 - train: epoch 0066, iter [00170, 00202], lr: 0.000115, loss: 2.5529 +2023-11-25 07:03:53 - train: epoch 0066, iter [00180, 00202], lr: 0.000115, loss: 3.1611 +2023-11-25 07:05:45 - train: epoch 0066, iter [00190, 00202], lr: 0.000115, loss: 4.0520 +2023-11-25 07:07:37 - train: epoch 0066, iter [00200, 00202], lr: 0.000114, loss: 3.0159 +2023-11-25 07:08:07 - train: epoch 066, train_loss: 3.6616 +2023-11-25 07:41:38 - eval: epoch: 066, acc1: 89.057%, acc5: 98.977%, test_loss: 0.4636, per_image_load_time: 0.225ms, per_image_inference_time: 3.837ms +2023-11-25 07:41:42 - until epoch: 066, best_acc1: 89.057% +2023-11-25 07:41:42 - epoch 067 lr: 0.000114 +2023-11-25 07:43:36 - train: epoch 0067, iter [00010, 00202], lr: 0.000114, loss: 3.4362 +2023-11-25 07:45:27 - train: epoch 0067, iter [00020, 00202], lr: 0.000114, loss: 3.1164 +2023-11-25 07:47:18 - train: epoch 0067, iter [00030, 00202], lr: 0.000113, loss: 3.9195 +2023-11-25 07:49:10 - train: epoch 0067, iter [00040, 00202], lr: 0.000113, loss: 4.0919 +2023-11-25 07:51:01 - train: epoch 0067, iter [00050, 00202], lr: 0.000113, loss: 3.1124 +2023-11-25 07:52:53 - train: epoch 0067, iter [00060, 00202], lr: 0.000113, loss: 3.8670 +2023-11-25 07:54:44 - train: epoch 0067, iter [00070, 00202], lr: 0.000112, loss: 3.2091 +2023-11-25 07:56:35 - train: epoch 0067, iter [00080, 00202], lr: 0.000112, loss: 3.2224 +2023-11-25 07:58:27 - train: epoch 0067, iter [00090, 00202], lr: 0.000112, loss: 2.8355 +2023-11-25 08:00:18 - train: epoch 0067, iter [00100, 00202], lr: 0.000111, loss: 3.3393 +2023-11-25 08:02:09 - train: epoch 0067, iter [00110, 00202], lr: 0.000111, loss: 3.4553 +2023-11-25 08:04:01 - train: epoch 0067, iter [00120, 00202], lr: 0.000111, loss: 2.3136 +2023-11-25 08:05:52 - train: epoch 0067, iter [00130, 00202], lr: 0.000111, loss: 3.4045 +2023-11-25 08:07:44 - train: epoch 0067, iter [00140, 00202], lr: 0.000110, loss: 3.8037 +2023-11-25 08:09:35 - train: epoch 0067, iter [00150, 00202], lr: 0.000110, loss: 4.2103 +2023-11-25 08:11:27 - train: epoch 0067, iter [00160, 00202], lr: 0.000110, loss: 3.9960 +2023-11-25 08:13:19 - train: epoch 0067, iter [00170, 00202], lr: 0.000109, loss: 3.0649 +2023-11-25 08:15:10 - train: epoch 0067, iter [00180, 00202], lr: 0.000109, loss: 4.4007 +2023-11-25 08:17:02 - train: epoch 0067, iter [00190, 00202], lr: 0.000109, loss: 3.8200 +2023-11-25 08:18:52 - train: epoch 0067, iter [00200, 00202], lr: 0.000109, loss: 3.0504 +2023-11-25 08:19:22 - train: epoch 067, train_loss: 3.6715 +2023-11-25 
08:52:31 - eval: epoch: 067, acc1: 89.193%, acc5: 99.017%, test_loss: 0.4624, per_image_load_time: 0.241ms, per_image_inference_time: 3.839ms +2023-11-25 08:52:34 - until epoch: 067, best_acc1: 89.193% +2023-11-25 08:52:34 - epoch 068 lr: 0.000108 +2023-11-25 08:54:29 - train: epoch 0068, iter [00010, 00202], lr: 0.000108, loss: 3.4104 +2023-11-25 08:56:20 - train: epoch 0068, iter [00020, 00202], lr: 0.000108, loss: 3.2353 +2023-11-25 08:58:12 - train: epoch 0068, iter [00030, 00202], lr: 0.000108, loss: 3.0318 +2023-11-25 09:00:04 - train: epoch 0068, iter [00040, 00202], lr: 0.000107, loss: 3.2169 +2023-11-25 09:01:55 - train: epoch 0068, iter [00050, 00202], lr: 0.000107, loss: 3.2650 +2023-11-25 09:03:47 - train: epoch 0068, iter [00060, 00202], lr: 0.000107, loss: 4.5296 +2023-11-25 09:05:39 - train: epoch 0068, iter [00070, 00202], lr: 0.000106, loss: 3.8940 +2023-11-25 09:07:30 - train: epoch 0068, iter [00080, 00202], lr: 0.000106, loss: 3.4264 +2023-11-25 09:09:21 - train: epoch 0068, iter [00090, 00202], lr: 0.000106, loss: 3.1611 +2023-11-25 09:11:13 - train: epoch 0068, iter [00100, 00202], lr: 0.000106, loss: 4.4085 +2023-11-25 09:13:04 - train: epoch 0068, iter [00110, 00202], lr: 0.000105, loss: 4.3756 +2023-11-25 09:14:57 - train: epoch 0068, iter [00120, 00202], lr: 0.000105, loss: 4.2117 +2023-11-25 09:16:49 - train: epoch 0068, iter [00130, 00202], lr: 0.000105, loss: 4.1524 +2023-11-25 09:18:41 - train: epoch 0068, iter [00140, 00202], lr: 0.000104, loss: 3.3709 +2023-11-25 09:20:32 - train: epoch 0068, iter [00150, 00202], lr: 0.000104, loss: 4.1799 +2023-11-25 09:22:24 - train: epoch 0068, iter [00160, 00202], lr: 0.000104, loss: 4.0468 +2023-11-25 09:24:15 - train: epoch 0068, iter [00170, 00202], lr: 0.000104, loss: 3.8568 +2023-11-25 09:26:06 - train: epoch 0068, iter [00180, 00202], lr: 0.000103, loss: 3.6075 +2023-11-25 09:27:57 - train: epoch 0068, iter [00190, 00202], lr: 0.000103, loss: 4.0042 +2023-11-25 09:29:47 - train: epoch 0068, iter [00200, 00202], lr: 0.000103, loss: 3.1062 +2023-11-25 09:30:17 - train: epoch 068, train_loss: 3.6354 +2023-11-25 10:03:39 - eval: epoch: 068, acc1: 89.311%, acc5: 99.046%, test_loss: 0.4513, per_image_load_time: 0.231ms, per_image_inference_time: 3.839ms +2023-11-25 10:03:43 - until epoch: 068, best_acc1: 89.311% +2023-11-25 10:03:43 - epoch 069 lr: 0.000103 +2023-11-25 10:05:40 - train: epoch 0069, iter [00010, 00202], lr: 0.000102, loss: 3.8170 +2023-11-25 10:07:34 - train: epoch 0069, iter [00020, 00202], lr: 0.000102, loss: 3.6336 +2023-11-25 10:09:27 - train: epoch 0069, iter [00030, 00202], lr: 0.000102, loss: 3.6452 +2023-11-25 10:11:19 - train: epoch 0069, iter [00040, 00202], lr: 0.000102, loss: 3.7977 +2023-11-25 10:13:11 - train: epoch 0069, iter [00050, 00202], lr: 0.000101, loss: 3.8764 +2023-11-25 10:15:04 - train: epoch 0069, iter [00060, 00202], lr: 0.000101, loss: 3.4704 +2023-11-25 10:16:55 - train: epoch 0069, iter [00070, 00202], lr: 0.000101, loss: 3.7496 +2023-11-25 10:18:47 - train: epoch 0069, iter [00080, 00202], lr: 0.000100, loss: 3.9019 +2023-11-25 10:20:40 - train: epoch 0069, iter [00090, 00202], lr: 0.000100, loss: 3.5728 +2023-11-25 10:22:32 - train: epoch 0069, iter [00100, 00202], lr: 0.000100, loss: 3.7738 +2023-11-25 10:24:24 - train: epoch 0069, iter [00110, 00202], lr: 0.000100, loss: 4.0046 +2023-11-25 10:26:16 - train: epoch 0069, iter [00120, 00202], lr: 0.000099, loss: 4.1717 +2023-11-25 10:28:08 - train: epoch 0069, iter [00130, 00202], lr: 0.000099, loss: 4.0943 +2023-11-25 
10:29:59 - train: epoch 0069, iter [00140, 00202], lr: 0.000099, loss: 3.3250 +2023-11-25 10:31:50 - train: epoch 0069, iter [00150, 00202], lr: 0.000098, loss: 3.3426 +2023-11-25 10:33:41 - train: epoch 0069, iter [00160, 00202], lr: 0.000098, loss: 4.1262 +2023-11-25 10:35:33 - train: epoch 0069, iter [00170, 00202], lr: 0.000098, loss: 4.1268 +2023-11-25 10:37:25 - train: epoch 0069, iter [00180, 00202], lr: 0.000098, loss: 2.5959 +2023-11-25 10:39:17 - train: epoch 0069, iter [00190, 00202], lr: 0.000097, loss: 4.0185 +2023-11-25 10:41:09 - train: epoch 0069, iter [00200, 00202], lr: 0.000097, loss: 4.0839 +2023-11-25 10:41:38 - train: epoch 069, train_loss: 3.6376 +2023-11-25 11:15:25 - eval: epoch: 069, acc1: 89.411%, acc5: 99.080%, test_loss: 0.4479, per_image_load_time: 0.218ms, per_image_inference_time: 3.839ms +2023-11-25 11:15:29 - until epoch: 069, best_acc1: 89.411% +2023-11-25 11:15:29 - epoch 070 lr: 0.000097 +2023-11-25 11:17:24 - train: epoch 0070, iter [00010, 00202], lr: 0.000097, loss: 3.9648 +2023-11-25 11:19:14 - train: epoch 0070, iter [00020, 00202], lr: 0.000096, loss: 3.1928 +2023-11-25 11:21:05 - train: epoch 0070, iter [00030, 00202], lr: 0.000096, loss: 3.4843 +2023-11-25 11:22:57 - train: epoch 0070, iter [00040, 00202], lr: 0.000096, loss: 3.6416 +2023-11-25 11:24:48 - train: epoch 0070, iter [00050, 00202], lr: 0.000096, loss: 3.2666 +2023-11-25 11:26:39 - train: epoch 0070, iter [00060, 00202], lr: 0.000095, loss: 3.3903 +2023-11-25 11:28:31 - train: epoch 0070, iter [00070, 00202], lr: 0.000095, loss: 3.9352 +2023-11-25 11:30:22 - train: epoch 0070, iter [00080, 00202], lr: 0.000095, loss: 3.6505 +2023-11-25 11:32:13 - train: epoch 0070, iter [00090, 00202], lr: 0.000094, loss: 3.9568 +2023-11-25 11:34:05 - train: epoch 0070, iter [00100, 00202], lr: 0.000094, loss: 3.7687 +2023-11-25 11:35:57 - train: epoch 0070, iter [00110, 00202], lr: 0.000094, loss: 3.4820 +2023-11-25 11:37:50 - train: epoch 0070, iter [00120, 00202], lr: 0.000094, loss: 3.5467 +2023-11-25 11:39:43 - train: epoch 0070, iter [00130, 00202], lr: 0.000093, loss: 3.1916 +2023-11-25 11:41:35 - train: epoch 0070, iter [00140, 00202], lr: 0.000093, loss: 4.1296 +2023-11-25 11:43:28 - train: epoch 0070, iter [00150, 00202], lr: 0.000093, loss: 3.7642 +2023-11-25 11:45:20 - train: epoch 0070, iter [00160, 00202], lr: 0.000093, loss: 3.3241 +2023-11-25 11:47:13 - train: epoch 0070, iter [00170, 00202], lr: 0.000092, loss: 4.2725 +2023-11-25 11:49:06 - train: epoch 0070, iter [00180, 00202], lr: 0.000092, loss: 2.8158 +2023-11-25 11:50:59 - train: epoch 0070, iter [00190, 00202], lr: 0.000092, loss: 3.4571 +2023-11-25 11:52:52 - train: epoch 0070, iter [00200, 00202], lr: 0.000091, loss: 3.6667 +2023-11-25 11:53:21 - train: epoch 070, train_loss: 3.6632 +2023-11-25 12:26:49 - eval: epoch: 070, acc1: 89.508%, acc5: 99.097%, test_loss: 0.4421, per_image_load_time: 0.225ms, per_image_inference_time: 3.838ms +2023-11-25 12:26:53 - until epoch: 070, best_acc1: 89.508% +2023-11-25 12:26:53 - epoch 071 lr: 0.000091 +2023-11-25 12:28:50 - train: epoch 0071, iter [00010, 00202], lr: 0.000091, loss: 3.2343 +2023-11-25 12:30:43 - train: epoch 0071, iter [00020, 00202], lr: 0.000091, loss: 3.8302 +2023-11-25 12:32:35 - train: epoch 0071, iter [00030, 00202], lr: 0.000091, loss: 3.9258 +2023-11-25 12:34:28 - train: epoch 0071, iter [00040, 00202], lr: 0.000090, loss: 3.2161 +2023-11-25 12:36:21 - train: epoch 0071, iter [00050, 00202], lr: 0.000090, loss: 2.9821 +2023-11-25 12:38:16 - train: epoch 0071, iter 
[00060, 00202], lr: 0.000090, loss: 3.7364 +2023-11-25 12:40:09 - train: epoch 0071, iter [00070, 00202], lr: 0.000089, loss: 3.0883 +2023-11-25 12:42:02 - train: epoch 0071, iter [00080, 00202], lr: 0.000089, loss: 4.4037 +2023-11-25 12:43:54 - train: epoch 0071, iter [00090, 00202], lr: 0.000089, loss: 4.0044 +2023-11-25 12:45:45 - train: epoch 0071, iter [00100, 00202], lr: 0.000089, loss: 3.8050 +2023-11-25 12:47:36 - train: epoch 0071, iter [00110, 00202], lr: 0.000088, loss: 4.0430 +2023-11-25 12:49:27 - train: epoch 0071, iter [00120, 00202], lr: 0.000088, loss: 3.5630 +2023-11-25 12:51:18 - train: epoch 0071, iter [00130, 00202], lr: 0.000088, loss: 3.8279 +2023-11-25 12:53:09 - train: epoch 0071, iter [00140, 00202], lr: 0.000088, loss: 3.7599 +2023-11-25 12:55:00 - train: epoch 0071, iter [00150, 00202], lr: 0.000087, loss: 3.7680 +2023-11-25 12:56:51 - train: epoch 0071, iter [00160, 00202], lr: 0.000087, loss: 4.3104 +2023-11-25 12:58:41 - train: epoch 0071, iter [00170, 00202], lr: 0.000087, loss: 4.5276 +2023-11-25 13:00:32 - train: epoch 0071, iter [00180, 00202], lr: 0.000087, loss: 3.9353 +2023-11-25 13:02:22 - train: epoch 0071, iter [00190, 00202], lr: 0.000086, loss: 3.8027 +2023-11-25 13:04:14 - train: epoch 0071, iter [00200, 00202], lr: 0.000086, loss: 4.4019 +2023-11-25 13:04:43 - train: epoch 071, train_loss: 3.6403 +2023-11-25 13:38:36 - eval: epoch: 071, acc1: 89.595%, acc5: 99.127%, test_loss: 0.4355, per_image_load_time: 0.225ms, per_image_inference_time: 3.838ms +2023-11-25 13:38:40 - until epoch: 071, best_acc1: 89.595% +2023-11-25 13:38:40 - epoch 072 lr: 0.000086 +2023-11-25 13:40:34 - train: epoch 0072, iter [00010, 00202], lr: 0.000086, loss: 3.1515 +2023-11-25 13:42:25 - train: epoch 0072, iter [00020, 00202], lr: 0.000085, loss: 3.3179 +2023-11-25 13:44:14 - train: epoch 0072, iter [00030, 00202], lr: 0.000085, loss: 3.8531 +2023-11-25 13:46:04 - train: epoch 0072, iter [00040, 00202], lr: 0.000085, loss: 3.8602 +2023-11-25 13:47:53 - train: epoch 0072, iter [00050, 00202], lr: 0.000085, loss: 4.0278 +2023-11-25 13:49:43 - train: epoch 0072, iter [00060, 00202], lr: 0.000084, loss: 4.3322 +2023-11-25 13:51:34 - train: epoch 0072, iter [00070, 00202], lr: 0.000084, loss: 3.5191 +2023-11-25 13:53:26 - train: epoch 0072, iter [00080, 00202], lr: 0.000084, loss: 3.1055 +2023-11-25 13:55:19 - train: epoch 0072, iter [00090, 00202], lr: 0.000084, loss: 3.1607 +2023-11-25 13:57:11 - train: epoch 0072, iter [00100, 00202], lr: 0.000083, loss: 4.2819 +2023-11-25 13:59:03 - train: epoch 0072, iter [00110, 00202], lr: 0.000083, loss: 3.7303 +2023-11-25 14:00:56 - train: epoch 0072, iter [00120, 00202], lr: 0.000083, loss: 3.7662 +2023-11-25 14:02:48 - train: epoch 0072, iter [00130, 00202], lr: 0.000082, loss: 3.3909 +2023-11-25 14:04:41 - train: epoch 0072, iter [00140, 00202], lr: 0.000082, loss: 3.8652 +2023-11-25 14:06:33 - train: epoch 0072, iter [00150, 00202], lr: 0.000082, loss: 3.1346 +2023-11-25 14:08:27 - train: epoch 0072, iter [00160, 00202], lr: 0.000082, loss: 3.6638 +2023-11-25 14:10:21 - train: epoch 0072, iter [00170, 00202], lr: 0.000081, loss: 3.8952 +2023-11-25 14:12:15 - train: epoch 0072, iter [00180, 00202], lr: 0.000081, loss: 3.4143 +2023-11-25 14:14:08 - train: epoch 0072, iter [00190, 00202], lr: 0.000081, loss: 3.8895 +2023-11-25 14:16:01 - train: epoch 0072, iter [00200, 00202], lr: 0.000081, loss: 3.7209 +2023-11-25 14:16:31 - train: epoch 072, train_loss: 3.5924 +2023-11-25 14:49:49 - eval: epoch: 072, acc1: 89.691%, acc5: 99.146%, 
test_loss: 0.4363, per_image_load_time: 0.229ms, per_image_inference_time: 3.838ms +2023-11-25 14:49:53 - until epoch: 072, best_acc1: 89.691% +2023-11-25 14:49:53 - epoch 073 lr: 0.000081 +2023-11-25 14:51:52 - train: epoch 0073, iter [00010, 00202], lr: 0.000080, loss: 2.9843 +2023-11-25 14:53:46 - train: epoch 0073, iter [00020, 00202], lr: 0.000080, loss: 3.7419 +2023-11-25 14:55:39 - train: epoch 0073, iter [00030, 00202], lr: 0.000080, loss: 4.0696 +2023-11-25 14:57:33 - train: epoch 0073, iter [00040, 00202], lr: 0.000080, loss: 3.5329 +2023-11-25 14:59:25 - train: epoch 0073, iter [00050, 00202], lr: 0.000079, loss: 3.0857 +2023-11-25 15:01:17 - train: epoch 0073, iter [00060, 00202], lr: 0.000079, loss: 3.0934 +2023-11-25 15:03:09 - train: epoch 0073, iter [00070, 00202], lr: 0.000079, loss: 2.9028 +2023-11-25 15:05:01 - train: epoch 0073, iter [00080, 00202], lr: 0.000079, loss: 3.4424 +2023-11-25 15:06:51 - train: epoch 0073, iter [00090, 00202], lr: 0.000078, loss: 3.0837 +2023-11-25 15:08:41 - train: epoch 0073, iter [00100, 00202], lr: 0.000078, loss: 3.8377 +2023-11-25 15:10:32 - train: epoch 0073, iter [00110, 00202], lr: 0.000078, loss: 4.2355 +2023-11-25 15:12:23 - train: epoch 0073, iter [00120, 00202], lr: 0.000077, loss: 3.7345 +2023-11-25 15:14:14 - train: epoch 0073, iter [00130, 00202], lr: 0.000077, loss: 4.2387 +2023-11-25 15:16:05 - train: epoch 0073, iter [00140, 00202], lr: 0.000077, loss: 2.9729 +2023-11-25 15:17:56 - train: epoch 0073, iter [00150, 00202], lr: 0.000077, loss: 4.0556 +2023-11-25 15:19:49 - train: epoch 0073, iter [00160, 00202], lr: 0.000076, loss: 3.4037 +2023-11-25 15:21:41 - train: epoch 0073, iter [00170, 00202], lr: 0.000076, loss: 3.4917 +2023-11-25 15:23:34 - train: epoch 0073, iter [00180, 00202], lr: 0.000076, loss: 3.3162 +2023-11-25 15:25:29 - train: epoch 0073, iter [00190, 00202], lr: 0.000076, loss: 3.4531 +2023-11-25 15:27:23 - train: epoch 0073, iter [00200, 00202], lr: 0.000075, loss: 3.6969 +2023-11-25 15:27:53 - train: epoch 073, train_loss: 3.6031 +2023-11-25 16:02:16 - eval: epoch: 073, acc1: 89.767%, acc5: 99.165%, test_loss: 0.4285, per_image_load_time: 0.207ms, per_image_inference_time: 3.838ms +2023-11-25 16:02:20 - until epoch: 073, best_acc1: 89.767% +2023-11-25 16:02:20 - epoch 074 lr: 0.000075 +2023-11-25 16:04:17 - train: epoch 0074, iter [00010, 00202], lr: 0.000075, loss: 3.0456 +2023-11-25 16:06:09 - train: epoch 0074, iter [00020, 00202], lr: 0.000075, loss: 4.1135 +2023-11-25 16:08:01 - train: epoch 0074, iter [00030, 00202], lr: 0.000075, loss: 3.5773 +2023-11-25 16:09:53 - train: epoch 0074, iter [00040, 00202], lr: 0.000074, loss: 3.0643 +2023-11-25 16:11:44 - train: epoch 0074, iter [00050, 00202], lr: 0.000074, loss: 4.0205 +2023-11-25 16:13:36 - train: epoch 0074, iter [00060, 00202], lr: 0.000074, loss: 3.9985 +2023-11-25 16:15:27 - train: epoch 0074, iter [00070, 00202], lr: 0.000074, loss: 3.3590 +2023-11-25 16:17:18 - train: epoch 0074, iter [00080, 00202], lr: 0.000073, loss: 4.1768 +2023-11-25 16:19:09 - train: epoch 0074, iter [00090, 00202], lr: 0.000073, loss: 4.0914 +2023-11-25 16:21:00 - train: epoch 0074, iter [00100, 00202], lr: 0.000073, loss: 2.4194 +2023-11-25 16:22:51 - train: epoch 0074, iter [00110, 00202], lr: 0.000073, loss: 3.7953 +2023-11-25 16:24:43 - train: epoch 0074, iter [00120, 00202], lr: 0.000072, loss: 3.9141 +2023-11-25 16:26:35 - train: epoch 0074, iter [00130, 00202], lr: 0.000072, loss: 3.3493 +2023-11-25 16:28:27 - train: epoch 0074, iter [00140, 00202], lr: 
0.000072, loss: 3.1651 +2023-11-25 16:30:19 - train: epoch 0074, iter [00150, 00202], lr: 0.000072, loss: 3.2642 +2023-11-25 16:32:10 - train: epoch 0074, iter [00160, 00202], lr: 0.000071, loss: 3.8210 +2023-11-25 16:34:02 - train: epoch 0074, iter [00170, 00202], lr: 0.000071, loss: 2.9895 +2023-11-25 16:35:53 - train: epoch 0074, iter [00180, 00202], lr: 0.000071, loss: 3.1008 +2023-11-25 16:37:46 - train: epoch 0074, iter [00190, 00202], lr: 0.000071, loss: 3.3154 +2023-11-25 16:39:38 - train: epoch 0074, iter [00200, 00202], lr: 0.000070, loss: 2.9735 +2023-11-25 16:40:07 - train: epoch 074, train_loss: 3.5719 +2023-11-25 17:13:55 - eval: epoch: 074, acc1: 89.868%, acc5: 99.202%, test_loss: 0.4228, per_image_load_time: 0.225ms, per_image_inference_time: 3.839ms +2023-11-25 17:13:59 - until epoch: 074, best_acc1: 89.868% +2023-11-25 17:13:59 - epoch 075 lr: 0.000070 +2023-11-25 17:15:54 - train: epoch 0075, iter [00010, 00202], lr: 0.000070, loss: 4.3793 +2023-11-25 17:17:47 - train: epoch 0075, iter [00020, 00202], lr: 0.000070, loss: 3.6535 +2023-11-25 17:19:40 - train: epoch 0075, iter [00030, 00202], lr: 0.000070, loss: 3.7279 +2023-11-25 17:21:32 - train: epoch 0075, iter [00040, 00202], lr: 0.000069, loss: 3.5834 +2023-11-25 17:23:24 - train: epoch 0075, iter [00050, 00202], lr: 0.000069, loss: 3.6455 +2023-11-25 17:25:16 - train: epoch 0075, iter [00060, 00202], lr: 0.000069, loss: 3.5317 +2023-11-25 17:27:09 - train: epoch 0075, iter [00070, 00202], lr: 0.000069, loss: 2.9674 +2023-11-25 17:29:01 - train: epoch 0075, iter [00080, 00202], lr: 0.000068, loss: 3.1139 +2023-11-25 17:30:53 - train: epoch 0075, iter [00090, 00202], lr: 0.000068, loss: 2.9337 +2023-11-25 17:32:45 - train: epoch 0075, iter [00100, 00202], lr: 0.000068, loss: 3.7222 +2023-11-25 17:34:36 - train: epoch 0075, iter [00110, 00202], lr: 0.000068, loss: 4.1320 +2023-11-25 17:36:29 - train: epoch 0075, iter [00120, 00202], lr: 0.000067, loss: 3.0715 +2023-11-25 17:38:21 - train: epoch 0075, iter [00130, 00202], lr: 0.000067, loss: 2.5718 +2023-11-25 17:40:13 - train: epoch 0075, iter [00140, 00202], lr: 0.000067, loss: 3.6476 +2023-11-25 17:42:05 - train: epoch 0075, iter [00150, 00202], lr: 0.000067, loss: 3.1986 +2023-11-25 17:43:57 - train: epoch 0075, iter [00160, 00202], lr: 0.000066, loss: 3.2735 +2023-11-25 17:45:48 - train: epoch 0075, iter [00170, 00202], lr: 0.000066, loss: 3.2648 +2023-11-25 17:47:40 - train: epoch 0075, iter [00180, 00202], lr: 0.000066, loss: 3.0095 +2023-11-25 17:49:32 - train: epoch 0075, iter [00190, 00202], lr: 0.000066, loss: 3.7673 +2023-11-25 17:51:24 - train: epoch 0075, iter [00200, 00202], lr: 0.000065, loss: 3.6484 +2023-11-25 17:51:53 - train: epoch 075, train_loss: 3.5935 +2023-11-25 18:29:26 - eval: epoch: 075, acc1: 89.944%, acc5: 99.225%, test_loss: 0.4183, per_image_load_time: 0.166ms, per_image_inference_time: 3.829ms +2023-11-25 18:29:30 - until epoch: 075, best_acc1: 89.944% +2023-11-25 18:29:30 - epoch 076 lr: 0.000065 +2023-11-25 18:31:37 - train: epoch 0076, iter [00010, 00202], lr: 0.000065, loss: 3.3022 +2023-11-25 18:33:37 - train: epoch 0076, iter [00020, 00202], lr: 0.000065, loss: 3.3134 +2023-11-25 18:35:37 - train: epoch 0076, iter [00030, 00202], lr: 0.000065, loss: 3.2781 +2023-11-25 18:37:37 - train: epoch 0076, iter [00040, 00202], lr: 0.000064, loss: 3.2962 +2023-11-25 18:39:36 - train: epoch 0076, iter [00050, 00202], lr: 0.000064, loss: 3.9499 +2023-11-25 18:41:35 - train: epoch 0076, iter [00060, 00202], lr: 0.000064, loss: 2.9792 +2023-11-25 
18:43:34 - train: epoch 0076, iter [00070, 00202], lr: 0.000064, loss: 3.1559 +2023-11-25 18:45:33 - train: epoch 0076, iter [00080, 00202], lr: 0.000063, loss: 3.9463 +2023-11-25 18:47:31 - train: epoch 0076, iter [00090, 00202], lr: 0.000063, loss: 3.4155 +2023-11-25 18:49:31 - train: epoch 0076, iter [00100, 00202], lr: 0.000063, loss: 3.4523 +2023-11-25 18:51:31 - train: epoch 0076, iter [00110, 00202], lr: 0.000063, loss: 3.3058 +2023-11-25 18:53:33 - train: epoch 0076, iter [00120, 00202], lr: 0.000063, loss: 3.9427 +2023-11-25 18:55:35 - train: epoch 0076, iter [00130, 00202], lr: 0.000062, loss: 2.9043 +2023-11-25 18:57:37 - train: epoch 0076, iter [00140, 00202], lr: 0.000062, loss: 2.3745 +2023-11-25 18:59:37 - train: epoch 0076, iter [00150, 00202], lr: 0.000062, loss: 3.7367 +2023-11-25 19:01:32 - train: epoch 0076, iter [00160, 00202], lr: 0.000062, loss: 2.7701 +2023-11-25 19:03:25 - train: epoch 0076, iter [00170, 00202], lr: 0.000061, loss: 3.7300 +2023-11-25 19:05:18 - train: epoch 0076, iter [00180, 00202], lr: 0.000061, loss: 3.3945 +2023-11-25 19:07:09 - train: epoch 0076, iter [00190, 00202], lr: 0.000061, loss: 3.6317 +2023-11-25 19:09:01 - train: epoch 0076, iter [00200, 00202], lr: 0.000061, loss: 3.3246 +2023-11-25 19:09:31 - train: epoch 076, train_loss: 3.6017 +2023-11-25 19:44:25 - eval: epoch: 076, acc1: 90.032%, acc5: 99.232%, test_loss: 0.4162, per_image_load_time: 0.197ms, per_image_inference_time: 3.836ms +2023-11-25 19:44:29 - until epoch: 076, best_acc1: 90.032% +2023-11-25 19:44:29 - epoch 077 lr: 0.000061 +2023-11-25 19:46:36 - train: epoch 0077, iter [00010, 00202], lr: 0.000060, loss: 2.6849 +2023-11-25 19:48:37 - train: epoch 0077, iter [00020, 00202], lr: 0.000060, loss: 3.0127 +2023-11-25 19:50:37 - train: epoch 0077, iter [00030, 00202], lr: 0.000060, loss: 3.9046 +2023-11-25 19:52:38 - train: epoch 0077, iter [00040, 00202], lr: 0.000060, loss: 3.3403 +2023-11-25 19:54:40 - train: epoch 0077, iter [00050, 00202], lr: 0.000059, loss: 3.6866 +2023-11-25 19:56:44 - train: epoch 0077, iter [00060, 00202], lr: 0.000059, loss: 4.0409 +2023-11-25 19:58:48 - train: epoch 0077, iter [00070, 00202], lr: 0.000059, loss: 4.0939 +2023-11-25 20:00:51 - train: epoch 0077, iter [00080, 00202], lr: 0.000059, loss: 3.5552 +2023-11-25 20:02:53 - train: epoch 0077, iter [00090, 00202], lr: 0.000059, loss: 3.5447 +2023-11-25 20:04:55 - train: epoch 0077, iter [00100, 00202], lr: 0.000058, loss: 3.5995 +2023-11-25 20:06:58 - train: epoch 0077, iter [00110, 00202], lr: 0.000058, loss: 3.3240 +2023-11-25 20:08:58 - train: epoch 0077, iter [00120, 00202], lr: 0.000058, loss: 3.7099 +2023-11-25 20:11:00 - train: epoch 0077, iter [00130, 00202], lr: 0.000058, loss: 3.2654 +2023-11-25 20:12:56 - train: epoch 0077, iter [00140, 00202], lr: 0.000057, loss: 4.0146 +2023-11-25 20:14:48 - train: epoch 0077, iter [00150, 00202], lr: 0.000057, loss: 3.1038 +2023-11-25 20:16:40 - train: epoch 0077, iter [00160, 00202], lr: 0.000057, loss: 3.2689 +2023-11-25 20:18:33 - train: epoch 0077, iter [00170, 00202], lr: 0.000057, loss: 2.7659 +2023-11-25 20:20:26 - train: epoch 0077, iter [00180, 00202], lr: 0.000056, loss: 3.5971 +2023-11-25 20:22:18 - train: epoch 0077, iter [00190, 00202], lr: 0.000056, loss: 3.1848 +2023-11-25 20:24:10 - train: epoch 0077, iter [00200, 00202], lr: 0.000056, loss: 3.7916 +2023-11-25 20:24:40 - train: epoch 077, train_loss: 3.5547 +2023-11-25 21:00:49 - eval: epoch: 077, acc1: 90.080%, acc5: 99.252%, test_loss: 0.4061, per_image_load_time: 0.191ms, 
per_image_inference_time: 3.833ms +2023-11-25 21:00:53 - until epoch: 077, best_acc1: 90.080% +2023-11-25 21:00:53 - epoch 078 lr: 0.000056 +2023-11-25 21:03:03 - train: epoch 0078, iter [00010, 00202], lr: 0.000056, loss: 3.1176 +2023-11-25 21:05:06 - train: epoch 0078, iter [00020, 00202], lr: 0.000056, loss: 2.8997 +2023-11-25 21:07:07 - train: epoch 0078, iter [00030, 00202], lr: 0.000055, loss: 3.7070 +2023-11-25 21:09:09 - train: epoch 0078, iter [00040, 00202], lr: 0.000055, loss: 4.1264 +2023-11-25 21:11:15 - train: epoch 0078, iter [00050, 00202], lr: 0.000055, loss: 3.3349 +2023-11-25 21:13:19 - train: epoch 0078, iter [00060, 00202], lr: 0.000055, loss: 2.8884 +2023-11-25 21:15:23 - train: epoch 0078, iter [00070, 00202], lr: 0.000054, loss: 3.9982 +2023-11-25 21:17:25 - train: epoch 0078, iter [00080, 00202], lr: 0.000054, loss: 3.6869 +2023-11-25 21:19:28 - train: epoch 0078, iter [00090, 00202], lr: 0.000054, loss: 4.1228 +2023-11-25 21:21:30 - train: epoch 0078, iter [00100, 00202], lr: 0.000054, loss: 4.2650 +2023-11-25 21:23:33 - train: epoch 0078, iter [00110, 00202], lr: 0.000054, loss: 2.7048 +2023-11-25 21:25:35 - train: epoch 0078, iter [00120, 00202], lr: 0.000053, loss: 4.2503 +2023-11-25 21:27:38 - train: epoch 0078, iter [00130, 00202], lr: 0.000053, loss: 3.1053 +2023-11-25 21:29:42 - train: epoch 0078, iter [00140, 00202], lr: 0.000053, loss: 3.5414 +2023-11-25 21:31:48 - train: epoch 0078, iter [00150, 00202], lr: 0.000053, loss: 3.5747 +2023-11-25 21:33:53 - train: epoch 0078, iter [00160, 00202], lr: 0.000052, loss: 3.0397 +2023-11-25 21:35:52 - train: epoch 0078, iter [00170, 00202], lr: 0.000052, loss: 3.2313 +2023-11-25 21:37:46 - train: epoch 0078, iter [00180, 00202], lr: 0.000052, loss: 3.4699 +2023-11-25 21:39:39 - train: epoch 0078, iter [00190, 00202], lr: 0.000052, loss: 3.7335 +2023-11-25 21:41:31 - train: epoch 0078, iter [00200, 00202], lr: 0.000052, loss: 4.2884 +2023-11-25 21:42:00 - train: epoch 078, train_loss: 3.5763 +2023-11-25 22:20:18 - eval: epoch: 078, acc1: 90.129%, acc5: 99.261%, test_loss: 0.4082, per_image_load_time: 0.172ms, per_image_inference_time: 3.828ms +2023-11-25 22:20:22 - until epoch: 078, best_acc1: 90.129% +2023-11-25 22:20:22 - epoch 079 lr: 0.000052 +2023-11-25 22:22:37 - train: epoch 0079, iter [00010, 00202], lr: 0.000051, loss: 4.2300 +2023-11-25 22:24:42 - train: epoch 0079, iter [00020, 00202], lr: 0.000051, loss: 3.1532 +2023-11-25 22:26:47 - train: epoch 0079, iter [00030, 00202], lr: 0.000051, loss: 3.3820 +2023-11-25 22:28:54 - train: epoch 0079, iter [00040, 00202], lr: 0.000051, loss: 2.9862 +2023-11-25 22:31:02 - train: epoch 0079, iter [00050, 00202], lr: 0.000050, loss: 3.3993 +2023-11-25 22:33:14 - train: epoch 0079, iter [00060, 00202], lr: 0.000050, loss: 3.9025 +2023-11-25 22:35:28 - train: epoch 0079, iter [00070, 00202], lr: 0.000050, loss: 4.3217 +2023-11-25 22:37:43 - train: epoch 0079, iter [00080, 00202], lr: 0.000050, loss: 3.4854 +2023-11-25 22:39:57 - train: epoch 0079, iter [00090, 00202], lr: 0.000050, loss: 3.6918 +2023-11-25 22:42:09 - train: epoch 0079, iter [00100, 00202], lr: 0.000049, loss: 4.1392 +2023-11-25 22:44:18 - train: epoch 0079, iter [00110, 00202], lr: 0.000049, loss: 3.7816 +2023-11-25 22:46:24 - train: epoch 0079, iter [00120, 00202], lr: 0.000049, loss: 3.0864 +2023-11-25 22:48:29 - train: epoch 0079, iter [00130, 00202], lr: 0.000049, loss: 3.9592 +2023-11-25 22:50:31 - train: epoch 0079, iter [00140, 00202], lr: 0.000049, loss: 3.5938 +2023-11-25 22:52:31 - train: 
epoch 0079, iter [00150, 00202], lr: 0.000048, loss: 3.5984 +2023-11-25 22:54:29 - train: epoch 0079, iter [00160, 00202], lr: 0.000048, loss: 3.4231 +2023-11-25 22:56:28 - train: epoch 0079, iter [00170, 00202], lr: 0.000048, loss: 2.9762 +2023-11-25 22:58:30 - train: epoch 0079, iter [00180, 00202], lr: 0.000048, loss: 3.7726 +2023-11-25 23:00:34 - train: epoch 0079, iter [00190, 00202], lr: 0.000047, loss: 4.2344 +2023-11-25 23:02:33 - train: epoch 0079, iter [00200, 00202], lr: 0.000047, loss: 3.0030 +2023-11-25 23:03:03 - train: epoch 079, train_loss: 3.5767 +2023-11-25 23:36:55 - eval: epoch: 079, acc1: 90.191%, acc5: 99.275%, test_loss: 0.4088, per_image_load_time: 0.220ms, per_image_inference_time: 3.837ms +2023-11-25 23:36:59 - until epoch: 079, best_acc1: 90.191% +2023-11-25 23:36:59 - epoch 080 lr: 0.000047 +2023-11-25 23:38:58 - train: epoch 0080, iter [00010, 00202], lr: 0.000047, loss: 3.6460 +2023-11-25 23:40:56 - train: epoch 0080, iter [00020, 00202], lr: 0.000047, loss: 2.8872 +2023-11-25 23:42:59 - train: epoch 0080, iter [00030, 00202], lr: 0.000047, loss: 3.9092 +2023-11-25 23:45:02 - train: epoch 0080, iter [00040, 00202], lr: 0.000046, loss: 3.3600 +2023-11-25 23:47:07 - train: epoch 0080, iter [00050, 00202], lr: 0.000046, loss: 3.0046 +2023-11-25 23:49:07 - train: epoch 0080, iter [00060, 00202], lr: 0.000046, loss: 3.7096 +2023-11-25 23:51:01 - train: epoch 0080, iter [00070, 00202], lr: 0.000046, loss: 4.2390 +2023-11-25 23:52:53 - train: epoch 0080, iter [00080, 00202], lr: 0.000046, loss: 4.0653 +2023-11-25 23:54:46 - train: epoch 0080, iter [00090, 00202], lr: 0.000045, loss: 2.8790 +2023-11-25 23:56:39 - train: epoch 0080, iter [00100, 00202], lr: 0.000045, loss: 3.2166 +2023-11-25 23:58:31 - train: epoch 0080, iter [00110, 00202], lr: 0.000045, loss: 3.0862 +2023-11-26 00:00:24 - train: epoch 0080, iter [00120, 00202], lr: 0.000045, loss: 2.7644 +2023-11-26 00:02:18 - train: epoch 0080, iter [00130, 00202], lr: 0.000045, loss: 3.3386 +2023-11-26 00:04:17 - train: epoch 0080, iter [00140, 00202], lr: 0.000044, loss: 4.1582 +2023-11-26 00:06:21 - train: epoch 0080, iter [00150, 00202], lr: 0.000044, loss: 2.8138 +2023-11-26 00:08:25 - train: epoch 0080, iter [00160, 00202], lr: 0.000044, loss: 3.3584 +2023-11-26 00:10:30 - train: epoch 0080, iter [00170, 00202], lr: 0.000044, loss: 3.8719 +2023-11-26 00:12:37 - train: epoch 0080, iter [00180, 00202], lr: 0.000044, loss: 4.0875 +2023-11-26 00:14:45 - train: epoch 0080, iter [00190, 00202], lr: 0.000043, loss: 4.0835 +2023-11-26 00:16:53 - train: epoch 0080, iter [00200, 00202], lr: 0.000043, loss: 3.3940 +2023-11-26 00:17:25 - train: epoch 080, train_loss: 3.5533 +2023-11-26 00:56:44 - eval: epoch: 080, acc1: 90.217%, acc5: 99.286%, test_loss: 0.4029, per_image_load_time: 0.160ms, per_image_inference_time: 3.826ms +2023-11-26 00:56:48 - until epoch: 080, best_acc1: 90.217% +2023-11-26 00:56:48 - epoch 081 lr: 0.000043 +2023-11-26 00:58:47 - train: epoch 0081, iter [00010, 00202], lr: 0.000043, loss: 4.2532 +2023-11-26 01:00:47 - train: epoch 0081, iter [00020, 00202], lr: 0.000043, loss: 4.1268 +2023-11-26 01:02:45 - train: epoch 0081, iter [00030, 00202], lr: 0.000042, loss: 3.7189 +2023-11-26 01:04:39 - train: epoch 0081, iter [00040, 00202], lr: 0.000042, loss: 3.6764 +2023-11-26 01:06:33 - train: epoch 0081, iter [00050, 00202], lr: 0.000042, loss: 3.7613 +2023-11-26 01:08:30 - train: epoch 0081, iter [00060, 00202], lr: 0.000042, loss: 3.4099 +2023-11-26 01:10:30 - train: epoch 0081, iter [00070, 00202], 
lr: 0.000042, loss: 3.9453 +2023-11-26 01:12:31 - train: epoch 0081, iter [00080, 00202], lr: 0.000041, loss: 3.1714 +2023-11-26 01:14:32 - train: epoch 0081, iter [00090, 00202], lr: 0.000041, loss: 2.9056 +2023-11-26 01:16:29 - train: epoch 0081, iter [00100, 00202], lr: 0.000041, loss: 3.5788 +2023-11-26 01:18:22 - train: epoch 0081, iter [00110, 00202], lr: 0.000041, loss: 2.8127 +2023-11-26 01:20:15 - train: epoch 0081, iter [00120, 00202], lr: 0.000041, loss: 3.8911 +2023-11-26 01:22:08 - train: epoch 0081, iter [00130, 00202], lr: 0.000041, loss: 3.7208 +2023-11-26 01:24:00 - train: epoch 0081, iter [00140, 00202], lr: 0.000040, loss: 4.4287 +2023-11-26 01:25:52 - train: epoch 0081, iter [00150, 00202], lr: 0.000040, loss: 3.6098 +2023-11-26 01:27:44 - train: epoch 0081, iter [00160, 00202], lr: 0.000040, loss: 3.7945 +2023-11-26 01:29:36 - train: epoch 0081, iter [00170, 00202], lr: 0.000040, loss: 3.6067 +2023-11-26 01:31:28 - train: epoch 0081, iter [00180, 00202], lr: 0.000040, loss: 2.8924 +2023-11-26 01:33:21 - train: epoch 0081, iter [00190, 00202], lr: 0.000039, loss: 3.8894 +2023-11-26 01:35:14 - train: epoch 0081, iter [00200, 00202], lr: 0.000039, loss: 3.0903 +2023-11-26 01:35:44 - train: epoch 081, train_loss: 3.6058 +2023-11-26 02:09:33 - eval: epoch: 081, acc1: 90.294%, acc5: 99.305%, test_loss: 0.3970, per_image_load_time: 0.225ms, per_image_inference_time: 3.837ms +2023-11-26 02:09:36 - until epoch: 081, best_acc1: 90.294% +2023-11-26 02:09:36 - epoch 082 lr: 0.000039 +2023-11-26 02:11:35 - train: epoch 0082, iter [00010, 00202], lr: 0.000039, loss: 3.7624 +2023-11-26 02:13:27 - train: epoch 0082, iter [00020, 00202], lr: 0.000039, loss: 3.8602 +2023-11-26 02:15:19 - train: epoch 0082, iter [00030, 00202], lr: 0.000039, loss: 2.8420 +2023-11-26 02:17:11 - train: epoch 0082, iter [00040, 00202], lr: 0.000038, loss: 3.8319 +2023-11-26 02:19:02 - train: epoch 0082, iter [00050, 00202], lr: 0.000038, loss: 3.1997 +2023-11-26 02:20:54 - train: epoch 0082, iter [00060, 00202], lr: 0.000038, loss: 3.8223 +2023-11-26 02:22:46 - train: epoch 0082, iter [00070, 00202], lr: 0.000038, loss: 3.2159 +2023-11-26 02:24:38 - train: epoch 0082, iter [00080, 00202], lr: 0.000038, loss: 3.5548 +2023-11-26 02:26:31 - train: epoch 0082, iter [00090, 00202], lr: 0.000037, loss: 3.5536 +2023-11-26 02:28:23 - train: epoch 0082, iter [00100, 00202], lr: 0.000037, loss: 3.3433 +2023-11-26 02:30:15 - train: epoch 0082, iter [00110, 00202], lr: 0.000037, loss: 4.1362 +2023-11-26 02:32:07 - train: epoch 0082, iter [00120, 00202], lr: 0.000037, loss: 4.3042 +2023-11-26 02:33:58 - train: epoch 0082, iter [00130, 00202], lr: 0.000037, loss: 3.2685 +2023-11-26 02:35:49 - train: epoch 0082, iter [00140, 00202], lr: 0.000036, loss: 3.8306 +2023-11-26 02:37:40 - train: epoch 0082, iter [00150, 00202], lr: 0.000036, loss: 3.7314 +2023-11-26 02:39:32 - train: epoch 0082, iter [00160, 00202], lr: 0.000036, loss: 3.5163 +2023-11-26 02:41:24 - train: epoch 0082, iter [00170, 00202], lr: 0.000036, loss: 3.7172 +2023-11-26 02:43:15 - train: epoch 0082, iter [00180, 00202], lr: 0.000036, loss: 3.8822 +2023-11-26 02:45:07 - train: epoch 0082, iter [00190, 00202], lr: 0.000036, loss: 3.3308 +2023-11-26 02:46:58 - train: epoch 0082, iter [00200, 00202], lr: 0.000035, loss: 3.7037 +2023-11-26 02:47:28 - train: epoch 082, train_loss: 3.5659 +2023-11-26 03:20:55 - eval: epoch: 082, acc1: 90.351%, acc5: 99.314%, test_loss: 0.3959, per_image_load_time: 0.234ms, per_image_inference_time: 3.839ms +2023-11-26 03:20:59 - 
until epoch: 082, best_acc1: 90.351% +2023-11-26 03:20:59 - epoch 083 lr: 0.000035 +2023-11-26 03:22:55 - train: epoch 0083, iter [00010, 00202], lr: 0.000035, loss: 4.1782 +2023-11-26 03:24:47 - train: epoch 0083, iter [00020, 00202], lr: 0.000035, loss: 4.1532 +2023-11-26 03:26:40 - train: epoch 0083, iter [00030, 00202], lr: 0.000035, loss: 3.6402 +2023-11-26 03:28:31 - train: epoch 0083, iter [00040, 00202], lr: 0.000035, loss: 3.6277 +2023-11-26 03:30:23 - train: epoch 0083, iter [00050, 00202], lr: 0.000034, loss: 3.5836 +2023-11-26 03:32:14 - train: epoch 0083, iter [00060, 00202], lr: 0.000034, loss: 3.4368 +2023-11-26 03:34:06 - train: epoch 0083, iter [00070, 00202], lr: 0.000034, loss: 3.0824 +2023-11-26 03:35:57 - train: epoch 0083, iter [00080, 00202], lr: 0.000034, loss: 3.4331 +2023-11-26 03:37:49 - train: epoch 0083, iter [00090, 00202], lr: 0.000034, loss: 3.2720 +2023-11-26 03:39:41 - train: epoch 0083, iter [00100, 00202], lr: 0.000034, loss: 3.3304 +2023-11-26 03:41:33 - train: epoch 0083, iter [00110, 00202], lr: 0.000033, loss: 4.1442 +2023-11-26 03:43:25 - train: epoch 0083, iter [00120, 00202], lr: 0.000033, loss: 3.5786 +2023-11-26 03:45:16 - train: epoch 0083, iter [00130, 00202], lr: 0.000033, loss: 2.4902 +2023-11-26 03:47:07 - train: epoch 0083, iter [00140, 00202], lr: 0.000033, loss: 2.8287 +2023-11-26 03:48:59 - train: epoch 0083, iter [00150, 00202], lr: 0.000033, loss: 3.7261 +2023-11-26 03:50:50 - train: epoch 0083, iter [00160, 00202], lr: 0.000032, loss: 2.9984 +2023-11-26 03:52:41 - train: epoch 0083, iter [00170, 00202], lr: 0.000032, loss: 3.5083 +2023-11-26 03:54:33 - train: epoch 0083, iter [00180, 00202], lr: 0.000032, loss: 2.8840 +2023-11-26 03:56:25 - train: epoch 0083, iter [00190, 00202], lr: 0.000032, loss: 3.0911 +2023-11-26 03:58:16 - train: epoch 0083, iter [00200, 00202], lr: 0.000032, loss: 2.2681 +2023-11-26 03:58:46 - train: epoch 083, train_loss: 3.5481 +2023-11-26 04:31:53 - eval: epoch: 083, acc1: 90.397%, acc5: 99.318%, test_loss: 0.3962, per_image_load_time: 0.237ms, per_image_inference_time: 3.838ms +2023-11-26 04:31:56 - until epoch: 083, best_acc1: 90.397% +2023-11-26 04:31:56 - epoch 084 lr: 0.000032 +2023-11-26 04:33:51 - train: epoch 0084, iter [00010, 00202], lr: 0.000032, loss: 4.3067 +2023-11-26 04:35:42 - train: epoch 0084, iter [00020, 00202], lr: 0.000031, loss: 3.6603 +2023-11-26 04:37:33 - train: epoch 0084, iter [00030, 00202], lr: 0.000031, loss: 3.7925 +2023-11-26 04:39:24 - train: epoch 0084, iter [00040, 00202], lr: 0.000031, loss: 3.7535 +2023-11-26 04:41:15 - train: epoch 0084, iter [00050, 00202], lr: 0.000031, loss: 3.7737 +2023-11-26 04:43:05 - train: epoch 0084, iter [00060, 00202], lr: 0.000031, loss: 3.1170 +2023-11-26 04:44:56 - train: epoch 0084, iter [00070, 00202], lr: 0.000031, loss: 3.9479 +2023-11-26 04:46:46 - train: epoch 0084, iter [00080, 00202], lr: 0.000030, loss: 4.2535 +2023-11-26 04:48:37 - train: epoch 0084, iter [00090, 00202], lr: 0.000030, loss: 3.8461 +2023-11-26 04:50:28 - train: epoch 0084, iter [00100, 00202], lr: 0.000030, loss: 4.3971 +2023-11-26 04:52:19 - train: epoch 0084, iter [00110, 00202], lr: 0.000030, loss: 4.4380 +2023-11-26 04:54:10 - train: epoch 0084, iter [00120, 00202], lr: 0.000030, loss: 3.7696 +2023-11-26 04:56:00 - train: epoch 0084, iter [00130, 00202], lr: 0.000029, loss: 3.0881 +2023-11-26 04:57:51 - train: epoch 0084, iter [00140, 00202], lr: 0.000029, loss: 3.0569 +2023-11-26 04:59:41 - train: epoch 0084, iter [00150, 00202], lr: 0.000029, loss: 3.6231 
+2023-11-26 05:01:31 - train: epoch 0084, iter [00160, 00202], lr: 0.000029, loss: 3.5230 +2023-11-26 05:03:22 - train: epoch 0084, iter [00170, 00202], lr: 0.000029, loss: 2.8377 +2023-11-26 05:05:13 - train: epoch 0084, iter [00180, 00202], lr: 0.000029, loss: 3.7568 +2023-11-26 05:07:04 - train: epoch 0084, iter [00190, 00202], lr: 0.000028, loss: 2.8582 +2023-11-26 05:08:55 - train: epoch 0084, iter [00200, 00202], lr: 0.000028, loss: 3.4881 +2023-11-26 05:09:25 - train: epoch 084, train_loss: 3.5622 +2023-11-26 05:42:12 - eval: epoch: 084, acc1: 90.412%, acc5: 99.330%, test_loss: 0.3983, per_image_load_time: 0.238ms, per_image_inference_time: 3.838ms +2023-11-26 05:42:16 - until epoch: 084, best_acc1: 90.412% +2023-11-26 05:42:16 - epoch 085 lr: 0.000028 +2023-11-26 05:44:09 - train: epoch 0085, iter [00010, 00202], lr: 0.000028, loss: 3.0577 +2023-11-26 05:46:00 - train: epoch 0085, iter [00020, 00202], lr: 0.000028, loss: 3.4705 +2023-11-26 05:47:50 - train: epoch 0085, iter [00030, 00202], lr: 0.000028, loss: 4.1902 +2023-11-26 05:49:40 - train: epoch 0085, iter [00040, 00202], lr: 0.000028, loss: 3.9746 +2023-11-26 05:51:29 - train: epoch 0085, iter [00050, 00202], lr: 0.000027, loss: 3.5911 +2023-11-26 05:53:19 - train: epoch 0085, iter [00060, 00202], lr: 0.000027, loss: 3.4173 +2023-11-26 05:55:09 - train: epoch 0085, iter [00070, 00202], lr: 0.000027, loss: 3.2506 +2023-11-26 05:57:00 - train: epoch 0085, iter [00080, 00202], lr: 0.000027, loss: 4.1184 +2023-11-26 05:58:50 - train: epoch 0085, iter [00090, 00202], lr: 0.000027, loss: 2.9239 +2023-11-26 06:00:41 - train: epoch 0085, iter [00100, 00202], lr: 0.000027, loss: 3.6290 +2023-11-26 06:02:31 - train: epoch 0085, iter [00110, 00202], lr: 0.000027, loss: 4.2681 +2023-11-26 06:04:20 - train: epoch 0085, iter [00120, 00202], lr: 0.000026, loss: 4.2813 +2023-11-26 06:06:10 - train: epoch 0085, iter [00130, 00202], lr: 0.000026, loss: 3.1386 +2023-11-26 06:08:01 - train: epoch 0085, iter [00140, 00202], lr: 0.000026, loss: 3.3483 +2023-11-26 06:09:53 - train: epoch 0085, iter [00150, 00202], lr: 0.000026, loss: 3.6326 +2023-11-26 06:11:44 - train: epoch 0085, iter [00160, 00202], lr: 0.000026, loss: 3.1278 +2023-11-26 06:13:36 - train: epoch 0085, iter [00170, 00202], lr: 0.000026, loss: 3.0276 +2023-11-26 06:15:30 - train: epoch 0085, iter [00180, 00202], lr: 0.000025, loss: 3.6197 +2023-11-26 06:17:22 - train: epoch 0085, iter [00190, 00202], lr: 0.000025, loss: 3.2853 +2023-11-26 06:19:14 - train: epoch 0085, iter [00200, 00202], lr: 0.000025, loss: 4.2366 +2023-11-26 06:19:44 - train: epoch 085, train_loss: 3.5517 +2023-11-26 06:53:32 - eval: epoch: 085, acc1: 90.455%, acc5: 99.334%, test_loss: 0.3945, per_image_load_time: 0.215ms, per_image_inference_time: 3.837ms +2023-11-26 06:53:36 - until epoch: 085, best_acc1: 90.455% +2023-11-26 06:53:36 - epoch 086 lr: 0.000025 +2023-11-26 06:55:32 - train: epoch 0086, iter [00010, 00202], lr: 0.000025, loss: 3.9832 +2023-11-26 06:57:22 - train: epoch 0086, iter [00020, 00202], lr: 0.000025, loss: 3.5071 +2023-11-26 06:59:13 - train: epoch 0086, iter [00030, 00202], lr: 0.000025, loss: 3.5343 +2023-11-26 07:01:03 - train: epoch 0086, iter [00040, 00202], lr: 0.000024, loss: 2.9307 +2023-11-26 07:02:54 - train: epoch 0086, iter [00050, 00202], lr: 0.000024, loss: 3.9087 +2023-11-26 07:04:44 - train: epoch 0086, iter [00060, 00202], lr: 0.000024, loss: 3.9297 +2023-11-26 07:06:35 - train: epoch 0086, iter [00070, 00202], lr: 0.000024, loss: 3.5983 +2023-11-26 07:08:26 - train: 
epoch 0086, iter [00080, 00202], lr: 0.000024, loss: 2.9760 +2023-11-26 07:10:17 - train: epoch 0086, iter [00090, 00202], lr: 0.000024, loss: 3.4158 +2023-11-26 07:12:07 - train: epoch 0086, iter [00100, 00202], lr: 0.000024, loss: 3.5525 +2023-11-26 07:13:57 - train: epoch 0086, iter [00110, 00202], lr: 0.000023, loss: 4.0659 +2023-11-26 07:15:47 - train: epoch 0086, iter [00120, 00202], lr: 0.000023, loss: 3.1574 +2023-11-26 07:17:37 - train: epoch 0086, iter [00130, 00202], lr: 0.000023, loss: 3.5730 +2023-11-26 07:19:28 - train: epoch 0086, iter [00140, 00202], lr: 0.000023, loss: 3.3533 +2023-11-26 07:21:19 - train: epoch 0086, iter [00150, 00202], lr: 0.000023, loss: 4.4066 +2023-11-26 07:23:09 - train: epoch 0086, iter [00160, 00202], lr: 0.000023, loss: 3.9282 +2023-11-26 07:25:00 - train: epoch 0086, iter [00170, 00202], lr: 0.000022, loss: 2.9558 +2023-11-26 07:26:50 - train: epoch 0086, iter [00180, 00202], lr: 0.000022, loss: 3.8400 +2023-11-26 07:28:40 - train: epoch 0086, iter [00190, 00202], lr: 0.000022, loss: 3.6725 +2023-11-26 07:30:31 - train: epoch 0086, iter [00200, 00202], lr: 0.000022, loss: 4.0294 +2023-11-26 07:31:00 - train: epoch 086, train_loss: 3.5692 +2023-11-26 08:03:58 - eval: epoch: 086, acc1: 90.502%, acc5: 99.344%, test_loss: 0.3902, per_image_load_time: 0.227ms, per_image_inference_time: 3.839ms +2023-11-26 08:04:02 - until epoch: 086, best_acc1: 90.502% +2023-11-26 08:04:02 - epoch 087 lr: 0.000022 +2023-11-26 08:05:56 - train: epoch 0087, iter [00010, 00202], lr: 0.000022, loss: 3.5306 +2023-11-26 08:07:48 - train: epoch 0087, iter [00020, 00202], lr: 0.000022, loss: 3.2109 +2023-11-26 08:09:38 - train: epoch 0087, iter [00030, 00202], lr: 0.000022, loss: 3.7201 +2023-11-26 08:11:29 - train: epoch 0087, iter [00040, 00202], lr: 0.000021, loss: 3.5395 +2023-11-26 08:13:20 - train: epoch 0087, iter [00050, 00202], lr: 0.000021, loss: 3.7204 +2023-11-26 08:15:11 - train: epoch 0087, iter [00060, 00202], lr: 0.000021, loss: 2.9511 +2023-11-26 08:17:02 - train: epoch 0087, iter [00070, 00202], lr: 0.000021, loss: 3.8352 +2023-11-26 08:18:53 - train: epoch 0087, iter [00080, 00202], lr: 0.000021, loss: 3.8305 +2023-11-26 08:20:44 - train: epoch 0087, iter [00090, 00202], lr: 0.000021, loss: 4.0645 +2023-11-26 08:22:34 - train: epoch 0087, iter [00100, 00202], lr: 0.000021, loss: 3.4097 +2023-11-26 08:24:24 - train: epoch 0087, iter [00110, 00202], lr: 0.000020, loss: 3.9266 +2023-11-26 08:26:15 - train: epoch 0087, iter [00120, 00202], lr: 0.000020, loss: 3.4917 +2023-11-26 08:28:05 - train: epoch 0087, iter [00130, 00202], lr: 0.000020, loss: 4.0278 +2023-11-26 08:29:57 - train: epoch 0087, iter [00140, 00202], lr: 0.000020, loss: 2.9556 +2023-11-26 08:31:48 - train: epoch 0087, iter [00150, 00202], lr: 0.000020, loss: 2.9920 +2023-11-26 08:33:39 - train: epoch 0087, iter [00160, 00202], lr: 0.000020, loss: 3.5065 +2023-11-26 08:35:29 - train: epoch 0087, iter [00170, 00202], lr: 0.000020, loss: 2.4378 +2023-11-26 08:37:20 - train: epoch 0087, iter [00180, 00202], lr: 0.000019, loss: 3.7463 +2023-11-26 08:39:10 - train: epoch 0087, iter [00190, 00202], lr: 0.000019, loss: 3.7842 +2023-11-26 08:41:01 - train: epoch 0087, iter [00200, 00202], lr: 0.000019, loss: 3.1593 +2023-11-26 08:41:30 - train: epoch 087, train_loss: 3.4954 +2023-11-26 09:15:20 - eval: epoch: 087, acc1: 90.508%, acc5: 99.347%, test_loss: 0.3908, per_image_load_time: 0.224ms, per_image_inference_time: 3.839ms +2023-11-26 09:15:24 - until epoch: 087, best_acc1: 90.508% +2023-11-26 11:03:30 
- epoch 088 lr: 0.000019 +2023-11-26 11:05:19 - train: epoch 0088, iter [00010, 00202], lr: 0.000019, loss: 3.3808 +2023-11-26 11:07:04 - train: epoch 0088, iter [00020, 00202], lr: 0.000019, loss: 4.4609 +2023-11-26 11:08:49 - train: epoch 0088, iter [00030, 00202], lr: 0.000019, loss: 3.0488 +2023-11-26 11:10:35 - train: epoch 0088, iter [00040, 00202], lr: 0.000019, loss: 2.4480 +2023-11-26 11:12:20 - train: epoch 0088, iter [00050, 00202], lr: 0.000018, loss: 4.1198 +2023-11-26 11:14:05 - train: epoch 0088, iter [00060, 00202], lr: 0.000018, loss: 3.5254 +2023-11-26 11:15:50 - train: epoch 0088, iter [00070, 00202], lr: 0.000018, loss: 3.7054 +2023-11-26 11:17:35 - train: epoch 0088, iter [00080, 00202], lr: 0.000018, loss: 4.0630 +2023-11-26 11:19:20 - train: epoch 0088, iter [00090, 00202], lr: 0.000018, loss: 3.0500 +2023-11-26 11:21:05 - train: epoch 0088, iter [00100, 00202], lr: 0.000018, loss: 2.9347 +2023-11-26 11:22:50 - train: epoch 0088, iter [00110, 00202], lr: 0.000018, loss: 3.3015 +2023-11-26 11:24:35 - train: epoch 0088, iter [00120, 00202], lr: 0.000018, loss: 3.8649 +2023-11-26 11:26:20 - train: epoch 0088, iter [00130, 00202], lr: 0.000017, loss: 3.3505 +2023-11-26 11:28:06 - train: epoch 0088, iter [00140, 00202], lr: 0.000017, loss: 3.3206 +2023-11-26 11:29:51 - train: epoch 0088, iter [00150, 00202], lr: 0.000017, loss: 3.8282 +2023-11-26 11:31:36 - train: epoch 0088, iter [00160, 00202], lr: 0.000017, loss: 3.1952 +2023-11-26 11:33:21 - train: epoch 0088, iter [00170, 00202], lr: 0.000017, loss: 4.0030 +2023-11-26 11:35:06 - train: epoch 0088, iter [00180, 00202], lr: 0.000017, loss: 2.5376 +2023-11-26 11:36:51 - train: epoch 0088, iter [00190, 00202], lr: 0.000017, loss: 3.6159 +2023-11-26 11:38:36 - train: epoch 0088, iter [00200, 00202], lr: 0.000017, loss: 3.8789 +2023-11-26 11:39:04 - train: epoch 088, train_loss: 3.4810 +2023-11-26 12:07:09 - eval: epoch: 088, acc1: 90.562%, acc5: 99.358%, test_loss: 0.3854, per_image_load_time: 0.102ms, per_image_inference_time: 3.864ms +2023-11-26 12:07:13 - until epoch: 088, best_acc1: 90.562% +2023-11-26 12:07:13 - epoch 089 lr: 0.000017 +2023-11-26 12:09:01 - train: epoch 0089, iter [00010, 00202], lr: 0.000016, loss: 2.2760 +2023-11-26 12:10:46 - train: epoch 0089, iter [00020, 00202], lr: 0.000016, loss: 2.5460 +2023-11-26 12:12:31 - train: epoch 0089, iter [00030, 00202], lr: 0.000016, loss: 3.3216 +2023-11-26 12:14:16 - train: epoch 0089, iter [00040, 00202], lr: 0.000016, loss: 2.8463 +2023-11-26 12:16:01 - train: epoch 0089, iter [00050, 00202], lr: 0.000016, loss: 2.6606 +2023-11-26 12:17:46 - train: epoch 0089, iter [00060, 00202], lr: 0.000016, loss: 3.3372 +2023-11-26 12:19:31 - train: epoch 0089, iter [00070, 00202], lr: 0.000016, loss: 3.1177 +2023-11-26 12:21:16 - train: epoch 0089, iter [00080, 00202], lr: 0.000016, loss: 3.3432 +2023-11-26 12:23:01 - train: epoch 0089, iter [00090, 00202], lr: 0.000015, loss: 3.2490 +2023-11-26 12:24:46 - train: epoch 0089, iter [00100, 00202], lr: 0.000015, loss: 2.9997 +2023-11-26 12:26:31 - train: epoch 0089, iter [00110, 00202], lr: 0.000015, loss: 4.2171 +2023-11-26 12:28:16 - train: epoch 0089, iter [00120, 00202], lr: 0.000015, loss: 3.5047 +2023-11-26 12:30:02 - train: epoch 0089, iter [00130, 00202], lr: 0.000015, loss: 2.5364 +2023-11-26 12:31:47 - train: epoch 0089, iter [00140, 00202], lr: 0.000015, loss: 3.3000 +2023-11-26 12:33:32 - train: epoch 0089, iter [00150, 00202], lr: 0.000015, loss: 3.1569 +2023-11-26 12:35:17 - train: epoch 0089, iter [00160, 
00202], lr: 0.000015, loss: 3.9334 +2023-11-26 12:37:02 - train: epoch 0089, iter [00170, 00202], lr: 0.000014, loss: 3.7587 +2023-11-26 12:38:47 - train: epoch 0089, iter [00180, 00202], lr: 0.000014, loss: 3.5184 +2023-11-26 12:40:32 - train: epoch 0089, iter [00190, 00202], lr: 0.000014, loss: 2.9238 +2023-11-26 12:42:17 - train: epoch 0089, iter [00200, 00202], lr: 0.000014, loss: 2.5783 +2023-11-26 12:42:45 - train: epoch 089, train_loss: 3.4706 +2023-11-26 13:10:52 - eval: epoch: 089, acc1: 90.583%, acc5: 99.364%, test_loss: 0.3873, per_image_load_time: 0.101ms, per_image_inference_time: 3.872ms +2023-11-26 13:10:56 - until epoch: 089, best_acc1: 90.583% +2023-11-26 13:10:56 - epoch 090 lr: 0.000014 +2023-11-26 13:12:45 - train: epoch 0090, iter [00010, 00202], lr: 0.000014, loss: 3.1525 +2023-11-26 13:14:30 - train: epoch 0090, iter [00020, 00202], lr: 0.000014, loss: 3.3730 +2023-11-26 13:16:15 - train: epoch 0090, iter [00030, 00202], lr: 0.000014, loss: 3.1640 +2023-11-26 13:18:00 - train: epoch 0090, iter [00040, 00202], lr: 0.000014, loss: 2.3055 +2023-11-26 13:19:45 - train: epoch 0090, iter [00050, 00202], lr: 0.000013, loss: 3.8938 +2023-11-26 13:21:30 - train: epoch 0090, iter [00060, 00202], lr: 0.000013, loss: 3.2889 +2023-11-26 13:23:15 - train: epoch 0090, iter [00070, 00202], lr: 0.000013, loss: 2.7506 +2023-11-26 13:25:00 - train: epoch 0090, iter [00080, 00202], lr: 0.000013, loss: 3.2144 +2023-11-26 13:26:45 - train: epoch 0090, iter [00090, 00202], lr: 0.000013, loss: 2.7821 +2023-11-26 13:28:30 - train: epoch 0090, iter [00100, 00202], lr: 0.000013, loss: 3.6767 +2023-11-26 13:30:15 - train: epoch 0090, iter [00110, 00202], lr: 0.000013, loss: 3.9887 +2023-11-26 13:32:00 - train: epoch 0090, iter [00120, 00202], lr: 0.000013, loss: 2.7832 +2023-11-26 13:33:45 - train: epoch 0090, iter [00130, 00202], lr: 0.000013, loss: 3.8734 +2023-11-26 13:35:30 - train: epoch 0090, iter [00140, 00202], lr: 0.000012, loss: 3.7535 +2023-11-26 13:37:15 - train: epoch 0090, iter [00150, 00202], lr: 0.000012, loss: 3.3408 +2023-11-26 13:39:00 - train: epoch 0090, iter [00160, 00202], lr: 0.000012, loss: 2.5229 +2023-11-26 13:40:45 - train: epoch 0090, iter [00170, 00202], lr: 0.000012, loss: 3.5866 +2023-11-26 13:42:29 - train: epoch 0090, iter [00180, 00202], lr: 0.000012, loss: 3.1541 +2023-11-26 13:44:14 - train: epoch 0090, iter [00190, 00202], lr: 0.000012, loss: 3.9889 +2023-11-26 13:46:00 - train: epoch 0090, iter [00200, 00202], lr: 0.000012, loss: 3.4736 +2023-11-26 13:46:28 - train: epoch 090, train_loss: 3.4682 +2023-11-26 14:14:34 - eval: epoch: 090, acc1: 90.576%, acc5: 99.365%, test_loss: 0.3888, per_image_load_time: 0.100ms, per_image_inference_time: 3.870ms +2023-11-26 14:14:37 - until epoch: 090, best_acc1: 90.583% +2023-11-26 14:14:37 - epoch 091 lr: 0.000012 +2023-11-26 14:16:26 - train: epoch 0091, iter [00010, 00202], lr: 0.000012, loss: 3.8395 +2023-11-26 14:18:11 - train: epoch 0091, iter [00020, 00202], lr: 0.000012, loss: 3.8355 +2023-11-26 14:19:56 - train: epoch 0091, iter [00030, 00202], lr: 0.000011, loss: 3.4830 +2023-11-26 14:21:41 - train: epoch 0091, iter [00040, 00202], lr: 0.000011, loss: 3.8739 +2023-11-26 14:23:26 - train: epoch 0091, iter [00050, 00202], lr: 0.000011, loss: 3.7926 +2023-11-26 14:25:11 - train: epoch 0091, iter [00060, 00202], lr: 0.000011, loss: 3.3872 +2023-11-26 14:26:56 - train: epoch 0091, iter [00070, 00202], lr: 0.000011, loss: 3.4304 +2023-11-26 14:28:41 - train: epoch 0091, iter [00080, 00202], lr: 0.000011, loss: 3.5472 
+2023-11-26 14:30:26 - train: epoch 0091, iter [00090, 00202], lr: 0.000011, loss: 4.2487 +2023-11-26 14:32:11 - train: epoch 0091, iter [00100, 00202], lr: 0.000011, loss: 3.4621 +2023-11-26 14:33:57 - train: epoch 0091, iter [00110, 00202], lr: 0.000011, loss: 3.3347 +2023-11-26 14:35:42 - train: epoch 0091, iter [00120, 00202], lr: 0.000011, loss: 3.4422 +2023-11-26 14:37:27 - train: epoch 0091, iter [00130, 00202], lr: 0.000010, loss: 3.5813 +2023-11-26 14:39:12 - train: epoch 0091, iter [00140, 00202], lr: 0.000010, loss: 2.8998 +2023-11-26 14:40:57 - train: epoch 0091, iter [00150, 00202], lr: 0.000010, loss: 2.9926 +2023-11-26 14:42:42 - train: epoch 0091, iter [00160, 00202], lr: 0.000010, loss: 3.8951 +2023-11-26 14:44:27 - train: epoch 0091, iter [00170, 00202], lr: 0.000010, loss: 3.5356 +2023-11-26 14:46:12 - train: epoch 0091, iter [00180, 00202], lr: 0.000010, loss: 3.8465 +2023-11-26 14:47:57 - train: epoch 0091, iter [00190, 00202], lr: 0.000010, loss: 4.2987 +2023-11-26 14:49:42 - train: epoch 0091, iter [00200, 00202], lr: 0.000010, loss: 4.0190 +2023-11-26 14:50:10 - train: epoch 091, train_loss: 3.5558 +2023-11-26 15:18:13 - eval: epoch: 091, acc1: 90.586%, acc5: 99.369%, test_loss: 0.3849, per_image_load_time: 0.101ms, per_image_inference_time: 3.868ms +2023-11-26 15:18:18 - until epoch: 091, best_acc1: 90.586% +2023-11-26 15:18:18 - epoch 092 lr: 0.000010 +2023-11-26 15:20:05 - train: epoch 0092, iter [00010, 00202], lr: 0.000010, loss: 3.5241 +2023-11-26 15:21:50 - train: epoch 0092, iter [00020, 00202], lr: 0.000010, loss: 3.6981 +2023-11-26 15:23:35 - train: epoch 0092, iter [00030, 00202], lr: 0.000009, loss: 3.5613 +2023-11-26 15:25:20 - train: epoch 0092, iter [00040, 00202], lr: 0.000009, loss: 3.4447 +2023-11-26 15:27:05 - train: epoch 0092, iter [00050, 00202], lr: 0.000009, loss: 3.7029 +2023-11-26 15:28:50 - train: epoch 0092, iter [00060, 00202], lr: 0.000009, loss: 4.3791 +2023-11-26 15:30:35 - train: epoch 0092, iter [00070, 00202], lr: 0.000009, loss: 3.3236 +2023-11-26 15:32:20 - train: epoch 0092, iter [00080, 00202], lr: 0.000009, loss: 3.5203 +2023-11-26 15:34:05 - train: epoch 0092, iter [00090, 00202], lr: 0.000009, loss: 3.1210 +2023-11-26 15:35:50 - train: epoch 0092, iter [00100, 00202], lr: 0.000009, loss: 3.5241 +2023-11-26 15:37:35 - train: epoch 0092, iter [00110, 00202], lr: 0.000009, loss: 3.3166 +2023-11-26 15:39:20 - train: epoch 0092, iter [00120, 00202], lr: 0.000009, loss: 3.0583 +2023-11-26 15:41:05 - train: epoch 0092, iter [00130, 00202], lr: 0.000009, loss: 2.5377 +2023-11-26 15:42:50 - train: epoch 0092, iter [00140, 00202], lr: 0.000008, loss: 3.4613 +2023-11-26 15:44:35 - train: epoch 0092, iter [00150, 00202], lr: 0.000008, loss: 3.2077 +2023-11-26 15:46:20 - train: epoch 0092, iter [00160, 00202], lr: 0.000008, loss: 4.1266 +2023-11-26 15:48:05 - train: epoch 0092, iter [00170, 00202], lr: 0.000008, loss: 3.2878 +2023-11-26 15:49:50 - train: epoch 0092, iter [00180, 00202], lr: 0.000008, loss: 3.3186 +2023-11-26 15:51:35 - train: epoch 0092, iter [00190, 00202], lr: 0.000008, loss: 3.5745 +2023-11-26 15:53:20 - train: epoch 0092, iter [00200, 00202], lr: 0.000008, loss: 3.4393 +2023-11-26 15:53:49 - train: epoch 092, train_loss: 3.5194 +2023-11-26 16:22:04 - eval: epoch: 092, acc1: 90.630%, acc5: 99.371%, test_loss: 0.3859, per_image_load_time: 0.099ms, per_image_inference_time: 3.870ms +2023-11-26 16:22:08 - until epoch: 092, best_acc1: 90.630% +2023-11-26 16:22:08 - epoch 093 lr: 0.000008 +2023-11-26 16:23:56 - train: 
epoch 0093, iter [00010, 00202], lr: 0.000008, loss: 3.3714 +2023-11-26 16:25:41 - train: epoch 0093, iter [00020, 00202], lr: 0.000008, loss: 3.5354 +2023-11-26 16:27:26 - train: epoch 0093, iter [00030, 00202], lr: 0.000008, loss: 3.9049 +2023-11-26 16:29:12 - train: epoch 0093, iter [00040, 00202], lr: 0.000008, loss: 3.9402 +2023-11-26 16:30:57 - train: epoch 0093, iter [00050, 00202], lr: 0.000008, loss: 3.5633 +2023-11-26 16:32:42 - train: epoch 0093, iter [00060, 00202], lr: 0.000007, loss: 3.6770 +2023-11-26 16:34:27 - train: epoch 0093, iter [00070, 00202], lr: 0.000007, loss: 4.0481 +2023-11-26 16:36:12 - train: epoch 0093, iter [00080, 00202], lr: 0.000007, loss: 2.7821 +2023-11-26 16:37:57 - train: epoch 0093, iter [00090, 00202], lr: 0.000007, loss: 3.9529 +2023-11-26 16:39:42 - train: epoch 0093, iter [00100, 00202], lr: 0.000007, loss: 2.4586 +2023-11-26 16:41:27 - train: epoch 0093, iter [00110, 00202], lr: 0.000007, loss: 3.2339 +2023-11-26 16:43:12 - train: epoch 0093, iter [00120, 00202], lr: 0.000007, loss: 3.9070 +2023-11-26 16:44:57 - train: epoch 0093, iter [00130, 00202], lr: 0.000007, loss: 3.4611 +2023-11-26 16:46:42 - train: epoch 0093, iter [00140, 00202], lr: 0.000007, loss: 3.7535 +2023-11-26 16:48:28 - train: epoch 0093, iter [00150, 00202], lr: 0.000007, loss: 3.9671 +2023-11-26 16:50:13 - train: epoch 0093, iter [00160, 00202], lr: 0.000007, loss: 3.3583 +2023-11-26 16:51:58 - train: epoch 0093, iter [00170, 00202], lr: 0.000007, loss: 3.7233 +2023-11-26 16:53:43 - train: epoch 0093, iter [00180, 00202], lr: 0.000006, loss: 3.7650 +2023-11-26 16:55:28 - train: epoch 0093, iter [00190, 00202], lr: 0.000006, loss: 4.4580 +2023-11-26 16:57:13 - train: epoch 0093, iter [00200, 00202], lr: 0.000006, loss: 2.9312 +2023-11-26 16:57:42 - train: epoch 093, train_loss: 3.5112 +2023-11-26 17:25:46 - eval: epoch: 093, acc1: 90.638%, acc5: 99.378%, test_loss: 0.3837, per_image_load_time: 0.102ms, per_image_inference_time: 3.875ms +2023-11-26 17:25:51 - until epoch: 093, best_acc1: 90.638% +2023-11-26 17:25:51 - epoch 094 lr: 0.000006 +2023-11-26 17:27:39 - train: epoch 0094, iter [00010, 00202], lr: 0.000006, loss: 3.8007 +2023-11-26 17:29:24 - train: epoch 0094, iter [00020, 00202], lr: 0.000006, loss: 3.3695 +2023-11-26 17:31:09 - train: epoch 0094, iter [00030, 00202], lr: 0.000006, loss: 3.1122 +2023-11-26 17:32:54 - train: epoch 0094, iter [00040, 00202], lr: 0.000006, loss: 4.4997 +2023-11-26 17:34:39 - train: epoch 0094, iter [00050, 00202], lr: 0.000006, loss: 3.6924 +2023-11-26 17:36:24 - train: epoch 0094, iter [00060, 00202], lr: 0.000006, loss: 3.4800 +2023-11-26 17:38:09 - train: epoch 0094, iter [00070, 00202], lr: 0.000006, loss: 3.5249 +2023-11-26 17:39:54 - train: epoch 0094, iter [00080, 00202], lr: 0.000006, loss: 3.7790 +2023-11-26 17:41:39 - train: epoch 0094, iter [00090, 00202], lr: 0.000006, loss: 3.6644 +2023-11-26 17:43:24 - train: epoch 0094, iter [00100, 00202], lr: 0.000006, loss: 3.4038 +2023-11-26 17:45:09 - train: epoch 0094, iter [00110, 00202], lr: 0.000006, loss: 3.8476 +2023-11-26 17:46:54 - train: epoch 0094, iter [00120, 00202], lr: 0.000005, loss: 3.1104 +2023-11-26 17:48:40 - train: epoch 0094, iter [00130, 00202], lr: 0.000005, loss: 2.9685 +2023-11-26 17:50:25 - train: epoch 0094, iter [00140, 00202], lr: 0.000005, loss: 3.9008 +2023-11-26 17:52:10 - train: epoch 0094, iter [00150, 00202], lr: 0.000005, loss: 3.1221 +2023-11-26 17:53:55 - train: epoch 0094, iter [00160, 00202], lr: 0.000005, loss: 4.0017 +2023-11-26 17:55:40 - 
train: epoch 0094, iter [00170, 00202], lr: 0.000005, loss: 2.2299 +2023-11-26 17:57:25 - train: epoch 0094, iter [00180, 00202], lr: 0.000005, loss: 3.6210 +2023-11-26 17:59:10 - train: epoch 0094, iter [00190, 00202], lr: 0.000005, loss: 3.7658 +2023-11-26 18:00:55 - train: epoch 0094, iter [00200, 00202], lr: 0.000005, loss: 3.7353 +2023-11-26 18:01:23 - train: epoch 094, train_loss: 3.4675 +2023-11-26 18:29:34 - eval: epoch: 094, acc1: 90.648%, acc5: 99.380%, test_loss: 0.3846, per_image_load_time: 0.097ms, per_image_inference_time: 3.880ms +2023-11-26 18:29:38 - until epoch: 094, best_acc1: 90.648% +2023-11-26 18:29:38 - epoch 095 lr: 0.000005 +2023-11-26 18:31:27 - train: epoch 0095, iter [00010, 00202], lr: 0.000005, loss: 3.6889 +2023-11-26 18:33:12 - train: epoch 0095, iter [00020, 00202], lr: 0.000005, loss: 3.6372 +2023-11-26 18:34:57 - train: epoch 0095, iter [00030, 00202], lr: 0.000005, loss: 4.1441 +2023-11-26 18:36:43 - train: epoch 0095, iter [00040, 00202], lr: 0.000005, loss: 3.3590 +2023-11-26 18:38:28 - train: epoch 0095, iter [00050, 00202], lr: 0.000005, loss: 3.6855 +2023-11-26 18:40:13 - train: epoch 0095, iter [00060, 00202], lr: 0.000005, loss: 3.3676 +2023-11-26 18:41:58 - train: epoch 0095, iter [00070, 00202], lr: 0.000004, loss: 2.6916 +2023-11-26 18:43:44 - train: epoch 0095, iter [00080, 00202], lr: 0.000004, loss: 3.8952 +2023-11-26 18:45:29 - train: epoch 0095, iter [00090, 00202], lr: 0.000004, loss: 3.2519 +2023-11-26 18:47:14 - train: epoch 0095, iter [00100, 00202], lr: 0.000004, loss: 3.6427 +2023-11-26 18:48:59 - train: epoch 0095, iter [00110, 00202], lr: 0.000004, loss: 3.2121 +2023-11-26 18:50:44 - train: epoch 0095, iter [00120, 00202], lr: 0.000004, loss: 3.3943 +2023-11-26 18:52:30 - train: epoch 0095, iter [00130, 00202], lr: 0.000004, loss: 3.8540 +2023-11-26 18:54:15 - train: epoch 0095, iter [00140, 00202], lr: 0.000004, loss: 2.9819 +2023-11-26 18:56:00 - train: epoch 0095, iter [00150, 00202], lr: 0.000004, loss: 3.0788 +2023-11-26 18:57:45 - train: epoch 0095, iter [00160, 00202], lr: 0.000004, loss: 3.5365 +2023-11-26 18:59:31 - train: epoch 0095, iter [00170, 00202], lr: 0.000004, loss: 3.3000 +2023-11-26 19:01:16 - train: epoch 0095, iter [00180, 00202], lr: 0.000004, loss: 3.3901 +2023-11-26 19:03:01 - train: epoch 0095, iter [00190, 00202], lr: 0.000004, loss: 3.0762 +2023-11-26 19:04:46 - train: epoch 0095, iter [00200, 00202], lr: 0.000004, loss: 3.3672 +2023-11-26 19:05:15 - train: epoch 095, train_loss: 3.4688 +2023-11-26 19:33:24 - eval: epoch: 095, acc1: 90.673%, acc5: 99.380%, test_loss: 0.3792, per_image_load_time: 0.101ms, per_image_inference_time: 3.871ms +2023-11-26 19:33:29 - until epoch: 095, best_acc1: 90.673% +2023-11-26 19:33:29 - epoch 096 lr: 0.000004 +2023-11-26 19:35:16 - train: epoch 0096, iter [00010, 00202], lr: 0.000004, loss: 3.6385 +2023-11-26 19:37:01 - train: epoch 0096, iter [00020, 00202], lr: 0.000004, loss: 3.6445 +2023-11-26 19:38:47 - train: epoch 0096, iter [00030, 00202], lr: 0.000004, loss: 3.7354 +2023-11-26 19:40:32 - train: epoch 0096, iter [00040, 00202], lr: 0.000004, loss: 3.5950 +2023-11-26 19:42:17 - train: epoch 0096, iter [00050, 00202], lr: 0.000003, loss: 2.9857 +2023-11-26 19:44:02 - train: epoch 0096, iter [00060, 00202], lr: 0.000003, loss: 2.4867 +2023-11-26 19:45:47 - train: epoch 0096, iter [00070, 00202], lr: 0.000003, loss: 3.6161 +2023-11-26 19:47:32 - train: epoch 0096, iter [00080, 00202], lr: 0.000003, loss: 2.8617 +2023-11-26 19:49:17 - train: epoch 0096, iter [00090, 
00202], lr: 0.000003, loss: 3.5202 +2023-11-26 19:51:03 - train: epoch 0096, iter [00100, 00202], lr: 0.000003, loss: 2.9897 +2023-11-26 19:52:48 - train: epoch 0096, iter [00110, 00202], lr: 0.000003, loss: 2.5320 +2023-11-26 19:54:33 - train: epoch 0096, iter [00120, 00202], lr: 0.000003, loss: 2.9856 +2023-11-26 19:56:18 - train: epoch 0096, iter [00130, 00202], lr: 0.000003, loss: 3.5683 +2023-11-26 19:58:03 - train: epoch 0096, iter [00140, 00202], lr: 0.000003, loss: 3.5570 +2023-11-26 19:59:48 - train: epoch 0096, iter [00150, 00202], lr: 0.000003, loss: 2.8702 +2023-11-26 20:01:33 - train: epoch 0096, iter [00160, 00202], lr: 0.000003, loss: 3.3576 +2023-11-26 20:03:18 - train: epoch 0096, iter [00170, 00202], lr: 0.000003, loss: 3.9762 +2023-11-26 20:05:04 - train: epoch 0096, iter [00180, 00202], lr: 0.000003, loss: 2.1813 +2023-11-26 20:06:49 - train: epoch 0096, iter [00190, 00202], lr: 0.000003, loss: 3.1714 +2023-11-26 20:08:34 - train: epoch 0096, iter [00200, 00202], lr: 0.000003, loss: 3.3952 +2023-11-26 20:09:02 - train: epoch 096, train_loss: 3.4081 +2023-11-26 20:37:11 - eval: epoch: 096, acc1: 90.669%, acc5: 99.383%, test_loss: 0.3829, per_image_load_time: 0.102ms, per_image_inference_time: 3.874ms +2023-11-26 20:37:15 - until epoch: 096, best_acc1: 90.673% +2023-11-26 20:37:15 - epoch 097 lr: 0.000003 +2023-11-26 20:39:03 - train: epoch 0097, iter [00010, 00202], lr: 0.000003, loss: 3.6907 +2023-11-26 20:40:48 - train: epoch 0097, iter [00020, 00202], lr: 0.000003, loss: 3.7816 +2023-11-26 20:42:34 - train: epoch 0097, iter [00030, 00202], lr: 0.000003, loss: 3.8339 +2023-11-26 20:44:19 - train: epoch 0097, iter [00040, 00202], lr: 0.000003, loss: 3.2371 +2023-11-26 20:46:04 - train: epoch 0097, iter [00050, 00202], lr: 0.000003, loss: 4.1299 +2023-11-26 20:47:49 - train: epoch 0097, iter [00060, 00202], lr: 0.000002, loss: 3.8866 +2023-11-26 20:49:34 - train: epoch 0097, iter [00070, 00202], lr: 0.000002, loss: 2.8677 +2023-11-26 20:51:19 - train: epoch 0097, iter [00080, 00202], lr: 0.000002, loss: 3.0239 +2023-11-26 20:53:04 - train: epoch 0097, iter [00090, 00202], lr: 0.000002, loss: 3.0287 +2023-11-26 20:54:50 - train: epoch 0097, iter [00100, 00202], lr: 0.000002, loss: 3.7488 +2023-11-26 20:56:35 - train: epoch 0097, iter [00110, 00202], lr: 0.000002, loss: 3.1269 +2023-11-26 20:58:20 - train: epoch 0097, iter [00120, 00202], lr: 0.000002, loss: 3.1344 +2023-11-26 21:00:05 - train: epoch 0097, iter [00130, 00202], lr: 0.000002, loss: 2.8615 +2023-11-26 21:01:50 - train: epoch 0097, iter [00140, 00202], lr: 0.000002, loss: 3.2634 +2023-11-26 21:03:35 - train: epoch 0097, iter [00150, 00202], lr: 0.000002, loss: 2.7895 +2023-11-26 21:05:21 - train: epoch 0097, iter [00160, 00202], lr: 0.000002, loss: 3.0693 +2023-11-26 21:07:06 - train: epoch 0097, iter [00170, 00202], lr: 0.000002, loss: 3.1079 +2023-11-26 21:08:51 - train: epoch 0097, iter [00180, 00202], lr: 0.000002, loss: 3.1190 +2023-11-26 21:10:36 - train: epoch 0097, iter [00190, 00202], lr: 0.000002, loss: 3.3927 +2023-11-26 21:12:21 - train: epoch 0097, iter [00200, 00202], lr: 0.000002, loss: 3.2615 +2023-11-26 21:12:50 - train: epoch 097, train_loss: 3.4743 +2023-11-26 21:40:52 - eval: epoch: 097, acc1: 90.674%, acc5: 99.385%, test_loss: 0.3833, per_image_load_time: 0.104ms, per_image_inference_time: 3.872ms +2023-11-26 21:40:57 - until epoch: 097, best_acc1: 90.674% +2023-11-26 21:40:57 - epoch 098 lr: 0.000002 +2023-11-26 21:42:44 - train: epoch 0098, iter [00010, 00202], lr: 0.000002, loss: 3.8433 
+2023-11-26 21:44:29 - train: epoch 0098, iter [00020, 00202], lr: 0.000002, loss: 3.5564 +2023-11-26 21:46:14 - train: epoch 0098, iter [00030, 00202], lr: 0.000002, loss: 3.0258 +2023-11-26 21:47:59 - train: epoch 0098, iter [00040, 00202], lr: 0.000002, loss: 3.4780 +2023-11-26 21:49:45 - train: epoch 0098, iter [00050, 00202], lr: 0.000002, loss: 3.5637 +2023-11-26 21:51:30 - train: epoch 0098, iter [00060, 00202], lr: 0.000002, loss: 3.1561 +2023-11-26 21:53:15 - train: epoch 0098, iter [00070, 00202], lr: 0.000002, loss: 2.9781 +2023-11-26 21:55:00 - train: epoch 0098, iter [00080, 00202], lr: 0.000002, loss: 3.4065 +2023-11-26 21:56:45 - train: epoch 0098, iter [00090, 00202], lr: 0.000002, loss: 3.6270 +2023-11-26 21:58:30 - train: epoch 0098, iter [00100, 00202], lr: 0.000002, loss: 3.4630 +2023-11-26 22:00:15 - train: epoch 0098, iter [00110, 00202], lr: 0.000002, loss: 3.5200 +2023-11-26 22:02:00 - train: epoch 0098, iter [00120, 00202], lr: 0.000002, loss: 3.6435 +2023-11-26 22:03:45 - train: epoch 0098, iter [00130, 00202], lr: 0.000002, loss: 3.8987 +2023-11-26 22:05:30 - train: epoch 0098, iter [00140, 00202], lr: 0.000002, loss: 3.6230 +2023-11-26 22:07:16 - train: epoch 0098, iter [00150, 00202], lr: 0.000002, loss: 3.5002 +2023-11-26 22:09:01 - train: epoch 0098, iter [00160, 00202], lr: 0.000002, loss: 4.1184 +2023-11-26 22:10:46 - train: epoch 0098, iter [00170, 00202], lr: 0.000002, loss: 3.5711 +2023-11-26 22:12:31 - train: epoch 0098, iter [00180, 00202], lr: 0.000001, loss: 3.8061 +2023-11-26 22:14:16 - train: epoch 0098, iter [00190, 00202], lr: 0.000001, loss: 3.4126 +2023-11-26 22:16:01 - train: epoch 0098, iter [00200, 00202], lr: 0.000001, loss: 3.6936 +2023-11-26 22:16:30 - train: epoch 098, train_loss: 3.4530 +2023-11-26 22:44:38 - eval: epoch: 098, acc1: 90.681%, acc5: 99.386%, test_loss: 0.3781, per_image_load_time: 0.106ms, per_image_inference_time: 3.872ms +2023-11-26 22:44:42 - until epoch: 098, best_acc1: 90.681% +2023-11-26 22:44:42 - epoch 099 lr: 0.000001 +2023-11-26 22:46:31 - train: epoch 0099, iter [00010, 00202], lr: 0.000001, loss: 3.1804 +2023-11-26 22:48:16 - train: epoch 0099, iter [00020, 00202], lr: 0.000001, loss: 2.9274 +2023-11-26 22:50:01 - train: epoch 0099, iter [00030, 00202], lr: 0.000001, loss: 3.1176 +2023-11-26 22:51:45 - train: epoch 0099, iter [00040, 00202], lr: 0.000001, loss: 3.7542 +2023-11-26 22:53:30 - train: epoch 0099, iter [00050, 00202], lr: 0.000001, loss: 2.6749 +2023-11-26 22:55:15 - train: epoch 0099, iter [00060, 00202], lr: 0.000001, loss: 2.8065 +2023-11-26 22:57:00 - train: epoch 0099, iter [00070, 00202], lr: 0.000001, loss: 4.1493 +2023-11-26 22:58:45 - train: epoch 0099, iter [00080, 00202], lr: 0.000001, loss: 3.7135 +2023-11-26 23:00:30 - train: epoch 0099, iter [00090, 00202], lr: 0.000001, loss: 3.0914 +2023-11-26 23:02:15 - train: epoch 0099, iter [00100, 00202], lr: 0.000001, loss: 2.9837 +2023-11-26 23:04:00 - train: epoch 0099, iter [00110, 00202], lr: 0.000001, loss: 3.0729 +2023-11-26 23:05:45 - train: epoch 0099, iter [00120, 00202], lr: 0.000001, loss: 3.4299 +2023-11-26 23:07:30 - train: epoch 0099, iter [00130, 00202], lr: 0.000001, loss: 3.7217 +2023-11-26 23:09:15 - train: epoch 0099, iter [00140, 00202], lr: 0.000001, loss: 3.4103 +2023-11-26 23:11:00 - train: epoch 0099, iter [00150, 00202], lr: 0.000001, loss: 3.1931 +2023-11-26 23:12:45 - train: epoch 0099, iter [00160, 00202], lr: 0.000001, loss: 3.6328 +2023-11-26 23:14:31 - train: epoch 0099, iter [00170, 00202], lr: 0.000001, loss: 
3.3670 +2023-11-26 23:16:16 - train: epoch 0099, iter [00180, 00202], lr: 0.000001, loss: 3.7985 +2023-11-26 23:18:01 - train: epoch 0099, iter [00190, 00202], lr: 0.000001, loss: 3.1777 +2023-11-26 23:19:46 - train: epoch 0099, iter [00200, 00202], lr: 0.000001, loss: 3.3069 +2023-11-26 23:20:14 - train: epoch 099, train_loss: 3.4973 +2023-11-26 23:48:22 - eval: epoch: 099, acc1: 90.693%, acc5: 99.389%, test_loss: 0.3808, per_image_load_time: 0.106ms, per_image_inference_time: 3.871ms +2023-11-26 23:48:27 - until epoch: 099, best_acc1: 90.693% +2023-11-26 23:48:27 - epoch 100 lr: 0.000001 +2023-11-26 23:50:15 - train: epoch 0100, iter [00010, 00202], lr: 0.000001, loss: 3.9924 +2023-11-26 23:52:00 - train: epoch 0100, iter [00020, 00202], lr: 0.000001, loss: 3.5197 +2023-11-26 23:53:45 - train: epoch 0100, iter [00030, 00202], lr: 0.000001, loss: 2.9738 +2023-11-26 23:55:30 - train: epoch 0100, iter [00040, 00202], lr: 0.000001, loss: 3.7255 +2023-11-26 23:57:15 - train: epoch 0100, iter [00050, 00202], lr: 0.000001, loss: 3.4087 +2023-11-26 23:59:00 - train: epoch 0100, iter [00060, 00202], lr: 0.000001, loss: 2.7187 +2023-11-27 00:00:46 - train: epoch 0100, iter [00070, 00202], lr: 0.000001, loss: 3.7932 +2023-11-27 00:02:31 - train: epoch 0100, iter [00080, 00202], lr: 0.000001, loss: 3.4404 +2023-11-27 00:04:16 - train: epoch 0100, iter [00090, 00202], lr: 0.000001, loss: 2.8661 +2023-11-27 00:06:01 - train: epoch 0100, iter [00100, 00202], lr: 0.000001, loss: 3.9444 +2023-11-27 00:07:46 - train: epoch 0100, iter [00110, 00202], lr: 0.000001, loss: 2.2992 +2023-11-27 00:09:31 - train: epoch 0100, iter [00120, 00202], lr: 0.000001, loss: 3.0916 +2023-11-27 00:11:16 - train: epoch 0100, iter [00130, 00202], lr: 0.000001, loss: 3.6644 +2023-11-27 00:13:01 - train: epoch 0100, iter [00140, 00202], lr: 0.000001, loss: 3.2715 +2023-11-27 00:14:46 - train: epoch 0100, iter [00150, 00202], lr: 0.000001, loss: 2.7133 +2023-11-27 00:16:31 - train: epoch 0100, iter [00160, 00202], lr: 0.000001, loss: 3.8491 +2023-11-27 00:18:17 - train: epoch 0100, iter [00170, 00202], lr: 0.000001, loss: 3.4898 +2023-11-27 00:20:02 - train: epoch 0100, iter [00180, 00202], lr: 0.000001, loss: 3.4861 +2023-11-27 00:21:47 - train: epoch 0100, iter [00190, 00202], lr: 0.000001, loss: 3.3215 +2023-11-27 00:23:32 - train: epoch 0100, iter [00200, 00202], lr: 0.000001, loss: 3.9259 +2023-11-27 00:24:00 - train: epoch 100, train_loss: 3.4519 +2023-11-27 00:52:11 - eval: epoch: 100, acc1: 90.690%, acc5: 99.388%, test_loss: 0.3795, per_image_load_time: 0.106ms, per_image_inference_time: 3.872ms +2023-11-27 00:52:15 - until epoch: 100, best_acc1: 90.693% +2023-11-27 00:52:15 - train done. model: vit_large_patch16, train time: 118.569 hours, best_acc1: 90.693%