diff --git "a/zoedepth_u4k/fine_pretrain/20240313_205222.log" "b/zoedepth_u4k/fine_pretrain/20240313_205222.log" new file mode 100644--- /dev/null +++ "b/zoedepth_u4k/fine_pretrain/20240313_205222.log" @@ -0,0 +1,3138 @@ +2024/03/13 20:52:24 - patchstitcher - INFO - +------------------------------------------------------------ +System environment: + sys.platform: linux + Python: 3.8.18 | packaged by conda-forge | (default, Oct 10 2023, 15:44:36) [GCC 12.3.0] + CUDA available: True + numpy_random_seed: 621 + GPU 0,1,2,3: NVIDIA A100-SXM4-80GB + CUDA_HOME: /sw/rl9g/cuda/11.8/rl9_binary + NVCC: Cuda compilation tools, release 11.8, V11.8.89 + GCC: gcc (GCC) 11.3.1 20220421 (Red Hat 11.3.1-2) + PyTorch: 2.1.2 + PyTorch compiling details: PyTorch built with: + - GCC 9.3 + - C++ Version: 201703 + - Intel(R) oneAPI Math Kernel Library Version 2022.1-Product Build 20220311 for Intel(R) 64 architecture applications + - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4) + - OpenMP 201511 (a.k.a. OpenMP 4.5) + - LAPACK is enabled (usually provided by MKL) + - NNPACK is enabled + - CPU capability usage: AVX2 + - CUDA Runtime 11.8 + - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_37,code=compute_37 + - CuDNN 8.7 + - Magma 2.6.1 + - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.8, CUDNN_VERSION=8.7.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.1.2, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, + + TorchVision: 0.16.2 + OpenCV: 4.8.1 + MMEngine: 0.10.2 + +Runtime environment: + cudnn_benchmark: True + mp_cfg: {'mp_start_method': 'forkserver'} + dist_cfg: {'backend': 'nccl'} + seed: 621 + Distributed launcher: pytorch + Distributed training: True + GPU number: 4 +------------------------------------------------------------ + +2024/03/13 20:52:24 - patchstitcher - INFO - Config: +collect_input_args = [ + 'image_lr', + 'crops_image_hr', + 'depth_gt', + 'crop_depths', + 'bboxs', + 'image_hr', +] +convert_syncbn = True +debug = True +env_cfg = dict( + cudnn_benchmark=True, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='forkserver')) +find_unused_parameters = True +general_dataloader = dict( + batch_size=1, + dataset=dict( + dataset_name='', gt_dir=None, rgb_image_dir='', type='ImageDataset'), + num_workers=2) +launcher = 'pytorch' +log_name = 'fine_pretrain' +max_depth = 80 +min_depth = 0.001 +model = dict( + coarse_branch=dict( + attractor_alpha=1000, + attractor_gamma=2, + attractor_kind='mean', + attractor_type='inv', + aug=True, + bin_centers_type='softplus', + bin_embedding_dim=128, + clip_grad=0.1, + dataset='nyu', + distributed=True, + do_resize=False, + force_keep_ar=True, + freeze_midas_bn=True, + gpu='NULL', + img_size=[ + 384, + 512, + ], + inverse_midas=False, + log_images_every=0.1, + max_depth=80, + max_temp=50.0, + max_translation=100, + memory_efficient=True, + midas_model_type='DPT_BEiT_L_384', + min_depth=0.001, + min_temp=0.0212, + model='zoedepth', + n_attractors=[ + 16, + 8, + 4, + 1, + ], + n_bins=64, + name='ZoeDepth', + notes='', + output_distribution='logbinomial', + prefetch=False, + pretrained_resource= + 'local::./work_dir/ZoeDepthv1_30-Dec_16-29-4e2bc436e4e1_best.pt', + print_losses=False, + project='ZoeDepth', + random_crop=False, + random_translate=False, + root='.', + save_dir='', + shared_dict='NULL', + tags='', + train_midas=True, + translate_prob=0.2, + type='ZoeDepth', + uid='NULL', + use_amp=False, + use_pretrained_midas=True, + use_shared_dict=False, + validate_every=0.25, + version_name='v1', + workers=16), + fine_branch=dict( + attractor_alpha=1000, + attractor_gamma=2, + attractor_kind='mean', + attractor_type='inv', + aug=True, + bin_centers_type='softplus', + bin_embedding_dim=128, + clip_grad=0.1, + dataset='nyu', + distributed=True, + do_resize=False, + force_keep_ar=True, + freeze_midas_bn=True, + gpu='NULL', + img_size=[ + 384, + 512, + ], + inverse_midas=False, + log_images_every=0.1, + max_depth=80, + max_temp=50.0, + max_translation=100, + memory_efficient=True, + midas_model_type='DPT_BEiT_L_384', + min_depth=0.001, + min_temp=0.0212, + model='zoedepth', + n_attractors=[ + 16, + 8, + 4, + 1, + ], + n_bins=64, + name='ZoeDepth', + notes='', + output_distribution='logbinomial', + prefetch=False, + pretrained_resource= + 'local::./work_dir/ZoeDepthv1_30-Dec_16-29-4e2bc436e4e1_best.pt', + print_losses=False, + project='ZoeDepth', + random_crop=False, + random_translate=False, + root='.', + save_dir='', + shared_dict='NULL', + tags='', + train_midas=True, + translate_prob=0.2, + type='ZoeDepth', + uid='NULL', + use_amp=False, + use_pretrained_midas=True, + use_shared_dict=False, + validate_every=0.25, + version_name='v1', + workers=16), + max_depth=80, + min_depth=0.001, + sigloss=dict(type='SILogLoss'), + target='fine', + type='BaselinePretrain') +optim_wrapper = dict( + clip_grad=dict(max_norm=0.1, norm_type=2, type='norm'), + optimizer=dict(lr=0.0002, type='AdamW', weight_decay=0.01), + paramwise_cfg=dict( + bypass_duplicate=True, + custom_keys=dict( + {'fine_branch.core': dict(decay_mult=1.0, lr_mult=0.1)}))) +param_scheduler = dict( + base_momentum=0.85, + cycle_momentum=True, + div_factor=1, + final_div_factor=10000, + max_momentum=0.95, + pct_start=0.5, + three_phase=False) +project = 'patchfusion' +tags = [ + 'fine', +] +test_in_dataloader = dict( + batch_size=1, + dataset=dict( + data_root='./data/u4k', + max_depth=80, + min_depth=0.001, + mode='infer', + split='./data/u4k/splits/test.txt', + transform_cfg=dict(network_process_size=[ + 384, + 512, + ]), + type='UnrealStereo4kDataset'), + num_workers=2) +test_out_dataloader = dict( + batch_size=1, + dataset=dict( + data_root='./data/u4k', + max_depth=80, + min_depth=0.001, + mode='infer', + split='./data/u4k/splits/test_out.txt', + transform_cfg=dict(network_process_size=[ + 384, + 512, + ]), + type='UnrealStereo4kDataset'), + num_workers=2) +train_cfg = dict( + eval_start=0, + log_interval=100, + max_epochs=24, + save_checkpoint_interval=24, + train_log_img_interval=100, + val_interval=2, + val_log_img_interval=50, + val_type='epoch_base') +train_dataloader = dict( + batch_size=4, + dataset=dict( + data_root='./data/u4k', + max_depth=80, + min_depth=0.001, + mode='train', + split='./data/u4k/splits/train.txt', + transform_cfg=dict( + degree=1.0, network_process_size=[ + 384, + 512, + ], random_crop=True), + type='UnrealStereo4kDataset'), + num_workers=4) +val_dataloader = dict( + batch_size=1, + dataset=dict( + data_root='./data/u4k', + max_depth=80, + min_depth=0.001, + mode='infer', + split='./data/u4k/splits/val.txt', + transform_cfg=dict(network_process_size=[ + 384, + 512, + ]), + type='UnrealStereo4kDataset'), + num_workers=2) +work_dir = './work_dir/fine_pretrain' +zoe_depth_config = dict( + attractor_alpha=1000, + attractor_gamma=2, + attractor_kind='mean', + attractor_type='inv', + aug=True, + bin_centers_type='softplus', + bin_embedding_dim=128, + clip_grad=0.1, + dataset='nyu', + distributed=True, + do_resize=False, + force_keep_ar=True, + freeze_midas_bn=True, + gpu='NULL', + img_size=[ + 384, + 512, + ], + inverse_midas=False, + log_images_every=0.1, + max_depth=80, + max_temp=50.0, + max_translation=100, + memory_efficient=True, + midas_model_type='DPT_BEiT_L_384', + min_depth=0.001, + min_temp=0.0212, + model='zoedepth', + n_attractors=[ + 16, + 8, + 4, + 1, + ], + n_bins=64, + name='ZoeDepth', + notes='', + output_distribution='logbinomial', + prefetch=False, + pretrained_resource= + 'local::./work_dir/ZoeDepthv1_30-Dec_16-29-4e2bc436e4e1_best.pt', + print_losses=False, + project='ZoeDepth', + random_crop=False, + random_translate=False, + root='.', + save_dir='', + shared_dict='NULL', + tags='', + train_midas=True, + translate_prob=0.2, + type='ZoeDepth', + uid='NULL', + use_amp=False, + use_pretrained_midas=True, + use_shared_dict=False, + validate_every=0.25, + version_name='v1', + workers=16) + +2024/03/13 20:52:45 - patchstitcher - INFO - Loading deepnet from local::./work_dir/ZoeDepthv1_30-Dec_16-29-4e2bc436e4e1_best.pt +2024/03/13 20:52:46 - patchstitcher - INFO - Current zoedepth.core.prep.resizer is +2024/03/13 20:52:46 - patchstitcher - INFO - DistributedDataParallel( + (module): BaselinePretrain( + (fine_branch): ZoeDepth( + (core): MidasCore( + (core): DPTDepthModel( + (pretrained): Module( + (model): Beit( + (patch_embed): PatchEmbed( + (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16)) + (norm): Identity() + ) + (pos_drop): Dropout(p=0.0, inplace=False) + (blocks): ModuleList( + (0-23): 24 x Block( + (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + (qkv): Linear(in_features=1024, out_features=3072, bias=False) + (attn_drop): Dropout(p=0.0, inplace=False) + (proj): Linear(in_features=1024, out_features=1024, bias=True) + (proj_drop): Dropout(p=0.0, inplace=False) + ) + (drop_path1): Identity() + (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=1024, out_features=4096, bias=True) + (act): GELU(approximate='none') + (drop1): Dropout(p=0.0, inplace=False) + (norm): Identity() + (fc2): Linear(in_features=4096, out_features=1024, bias=True) + (drop2): Dropout(p=0.0, inplace=False) + ) + (drop_path2): Identity() + ) + ) + (norm): Identity() + (fc_norm): LayerNorm((1024,), eps=1e-06, elementwise_affine=True) + (head_drop): Dropout(p=0.0, inplace=False) + (head): Linear(in_features=1024, out_features=1000, bias=True) + ) + (act_postprocess1): Sequential( + (0): ProjectReadout( + (project): Sequential( + (0): Linear(in_features=2048, out_features=1024, bias=True) + (1): GELU(approximate='none') + ) + ) + (1): Transpose() + (2): Unflatten(dim=2, unflattened_size=torch.Size([24, 24])) + (3): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1)) + (4): ConvTranspose2d(256, 256, kernel_size=(4, 4), stride=(4, 4)) + ) + (act_postprocess2): Sequential( + (0): ProjectReadout( + (project): Sequential( + (0): Linear(in_features=2048, out_features=1024, bias=True) + (1): GELU(approximate='none') + ) + ) + (1): Transpose() + (2): Unflatten(dim=2, unflattened_size=torch.Size([24, 24])) + (3): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1)) + (4): ConvTranspose2d(512, 512, kernel_size=(2, 2), stride=(2, 2)) + ) + (act_postprocess3): Sequential( + (0): ProjectReadout( + (project): Sequential( + (0): Linear(in_features=2048, out_features=1024, bias=True) + (1): GELU(approximate='none') + ) + ) + (1): Transpose() + (2): Unflatten(dim=2, unflattened_size=torch.Size([24, 24])) + (3): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1)) + ) + (act_postprocess4): Sequential( + (0): ProjectReadout( + (project): Sequential( + (0): Linear(in_features=2048, out_features=1024, bias=True) + (1): GELU(approximate='none') + ) + ) + (1): Transpose() + (2): Unflatten(dim=2, unflattened_size=torch.Size([24, 24])) + (3): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1)) + (4): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + ) + ) + (scratch): Module( + (layer1_rn): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (layer2_rn): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (layer3_rn): Conv2d(1024, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (layer4_rn): Conv2d(1024, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (refinenet1): FeatureFusionBlock_custom( + (out_conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (resConfUnit1): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (resConfUnit2): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (refinenet2): FeatureFusionBlock_custom( + (out_conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (resConfUnit1): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (resConfUnit2): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (refinenet3): FeatureFusionBlock_custom( + (out_conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (resConfUnit1): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (resConfUnit2): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (refinenet4): FeatureFusionBlock_custom( + (out_conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (resConfUnit1): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (resConfUnit2): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (output_conv): Sequential( + (0): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (1): Interpolate() + (2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (3): ReLU(inplace=True) + (4): Conv2d(32, 1, kernel_size=(1, 1), stride=(1, 1)) + (5): ReLU(inplace=True) + (6): Identity() + ) + ) + ) + ) + (conv2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (seed_bin_regressor): SeedBinRegressorUnnormed( + (_net): Sequential( + (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + (seed_projector): Projector( + (_net): Sequential( + (0): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (projectors): ModuleList( + (0-3): 4 x Projector( + (_net): Sequential( + (0): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + ) + (attractors): ModuleList( + (0): AttractorLayerUnnormed( + (_net): Sequential( + (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + (1): AttractorLayerUnnormed( + (_net): Sequential( + (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 8, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + (2): AttractorLayerUnnormed( + (_net): Sequential( + (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 4, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + (3): AttractorLayerUnnormed( + (_net): Sequential( + (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + ) + (conditional_log_binomial): ConditionalLogBinomial( + (log_binomial_transform): LogBinomial() + (mlp): Sequential( + (0): Conv2d(161, 80, kernel_size=(1, 1), stride=(1, 1)) + (1): GELU(approximate='none') + (2): Conv2d(80, 4, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + ) + (sigloss): SILogLoss() + ) +) +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.cls_token:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.cls_token:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.cls_token:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.cls_token:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.patch_embed.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.patch_embed.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.patch_embed.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.patch_embed.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.patch_embed.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.patch_embed.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.patch_embed.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.patch_embed.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.0.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.1.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.2.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.3.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.4.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.5.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.6.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.7.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.8.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.9.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.10.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.11.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.12.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.13.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.14.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.15.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.16.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.17.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.18.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.19.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.20.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.21.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.22.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.gamma_1:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.gamma_1:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.gamma_1:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.gamma_1:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.gamma_2:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.gamma_2:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.gamma_2:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.gamma_2:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.q_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.q_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.q_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.q_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.v_bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.v_bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.v_bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.v_bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.qkv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.qkv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.qkv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.qkv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.proj.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.proj.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.proj.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.proj.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.proj.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.proj.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.proj.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.attn.proj.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.norm2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.blocks.23.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.fc_norm.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.fc_norm.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.fc_norm.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.fc_norm.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.fc_norm.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.fc_norm.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.fc_norm.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.fc_norm.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.head.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.head.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.head.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.head.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.head.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.head.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.head.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.model.head.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.0.project.0.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.0.project.0.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.0.project.0.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.0.project.0.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.0.project.0.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.0.project.0.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.0.project.0.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.0.project.0.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.3.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.3.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.3.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.3.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.3.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.3.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.3.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.3.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.4.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.4.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.4.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.4.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.4.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.4.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.4.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess1.4.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.0.project.0.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.0.project.0.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.0.project.0.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.0.project.0.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.0.project.0.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.0.project.0.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.0.project.0.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.0.project.0.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.3.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.3.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.3.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.3.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.3.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.3.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.3.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.3.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.4.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.4.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.4.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.4.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.4.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.4.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.4.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess2.4.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.0.project.0.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.0.project.0.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.0.project.0.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.0.project.0.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.0.project.0.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.0.project.0.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.0.project.0.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.0.project.0.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.3.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.3.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.3.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.3.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.3.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.3.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.3.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess3.3.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.0.project.0.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.0.project.0.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.0.project.0.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.0.project.0.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.0.project.0.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.0.project.0.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.0.project.0.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.0.project.0.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.3.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.3.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.3.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.3.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.3.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.3.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.3.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.3.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.4.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.4.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.4.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.4.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.4.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.4.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.4.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.pretrained.act_postprocess4.4.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer1_rn.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer1_rn.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer1_rn.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer1_rn.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer2_rn.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer2_rn.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer2_rn.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer2_rn.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer3_rn.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer3_rn.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer3_rn.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer3_rn.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer4_rn.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer4_rn.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer4_rn.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.layer4_rn.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.out_conv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.out_conv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.out_conv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.out_conv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.out_conv.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.out_conv.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.out_conv.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.out_conv.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.out_conv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.out_conv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.out_conv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.out_conv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.out_conv.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.out_conv.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.out_conv.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.out_conv.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.out_conv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.out_conv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.out_conv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.out_conv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.out_conv.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.out_conv.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.out_conv.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.out_conv.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.out_conv.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.out_conv.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.out_conv.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.out_conv.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.out_conv.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.out_conv.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.out_conv.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.out_conv.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.0.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.0.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.0.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.0.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.0.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.0.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.0.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.0.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.2.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.2.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.2.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.2.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.2.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.2.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.2.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.2.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.4.weight:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.4.weight:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.4.weight:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.4.weight:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.4.bias:lr=2e-05 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.4.bias:weight_decay=0.01 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.4.bias:lr_mult=0.1 +2024/03/13 20:52:54 - patchstitcher - INFO - paramwise_options -- fine_branch.core.core.scratch.output_conv.4.bias:decay_mult=1.0 +2024/03/13 20:52:54 - patchstitcher - INFO - successfully init trainer +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.cls_token +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.patch_embed.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.patch_embed.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.0.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.1.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.2.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.3.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.4.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.5.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.6.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.7.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.8.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.9.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.10.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.11.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.12.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.13.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.14.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.15.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.16.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.17.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.18.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.19.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.20.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.21.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.22.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.gamma_1 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.gamma_2 +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.norm1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.norm1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.attn.q_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.attn.v_bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.attn.relative_position_bias_table +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.attn.qkv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.attn.proj.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.attn.proj.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.norm2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.norm2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.mlp.fc1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.mlp.fc1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.mlp.fc2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.blocks.23.mlp.fc2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.fc_norm.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.fc_norm.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.head.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.model.head.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess1.0.project.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess1.0.project.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess1.3.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess1.3.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess1.4.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess1.4.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess2.0.project.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess2.0.project.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess2.3.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess2.3.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess2.4.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess2.4.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess3.0.project.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess3.0.project.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess3.3.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess3.3.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess4.0.project.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess4.0.project.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess4.3.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess4.3.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess4.4.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.pretrained.act_postprocess4.4.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.layer1_rn.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.layer2_rn.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.layer3_rn.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.layer4_rn.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet1.out_conv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet1.out_conv.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet2.out_conv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet2.out_conv.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet3.out_conv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet3.out_conv.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet4.out_conv.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet4.out_conv.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.output_conv.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.output_conv.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.output_conv.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.output_conv.2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.output_conv.4.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.core.core.scratch.output_conv.4.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.conv2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.conv2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.seed_bin_regressor._net.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.seed_bin_regressor._net.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.seed_bin_regressor._net.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.seed_bin_regressor._net.2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.seed_projector._net.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.seed_projector._net.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.seed_projector._net.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.seed_projector._net.2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.0._net.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.0._net.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.0._net.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.0._net.2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.1._net.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.1._net.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.1._net.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.1._net.2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.2._net.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.2._net.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.2._net.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.2._net.2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.3._net.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.3._net.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.3._net.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.projectors.3._net.2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.0._net.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.0._net.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.0._net.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.0._net.2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.1._net.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.1._net.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.1._net.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.1._net.2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.2._net.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.2._net.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.2._net.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.2._net.2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.3._net.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.3._net.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.3._net.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.attractors.3._net.2.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.conditional_log_binomial.mlp.0.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.conditional_log_binomial.mlp.0.bias +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.conditional_log_binomial.mlp.2.weight +2024/03/13 20:52:54 - patchstitcher - INFO - training param: module.fine_branch.conditional_log_binomial.mlp.2.bias +2024/03/13 20:56:03 - patchstitcher - INFO - Epoch: [01/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.599234938621521 - fine_loss: 1.599234938621521 +2024/03/13 20:58:18 - patchstitcher - INFO - Epoch: [01/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 2.532805919647217 - fine_loss: 2.532805919647217 +2024/03/13 21:00:37 - patchstitcher - INFO - Epoch: [01/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 2.651911735534668 - fine_loss: 2.651911735534668 +2024/03/13 21:02:50 - patchstitcher - INFO - Epoch: [01/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.3778038024902344 - fine_loss: 1.3778038024902344 +2024/03/13 21:07:07 - patchstitcher - INFO - Epoch: [02/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.3319225311279297 - fine_loss: 1.3319225311279297 +2024/03/13 21:09:22 - patchstitcher - INFO - Epoch: [02/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.8603584170341492 - fine_loss: 0.8603584170341492 +2024/03/13 21:11:36 - patchstitcher - INFO - Epoch: [02/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 3.489706516265869 - fine_loss: 3.489706516265869 +2024/03/13 21:13:48 - patchstitcher - INFO - Epoch: [02/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.9214969873428345 - fine_loss: 1.9214969873428345 +2024/03/13 21:16:01 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+----------+----------+-----------+-----------+------------+----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+----------+----------+-----------+-----------+------------+----------+-----------+ +| 0.7625762 | 0.9453185 | 0.9828492 | 0.179127 | 1.935254 | 0.0711208 | 0.2121196 | 17.9201627 | 0.340409 | 1.4841475 | ++-----------+-----------+-----------+----------+----------+-----------+-----------+------------+----------+-----------+ +2024/03/13 21:18:19 - patchstitcher - INFO - Epoch: [03/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.2977005243301392 - fine_loss: 1.2977005243301392 +2024/03/13 21:20:30 - patchstitcher - INFO - Epoch: [03/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.1420713663101196 - fine_loss: 1.1420713663101196 +2024/03/13 21:22:44 - patchstitcher - INFO - Epoch: [03/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.5140266418457031 - fine_loss: 1.5140266418457031 +2024/03/13 21:25:03 - patchstitcher - INFO - Epoch: [03/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.2391127347946167 - fine_loss: 1.2391127347946167 +2024/03/13 21:29:03 - patchstitcher - INFO - Epoch: [04/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.6997259855270386 - fine_loss: 1.6997259855270386 +2024/03/13 21:31:11 - patchstitcher - INFO - Epoch: [04/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.5324516296386719 - fine_loss: 1.5324516296386719 +2024/03/13 21:33:26 - patchstitcher - INFO - Epoch: [04/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.607362151145935 - fine_loss: 1.607362151145935 +2024/03/13 21:35:41 - patchstitcher - INFO - Epoch: [04/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.4949111342430115 - fine_loss: 0.4949111342430115 +2024/03/13 21:37:52 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +| 0.8603373 | 0.9786087 | 0.9932337 | 0.1248343 | 1.7894115 | 0.0535003 | 0.1648102 | 14.7693928 | 0.2438148 | 1.3317589 | ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +2024/03/13 21:40:11 - patchstitcher - INFO - Epoch: [05/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.7588756084442139 - fine_loss: 1.7588756084442139 +2024/03/13 21:42:22 - patchstitcher - INFO - Epoch: [05/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.6122137308120728 - fine_loss: 1.6122137308120728 +2024/03/13 21:44:39 - patchstitcher - INFO - Epoch: [05/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.9482064247131348 - fine_loss: 0.9482064247131348 +2024/03/13 21:46:52 - patchstitcher - INFO - Epoch: [05/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.1282145977020264 - fine_loss: 1.1282145977020264 +2024/03/13 21:50:47 - patchstitcher - INFO - Epoch: [06/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.0245898962020874 - fine_loss: 1.0245898962020874 +2024/03/13 21:53:01 - patchstitcher - INFO - Epoch: [06/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.3690811395645142 - fine_loss: 1.3690811395645142 +2024/03/13 21:55:18 - patchstitcher - INFO - Epoch: [06/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.2328670024871826 - fine_loss: 1.2328670024871826 +2024/03/13 21:57:33 - patchstitcher - INFO - Epoch: [06/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.2906349897384644 - fine_loss: 1.2906349897384644 +2024/03/13 21:59:39 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+----------+-----------+-----------+------------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+----------+-----------+-----------+------------+-----------+-----------+ +| 0.8467511 | 0.9792319 | 0.9928145 | 0.1210274 | 2.022959 | 0.0549113 | 0.1685774 | 14.5005249 | 0.2755687 | 1.3921653 | ++-----------+-----------+-----------+-----------+----------+-----------+-----------+------------+-----------+-----------+ +2024/03/13 22:02:00 - patchstitcher - INFO - Epoch: [07/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.8478381633758545 - fine_loss: 0.8478381633758545 +2024/03/13 22:04:10 - patchstitcher - INFO - Epoch: [07/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.5995430946350098 - fine_loss: 1.5995430946350098 +2024/03/13 22:06:19 - patchstitcher - INFO - Epoch: [07/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.9429503083229065 - fine_loss: 0.9429503083229065 +2024/03/13 22:08:32 - patchstitcher - INFO - Epoch: [07/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.1695109605789185 - fine_loss: 1.1695109605789185 +2024/03/13 22:12:31 - patchstitcher - INFO - Epoch: [08/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.8967710733413696 - fine_loss: 0.8967710733413696 +2024/03/13 22:14:44 - patchstitcher - INFO - Epoch: [08/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.1113226413726807 - fine_loss: 1.1113226413726807 +2024/03/13 22:16:54 - patchstitcher - INFO - Epoch: [08/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 2.1354031562805176 - fine_loss: 2.1354031562805176 +2024/03/13 22:19:11 - patchstitcher - INFO - Epoch: [08/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.4950610399246216 - fine_loss: 1.4950610399246216 +2024/03/13 22:21:19 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +| 0.8484159 | 0.9792835 | 0.9928952 | 0.1264401 | 1.6712924 | 0.0572679 | 0.1711799 | 14.0565176 | 0.2308308 | 1.3130357 | ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +2024/03/13 22:23:40 - patchstitcher - INFO - Epoch: [09/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.9288806915283203 - fine_loss: 1.9288806915283203 +2024/03/13 22:25:55 - patchstitcher - INFO - Epoch: [09/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.682614803314209 - fine_loss: 1.682614803314209 +2024/03/13 22:28:09 - patchstitcher - INFO - Epoch: [09/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.8962997198104858 - fine_loss: 1.8962997198104858 +2024/03/13 22:30:19 - patchstitcher - INFO - Epoch: [09/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.9449973702430725 - fine_loss: 0.9449973702430725 +2024/03/13 22:34:14 - patchstitcher - INFO - Epoch: [10/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.5700854063034058 - fine_loss: 0.5700854063034058 +2024/03/13 22:36:30 - patchstitcher - INFO - Epoch: [10/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.9589433073997498 - fine_loss: 0.9589433073997498 +2024/03/13 22:38:44 - patchstitcher - INFO - Epoch: [10/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.8091579675674438 - fine_loss: 0.8091579675674438 +2024/03/13 22:41:00 - patchstitcher - INFO - Epoch: [10/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.6324881315231323 - fine_loss: 0.6324881315231323 +2024/03/13 22:43:07 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +| 0.9040357 | 0.9871989 | 0.9961887 | 0.1059332 | 1.5591676 | 0.0460066 | 0.1402428 | 12.6528556 | 0.1886749 | 1.1907095 | ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +2024/03/13 22:45:28 - patchstitcher - INFO - Epoch: [11/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.8797756433486938 - fine_loss: 0.8797756433486938 +2024/03/13 22:47:43 - patchstitcher - INFO - Epoch: [11/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.8399388194084167 - fine_loss: 0.8399388194084167 +2024/03/13 22:49:54 - patchstitcher - INFO - Epoch: [11/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 2.1201016902923584 - fine_loss: 2.1201016902923584 +2024/03/13 22:52:08 - patchstitcher - INFO - Epoch: [11/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.0084044933319092 - fine_loss: 1.0084044933319092 +2024/03/13 22:56:04 - patchstitcher - INFO - Epoch: [12/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.8885384202003479 - fine_loss: 0.8885384202003479 +2024/03/13 22:58:21 - patchstitcher - INFO - Epoch: [12/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.2604622840881348 - fine_loss: 1.2604622840881348 +2024/03/13 23:00:33 - patchstitcher - INFO - Epoch: [12/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.5557081699371338 - fine_loss: 1.5557081699371338 +2024/03/13 23:02:48 - patchstitcher - INFO - Epoch: [12/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.8595406413078308 - fine_loss: 0.8595406413078308 +2024/03/13 23:04:51 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +| 0.9231403 | 0.9881373 | 0.9960707 | 0.0974467 | 1.5368911 | 0.0418611 | 0.1323032 | 12.0398777 | 0.1746615 | 1.1871637 | ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+------------+-----------+-----------+ +2024/03/13 23:07:15 - patchstitcher - INFO - Epoch: [13/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.7663759589195251 - fine_loss: 0.7663759589195251 +2024/03/13 23:09:25 - patchstitcher - INFO - Epoch: [13/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.012923002243042 - fine_loss: 1.012923002243042 +2024/03/13 23:11:36 - patchstitcher - INFO - Epoch: [13/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.8841744661331177 - fine_loss: 0.8841744661331177 +2024/03/13 23:13:49 - patchstitcher - INFO - Epoch: [13/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.757501482963562 - fine_loss: 0.757501482963562 +2024/03/13 23:17:42 - patchstitcher - INFO - Epoch: [14/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.42909500002861023 - fine_loss: 0.42909500002861023 +2024/03/13 23:20:00 - patchstitcher - INFO - Epoch: [14/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.0589606761932373 - fine_loss: 1.0589606761932373 +2024/03/13 23:22:11 - patchstitcher - INFO - Epoch: [14/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.9697980284690857 - fine_loss: 0.9697980284690857 +2024/03/13 23:24:24 - patchstitcher - INFO - Epoch: [14/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.6079586148262024 - fine_loss: 0.6079586148262024 +2024/03/13 23:26:29 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+-----------+-----------+----------+------------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+-----------+-----------+----------+------------+-----------+-----------+ +| 0.9097487 | 0.9879261 | 0.9962917 | 0.0971912 | 1.4695581 | 0.0444763 | 0.138043 | 11.7783045 | 0.1649168 | 1.1237795 | ++-----------+-----------+-----------+-----------+-----------+-----------+----------+------------+-----------+-----------+ +2024/03/13 23:28:50 - patchstitcher - INFO - Epoch: [15/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.9518700838088989 - fine_loss: 0.9518700838088989 +2024/03/13 23:30:59 - patchstitcher - INFO - Epoch: [15/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.2090339660644531 - fine_loss: 1.2090339660644531 +2024/03/13 23:33:13 - patchstitcher - INFO - Epoch: [15/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.0048778057098389 - fine_loss: 1.0048778057098389 +2024/03/13 23:35:26 - patchstitcher - INFO - Epoch: [15/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.4002463817596436 - fine_loss: 1.4002463817596436 +2024/03/13 23:39:20 - patchstitcher - INFO - Epoch: [16/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.8937550187110901 - fine_loss: 0.8937550187110901 +2024/03/13 23:41:34 - patchstitcher - INFO - Epoch: [16/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.984773576259613 - fine_loss: 0.984773576259613 +2024/03/13 23:43:50 - patchstitcher - INFO - Epoch: [16/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.1533817052841187 - fine_loss: 1.1533817052841187 +2024/03/13 23:46:01 - patchstitcher - INFO - Epoch: [16/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 2.18684720993042 - fine_loss: 2.18684720993042 +2024/03/13 23:48:06 - patchstitcher - INFO - Evaluation Summary: ++-----------+----------+-----------+-----------+----------+-----------+-----------+------------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+----------+-----------+-----------+----------+-----------+-----------+------------+-----------+-----------+ +| 0.9535723 | 0.990796 | 0.9966508 | 0.0810093 | 1.337959 | 0.0349151 | 0.1136651 | 10.3636752 | 0.1353082 | 1.0019814 | ++-----------+----------+-----------+-----------+----------+-----------+-----------+------------+-----------+-----------+ +2024/03/13 23:50:25 - patchstitcher - INFO - Epoch: [17/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.8088462352752686 - fine_loss: 0.8088462352752686 +2024/03/13 23:52:41 - patchstitcher - INFO - Epoch: [17/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.0581305027008057 - fine_loss: 1.0581305027008057 +2024/03/13 23:54:52 - patchstitcher - INFO - Epoch: [17/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.3186157941818237 - fine_loss: 1.3186157941818237 +2024/03/13 23:57:06 - patchstitcher - INFO - Epoch: [17/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.3919904232025146 - fine_loss: 1.3919904232025146 +2024/03/14 00:01:02 - patchstitcher - INFO - Epoch: [18/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.6165844798088074 - fine_loss: 0.6165844798088074 +2024/03/14 00:03:15 - patchstitcher - INFO - Epoch: [18/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.5537874698638916 - fine_loss: 0.5537874698638916 +2024/03/14 00:05:25 - patchstitcher - INFO - Epoch: [18/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.7398358583450317 - fine_loss: 0.7398358583450317 +2024/03/14 00:07:35 - patchstitcher - INFO - Epoch: [18/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.073343276977539 - fine_loss: 1.073343276977539 +2024/03/14 00:09:46 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+---------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+---------+ +| 0.9623082 | 0.9914456 | 0.9967468 | 0.0768577 | 1.3010942 | 0.0336142 | 0.108503 | 9.8235238 | 0.1265531 | 1.01389 | ++-----------+-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+---------+ +2024/03/14 00:12:05 - patchstitcher - INFO - Epoch: [19/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.7159844636917114 - fine_loss: 0.7159844636917114 +2024/03/14 00:14:16 - patchstitcher - INFO - Epoch: [19/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.753825306892395 - fine_loss: 0.753825306892395 +2024/03/14 00:16:30 - patchstitcher - INFO - Epoch: [19/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.8764025568962097 - fine_loss: 0.8764025568962097 +2024/03/14 00:18:46 - patchstitcher - INFO - Epoch: [19/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.5341998338699341 - fine_loss: 0.5341998338699341 +2024/03/14 00:22:41 - patchstitcher - INFO - Epoch: [20/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.8124048709869385 - fine_loss: 0.8124048709869385 +2024/03/14 00:24:54 - patchstitcher - INFO - Epoch: [20/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.6512057781219482 - fine_loss: 0.6512057781219482 +2024/03/14 00:27:06 - patchstitcher - INFO - Epoch: [20/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.8805146813392639 - fine_loss: 0.8805146813392639 +2024/03/14 00:29:19 - patchstitcher - INFO - Epoch: [20/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.0138983726501465 - fine_loss: 1.0138983726501465 +2024/03/14 00:31:25 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| 0.9647589 | 0.9937891 | 0.9979442 | 0.0679192 | 1.2152941 | 0.0292711 | 0.0977083 | 9.0947562 | 0.1188258 | 1.0147021 | ++-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +2024/03/14 00:33:43 - patchstitcher - INFO - Epoch: [21/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.6171627044677734 - fine_loss: 0.6171627044677734 +2024/03/14 00:35:54 - patchstitcher - INFO - Epoch: [21/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.609038233757019 - fine_loss: 1.609038233757019 +2024/03/14 00:38:04 - patchstitcher - INFO - Epoch: [21/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.34113970398902893 - fine_loss: 0.34113970398902893 +2024/03/14 00:40:18 - patchstitcher - INFO - Epoch: [21/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.2947721481323242 - fine_loss: 1.2947721481323242 +2024/03/14 00:44:14 - patchstitcher - INFO - Epoch: [22/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.7315841913223267 - fine_loss: 0.7315841913223267 +2024/03/14 00:46:29 - patchstitcher - INFO - Epoch: [22/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.965033233165741 - fine_loss: 0.965033233165741 +2024/03/14 00:48:39 - patchstitcher - INFO - Epoch: [22/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.2988400459289551 - fine_loss: 0.2988400459289551 +2024/03/14 00:50:52 - patchstitcher - INFO - Epoch: [22/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.6066534519195557 - fine_loss: 0.6066534519195557 +2024/03/14 00:52:54 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| 0.9706722 | 0.9935537 | 0.9979067 | 0.060627 | 1.1660999 | 0.0263799 | 0.0914949 | 8.6839164 | 0.1041247 | 0.9264939 | ++-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+ +2024/03/14 00:55:17 - patchstitcher - INFO - Epoch: [23/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.5104568600654602 - fine_loss: 0.5104568600654602 +2024/03/14 00:57:31 - patchstitcher - INFO - Epoch: [23/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.43037480115890503 - fine_loss: 0.43037480115890503 +2024/03/14 00:59:40 - patchstitcher - INFO - Epoch: [23/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.8868927955627441 - fine_loss: 0.8868927955627441 +2024/03/14 01:01:53 - patchstitcher - INFO - Epoch: [23/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.9281996488571167 - fine_loss: 1.9281996488571167 +2024/03/14 01:05:53 - patchstitcher - INFO - Epoch: [24/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.5804940462112427 - fine_loss: 0.5804940462112427 +2024/03/14 01:08:03 - patchstitcher - INFO - Epoch: [24/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.7424122095108032 - fine_loss: 0.7424122095108032 +2024/03/14 01:10:20 - patchstitcher - INFO - Epoch: [24/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.9339959621429443 - fine_loss: 0.9339959621429443 +2024/03/14 01:12:30 - patchstitcher - INFO - Epoch: [24/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.2626660466194153 - fine_loss: 0.2626660466194153 +2024/03/14 01:14:35 - patchstitcher - INFO - Evaluation Summary: ++-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| 0.9723933 | 0.993793 | 0.9979557 | 0.0599933 | 1.1559685 | 0.0259921 | 0.0900158 | 8.4997659 | 0.1029419 | 0.9294583 | ++-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +2024/03/14 01:14:35 - patchstitcher - INFO - Saving ckp, but use the inner get_save_dict fuction to get model_dict +2024/03/14 01:14:35 - patchstitcher - INFO - For saving space. Would you like to save coarse model several times? :> +2024/03/14 01:14:38 - patchstitcher - INFO - save checkpoint_24.pth at ./work_dir/fine_pretrain