diff --git "a/zoedepth_u4k/coarse_pretrain/20240313_154004.log" "b/zoedepth_u4k/coarse_pretrain/20240313_154004.log" new file mode 100644--- /dev/null +++ "b/zoedepth_u4k/coarse_pretrain/20240313_154004.log" @@ -0,0 +1,3138 @@ +2024/03/13 15:40:06 - patchstitcher - INFO - +------------------------------------------------------------ +System environment: + sys.platform: linux + Python: 3.8.18 | packaged by conda-forge | (default, Oct 10 2023, 15:44:36) [GCC 12.3.0] + CUDA available: True + numpy_random_seed: 621 + GPU 0,1,2,3: NVIDIA A100-SXM4-80GB + CUDA_HOME: /sw/rl9g/cuda/11.8/rl9_binary + NVCC: Cuda compilation tools, release 11.8, V11.8.89 + GCC: gcc (GCC) 11.3.1 20220421 (Red Hat 11.3.1-2) + PyTorch: 2.1.2 + PyTorch compiling details: PyTorch built with: + - GCC 9.3 + - C++ Version: 201703 + - Intel(R) oneAPI Math Kernel Library Version 2022.1-Product Build 20220311 for Intel(R) 64 architecture applications + - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4) + - OpenMP 201511 (a.k.a. OpenMP 4.5) + - LAPACK is enabled (usually provided by MKL) + - NNPACK is enabled + - CPU capability usage: AVX2 + - CUDA Runtime 11.8 + - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_37,code=compute_37 + - CuDNN 8.7 + - Magma 2.6.1 + - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.8, CUDNN_VERSION=8.7.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.1.2, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, + + TorchVision: 0.16.2 + OpenCV: 4.8.1 + MMEngine: 0.10.2 + +Runtime environment: + cudnn_benchmark: True + mp_cfg: {'mp_start_method': 'forkserver'} + dist_cfg: {'backend': 'nccl'} + seed: 621 + Distributed launcher: pytorch + Distributed training: True + GPU number: 4 +------------------------------------------------------------ + +2024/03/13 15:40:06 - patchstitcher - INFO - Config: +collect_input_args = [ + 'image_lr', + 'crops_image_hr', + 'depth_gt', + 'crop_depths', + 'bboxs', + 'image_hr', +] +convert_syncbn = True +debug = True +env_cfg = dict( + cudnn_benchmark=True, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='forkserver')) +find_unused_parameters = True +general_dataloader = dict( + batch_size=1, + dataset=dict( + dataset_name='', gt_dir=None, rgb_image_dir='', type='ImageDataset'), + num_workers=2) +launcher = 'pytorch' +log_name = 'coarse_pretrain' +max_depth = 80 +min_depth = 0.001 +model = dict( + coarse_branch=dict( + attractor_alpha=1000, + attractor_gamma=2, + attractor_kind='mean', + attractor_type='inv', + aug=True, + bin_centers_type='softplus', + bin_embedding_dim=128, + clip_grad=0.1, + dataset='nyu', + distributed=True, + do_resize=False, + force_keep_ar=True, + freeze_midas_bn=True, + gpu='NULL', + img_size=[ + 384, + 512, + ], + inverse_midas=False, + log_images_every=0.1, + max_depth=80, + max_temp=50.0, + max_translation=100, + memory_efficient=True, + midas_model_type='DPT_BEiT_L_384', + min_depth=0.001, + min_temp=0.0212, + model='zoedepth', + n_attractors=[ + 16, + 8, + 4, + 1, + ], + n_bins=64, + name='ZoeDepth', + notes='', + output_distribution='logbinomial', + prefetch=False, + pretrained_resource= + 'local::./work_dir/ZoeDepthv1_30-Dec_16-29-4e2bc436e4e1_best.pt', + print_losses=False, + project='ZoeDepth', + random_crop=False, + random_translate=False, + root='.', + save_dir='', + shared_dict='NULL', + tags='', + train_midas=True, + translate_prob=0.2, + type='ZoeDepth', + uid='NULL', + use_amp=False, + use_pretrained_midas=True, + use_shared_dict=False, + validate_every=0.25, + version_name='v1', + workers=16), + fine_branch=dict( + attractor_alpha=1000, + attractor_gamma=2, + attractor_kind='mean', + attractor_type='inv', + aug=True, + bin_centers_type='softplus', + bin_embedding_dim=128, + clip_grad=0.1, + dataset='nyu', + distributed=True, + do_resize=False, + force_keep_ar=True, + freeze_midas_bn=True, + gpu='NULL', + img_size=[ + 384, + 512, + ], + inverse_midas=False, + log_images_every=0.1, + max_depth=80, + max_temp=50.0, + max_translation=100, + memory_efficient=True, + midas_model_type='DPT_BEiT_L_384', + min_depth=0.001, + min_temp=0.0212, + model='zoedepth', + n_attractors=[ + 16, + 8, + 4, + 1, + ], + n_bins=64, + name='ZoeDepth', + notes='', + output_distribution='logbinomial', + prefetch=False, + pretrained_resource= + 'local::./work_dir/ZoeDepthv1_30-Dec_16-29-4e2bc436e4e1_best.pt', + print_losses=False, + project='ZoeDepth', + random_crop=False, + random_translate=False, + root='.', + save_dir='', + shared_dict='NULL', + tags='', + train_midas=True, + translate_prob=0.2, + type='ZoeDepth', + uid='NULL', + use_amp=False, + use_pretrained_midas=True, + use_shared_dict=False, + validate_every=0.25, + version_name='v1', + workers=16), + max_depth=80, + min_depth=0.001, + sigloss=dict(type='SILogLoss'), + target='coarse', + type='BaselinePretrain') +optim_wrapper = dict( + clip_grad=dict(max_norm=0.1, norm_type=2, type='norm'), + optimizer=dict(lr=0.0002, type='AdamW', weight_decay=0.01), + paramwise_cfg=dict( + bypass_duplicate=True, + custom_keys=dict( + {'coarse_branch.core': dict(decay_mult=1.0, lr_mult=0.1)}))) +param_scheduler = dict( + base_momentum=0.85, + cycle_momentum=True, + div_factor=1, + final_div_factor=10000, + max_momentum=0.95, + pct_start=0.5, + three_phase=False) +project = 'patchfusion' +tags = [ + 'pcoarse', +] +test_in_dataloader = dict( + batch_size=1, + dataset=dict( + data_root='./data/u4k', + max_depth=80, + min_depth=0.001, + mode='infer', + split='./data/u4k/splits/test.txt', + transform_cfg=dict(network_process_size=[ + 384, + 512, + ]), + type='UnrealStereo4kDataset'), + num_workers=2) +test_out_dataloader = dict( + batch_size=1, + dataset=dict( + data_root='./data/u4k', + max_depth=80, + min_depth=0.001, + mode='infer', + split='./data/u4k/splits/test_out.txt', + transform_cfg=dict(network_process_size=[ + 384, + 512, + ]), + type='UnrealStereo4kDataset'), + num_workers=2) +train_cfg = dict( + eval_start=0, + log_interval=100, + max_epochs=24, + save_checkpoint_interval=24, + train_log_img_interval=100, + val_interval=2, + val_log_img_interval=50, + val_type='epoch_base') +train_dataloader = dict( + batch_size=4, + dataset=dict( + data_root='./data/u4k', + max_depth=80, + min_depth=0.001, + mode='train', + split='./data/u4k/splits/train.txt', + transform_cfg=dict( + degree=1.0, network_process_size=[ + 384, + 512, + ], random_crop=True), + type='UnrealStereo4kDataset'), + num_workers=4) +val_dataloader = dict( + batch_size=1, + dataset=dict( + data_root='./data/u4k', + max_depth=80, + min_depth=0.001, + mode='infer', + split='./data/u4k/splits/val.txt', + transform_cfg=dict(network_process_size=[ + 384, + 512, + ]), + type='UnrealStereo4kDataset'), + num_workers=2) +work_dir = './work_dir/coarse_pretrain' +zoe_depth_config = dict( + attractor_alpha=1000, + attractor_gamma=2, + attractor_kind='mean', + attractor_type='inv', + aug=True, + bin_centers_type='softplus', + bin_embedding_dim=128, + clip_grad=0.1, + dataset='nyu', + distributed=True, + do_resize=False, + force_keep_ar=True, + freeze_midas_bn=True, + gpu='NULL', + img_size=[ + 384, + 512, + ], + inverse_midas=False, + log_images_every=0.1, + max_depth=80, + max_temp=50.0, + max_translation=100, + memory_efficient=True, + midas_model_type='DPT_BEiT_L_384', + min_depth=0.001, + min_temp=0.0212, + model='zoedepth', + n_attractors=[ + 16, + 8, + 4, + 1, + ], + n_bins=64, + name='ZoeDepth', + notes='', + output_distribution='logbinomial', + prefetch=False, + pretrained_resource= + 'local::./work_dir/ZoeDepthv1_30-Dec_16-29-4e2bc436e4e1_best.pt', + print_losses=False, + project='ZoeDepth', + random_crop=False, + random_translate=False, + root='.', + save_dir='', + shared_dict='NULL', + tags='', + train_midas=True, + translate_prob=0.2, + type='ZoeDepth', + uid='NULL', + use_amp=False, + use_pretrained_midas=True, + use_shared_dict=False, + validate_every=0.25, + version_name='v1', + workers=16) + +2024/03/13 15:40:27 - patchstitcher - INFO - Loading deepnet from local::./work_dir/ZoeDepthv1_30-Dec_16-29-4e2bc436e4e1_best.pt +2024/03/13 15:40:28 - patchstitcher - INFO - Current zoedepth.core.prep.resizer is +2024/03/13 15:40:28 - patchstitcher - INFO - DistributedDataParallel( + (module): BaselinePretrain( + (coarse_branch): ZoeDepth( + (core): MidasCore( + (core): DPTDepthModel( + (pretrained): Module( + (model): Beit( + (patch_embed): PatchEmbed( + (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16)) + (norm): Identity() + ) + (pos_drop): Dropout(p=0.0, inplace=False) + (blocks): ModuleList( + (0-23): 24 x Block( + (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + (qkv): Linear(in_features=1024, out_features=3072, bias=False) + (attn_drop): Dropout(p=0.0, inplace=False) + (proj): Linear(in_features=1024, out_features=1024, bias=True) + (proj_drop): Dropout(p=0.0, inplace=False) + ) + (drop_path1): Identity() + (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=1024, out_features=4096, bias=True) + (act): GELU(approximate='none') + (drop1): Dropout(p=0.0, inplace=False) + (norm): Identity() + (fc2): Linear(in_features=4096, out_features=1024, bias=True) + (drop2): Dropout(p=0.0, inplace=False) + ) + (drop_path2): Identity() + ) + ) + (norm): Identity() + (fc_norm): LayerNorm((1024,), eps=1e-06, elementwise_affine=True) + (head_drop): Dropout(p=0.0, inplace=False) + (head): Linear(in_features=1024, out_features=1000, bias=True) + ) + (act_postprocess1): Sequential( + (0): ProjectReadout( + (project): Sequential( + (0): Linear(in_features=2048, out_features=1024, bias=True) + (1): GELU(approximate='none') + ) + ) + (1): Transpose() + (2): Unflatten(dim=2, unflattened_size=torch.Size([24, 24])) + (3): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1)) + (4): ConvTranspose2d(256, 256, kernel_size=(4, 4), stride=(4, 4)) + ) + (act_postprocess2): Sequential( + (0): ProjectReadout( + (project): Sequential( + (0): Linear(in_features=2048, out_features=1024, bias=True) + (1): GELU(approximate='none') + ) + ) + (1): Transpose() + (2): Unflatten(dim=2, unflattened_size=torch.Size([24, 24])) + (3): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1)) + (4): ConvTranspose2d(512, 512, kernel_size=(2, 2), stride=(2, 2)) + ) + (act_postprocess3): Sequential( + (0): ProjectReadout( + (project): Sequential( + (0): Linear(in_features=2048, out_features=1024, bias=True) + (1): GELU(approximate='none') + ) + ) + (1): Transpose() + (2): Unflatten(dim=2, unflattened_size=torch.Size([24, 24])) + (3): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1)) + ) + (act_postprocess4): Sequential( + (0): ProjectReadout( + (project): Sequential( + (0): Linear(in_features=2048, out_features=1024, bias=True) + (1): GELU(approximate='none') + ) + ) + (1): Transpose() + (2): Unflatten(dim=2, unflattened_size=torch.Size([24, 24])) + (3): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1)) + (4): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) + ) + ) + (scratch): Module( + (layer1_rn): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (layer2_rn): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (layer3_rn): Conv2d(1024, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (layer4_rn): Conv2d(1024, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) + (refinenet1): FeatureFusionBlock_custom( + (out_conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (resConfUnit1): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (resConfUnit2): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (refinenet2): FeatureFusionBlock_custom( + (out_conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (resConfUnit1): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (resConfUnit2): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (refinenet3): FeatureFusionBlock_custom( + (out_conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (resConfUnit1): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (resConfUnit2): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (refinenet4): FeatureFusionBlock_custom( + (out_conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (resConfUnit1): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (resConfUnit2): ResidualConvUnit_custom( + (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (activation): ReLU() + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (skip_add): FloatFunctional( + (activation_post_process): Identity() + ) + ) + (output_conv): Sequential( + (0): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (1): Interpolate() + (2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (3): ReLU(inplace=True) + (4): Conv2d(32, 1, kernel_size=(1, 1), stride=(1, 1)) + (5): ReLU(inplace=True) + (6): Identity() + ) + ) + ) + ) + (conv2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (seed_bin_regressor): SeedBinRegressorUnnormed( + (_net): Sequential( + (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + (seed_projector): Projector( + (_net): Sequential( + (0): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + (projectors): ModuleList( + (0-3): 4 x Projector( + (_net): Sequential( + (0): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + ) + ) + ) + (attractors): ModuleList( + (0): AttractorLayerUnnormed( + (_net): Sequential( + (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + (1): AttractorLayerUnnormed( + (_net): Sequential( + (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 8, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + (2): AttractorLayerUnnormed( + (_net): Sequential( + (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 4, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + (3): AttractorLayerUnnormed( + (_net): Sequential( + (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1)) + (1): ReLU(inplace=True) + (2): Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + ) + (conditional_log_binomial): ConditionalLogBinomial( + (log_binomial_transform): LogBinomial() + (mlp): Sequential( + (0): Conv2d(161, 80, kernel_size=(1, 1), stride=(1, 1)) + (1): GELU(approximate='none') + (2): Conv2d(80, 4, kernel_size=(1, 1), stride=(1, 1)) + (3): Softplus(beta=1, threshold=20) + ) + ) + ) + (sigloss): SILogLoss() + ) +) +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.cls_token:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.cls_token:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.cls_token:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.cls_token:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.patch_embed.proj.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.patch_embed.proj.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.patch_embed.proj.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.patch_embed.proj.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.patch_embed.proj.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.patch_embed.proj.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.patch_embed.proj.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.patch_embed.proj.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.gamma_1:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.gamma_1:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.gamma_1:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.gamma_1:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.gamma_2:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.gamma_2:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.gamma_2:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.gamma_2:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm1.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm1.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.q_bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.v_bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm2.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm2.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.gamma_1:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.gamma_1:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.gamma_1:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.gamma_1:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.gamma_2:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.gamma_2:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.gamma_2:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.gamma_2:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm1.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm1.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.q_bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.v_bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm2.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm2.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.gamma_1:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.gamma_1:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.gamma_1:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.gamma_1:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.gamma_2:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.gamma_2:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.gamma_2:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.gamma_2:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm1.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm1.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.q_bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.v_bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm2.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm2.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.gamma_1:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.gamma_1:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.gamma_1:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.gamma_1:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.gamma_2:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.gamma_2:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.gamma_2:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.gamma_2:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm1.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm1.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.q_bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.v_bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm2.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm2.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.gamma_1:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.gamma_1:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.gamma_1:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.gamma_1:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.gamma_2:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.gamma_2:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.gamma_2:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.gamma_2:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm1.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm1.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.q_bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.v_bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm2.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm2.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:47 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.gamma_1:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.gamma_1:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.gamma_1:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.gamma_1:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.gamma_2:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.gamma_2:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.gamma_2:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.gamma_2:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.q_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.q_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.q_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.q_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.v_bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.v_bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.v_bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.v_bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.relative_position_bias_table:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.relative_position_bias_table:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.relative_position_bias_table:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.relative_position_bias_table:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.qkv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.qkv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.qkv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.qkv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.proj.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.proj.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.proj.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.proj.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.proj.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.proj.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.proj.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.attn.proj.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.norm2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.fc_norm.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.fc_norm.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.fc_norm.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.fc_norm.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.fc_norm.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.fc_norm.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.fc_norm.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.fc_norm.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.head.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.head.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.head.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.head.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.head.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.head.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.head.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.model.head.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.0.project.0.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.0.project.0.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.0.project.0.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.0.project.0.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.0.project.0.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.0.project.0.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.0.project.0.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.0.project.0.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.3.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.3.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.3.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.3.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.3.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.3.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.3.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.3.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.4.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.4.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.4.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.4.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.4.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.4.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.4.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess1.4.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.0.project.0.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.0.project.0.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.0.project.0.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.0.project.0.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.0.project.0.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.0.project.0.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.0.project.0.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.0.project.0.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.3.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.3.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.3.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.3.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.3.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.3.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.3.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.3.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.4.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.4.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.4.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.4.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.4.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.4.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.4.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess2.4.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.0.project.0.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.0.project.0.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.0.project.0.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.0.project.0.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.0.project.0.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.0.project.0.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.0.project.0.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.0.project.0.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.3.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.3.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.3.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.3.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.3.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.3.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.3.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess3.3.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.0.project.0.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.0.project.0.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.0.project.0.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.0.project.0.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.0.project.0.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.0.project.0.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.0.project.0.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.0.project.0.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.3.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.3.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.3.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.3.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.3.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.3.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.3.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.3.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.4.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.4.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.4.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.4.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.4.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.4.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.4.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.pretrained.act_postprocess4.4.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer1_rn.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer1_rn.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer1_rn.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer1_rn.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer2_rn.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer2_rn.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer2_rn.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer2_rn.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer3_rn.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer3_rn.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer3_rn.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer3_rn.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer4_rn.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer4_rn.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer4_rn.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.layer4_rn.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.out_conv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.out_conv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.out_conv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.out_conv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.out_conv.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.out_conv.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.out_conv.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.out_conv.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.out_conv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.out_conv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.out_conv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.out_conv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.out_conv.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.out_conv.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.out_conv.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.out_conv.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.out_conv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.out_conv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.out_conv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.out_conv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.out_conv.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.out_conv.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.out_conv.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.out_conv.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.out_conv.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.out_conv.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.out_conv.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.out_conv.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.out_conv.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.out_conv.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.out_conv.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.out_conv.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.0.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.0.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.0.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.0.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.0.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.0.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.0.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.0.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.2.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.2.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.2.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.2.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.2.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.2.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.2.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.2.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.4.weight:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.4.weight:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.4.weight:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.4.weight:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.4.bias:lr=2e-05 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.4.bias:weight_decay=0.01 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.4.bias:lr_mult=0.1 +2024/03/13 15:40:48 - patchstitcher - INFO - paramwise_options -- coarse_branch.core.core.scratch.output_conv.4.bias:decay_mult=1.0 +2024/03/13 15:40:48 - patchstitcher - INFO - successfully init trainer +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.cls_token +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.patch_embed.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.patch_embed.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.0.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.1.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.2.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.3.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.4.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.5.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.6.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.7.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.8.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.9.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.10.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.11.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.12.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.13.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.14.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.15.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.16.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.17.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.18.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.19.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.20.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.21.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.22.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.gamma_1 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.gamma_2 +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.norm1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.norm1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.attn.q_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.attn.v_bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.attn.relative_position_bias_table +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.attn.qkv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.attn.proj.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.attn.proj.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.norm2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.norm2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.blocks.23.mlp.fc2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.fc_norm.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.fc_norm.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.head.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.model.head.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess1.0.project.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess1.0.project.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess1.3.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess1.3.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess1.4.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess1.4.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess2.0.project.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess2.0.project.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess2.3.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess2.3.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess2.4.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess2.4.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess3.0.project.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess3.0.project.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess3.3.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess3.3.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess4.0.project.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess4.0.project.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess4.3.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess4.3.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess4.4.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.pretrained.act_postprocess4.4.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.layer1_rn.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.layer2_rn.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.layer3_rn.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.layer4_rn.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet1.out_conv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet1.out_conv.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet1.resConfUnit1.conv2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet1.resConfUnit2.conv2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet2.out_conv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet2.out_conv.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet2.resConfUnit1.conv2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet2.resConfUnit2.conv2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet3.out_conv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet3.out_conv.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet3.resConfUnit1.conv2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet3.resConfUnit2.conv2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet4.out_conv.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet4.out_conv.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet4.resConfUnit1.conv2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv1.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.refinenet4.resConfUnit2.conv2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.output_conv.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.output_conv.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.output_conv.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.output_conv.2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.output_conv.4.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.core.core.scratch.output_conv.4.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.conv2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.conv2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.seed_bin_regressor._net.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.seed_bin_regressor._net.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.seed_bin_regressor._net.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.seed_bin_regressor._net.2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.seed_projector._net.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.seed_projector._net.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.seed_projector._net.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.seed_projector._net.2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.0._net.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.0._net.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.0._net.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.0._net.2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.1._net.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.1._net.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.1._net.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.1._net.2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.2._net.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.2._net.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.2._net.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.2._net.2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.3._net.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.3._net.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.3._net.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.projectors.3._net.2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.0._net.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.0._net.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.0._net.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.0._net.2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.1._net.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.1._net.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.1._net.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.1._net.2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.2._net.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.2._net.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.2._net.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.2._net.2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.3._net.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.3._net.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.3._net.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.attractors.3._net.2.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.conditional_log_binomial.mlp.0.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.conditional_log_binomial.mlp.0.bias +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.conditional_log_binomial.mlp.2.weight +2024/03/13 15:40:48 - patchstitcher - INFO - training param: module.coarse_branch.conditional_log_binomial.mlp.2.bias +2024/03/13 15:44:06 - patchstitcher - INFO - Epoch: [01/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.7131173610687256 - coarse_loss: 1.7131173610687256 +2024/03/13 15:46:19 - patchstitcher - INFO - Epoch: [01/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 2.0335652828216553 - coarse_loss: 2.0335652828216553 +2024/03/13 15:48:38 - patchstitcher - INFO - Epoch: [01/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.5221185684204102 - coarse_loss: 1.5221185684204102 +2024/03/13 15:50:53 - patchstitcher - INFO - Epoch: [01/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.2828749418258667 - coarse_loss: 1.2828749418258667 +2024/03/13 15:55:13 - patchstitcher - INFO - Epoch: [02/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.9703367352485657 - coarse_loss: 0.9703367352485657 +2024/03/13 15:57:28 - patchstitcher - INFO - Epoch: [02/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.2090288400650024 - coarse_loss: 1.2090288400650024 +2024/03/13 15:59:42 - patchstitcher - INFO - Epoch: [02/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.168603539466858 - coarse_loss: 1.168603539466858 +2024/03/13 16:01:55 - patchstitcher - INFO - Epoch: [02/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.143261432647705 - coarse_loss: 1.143261432647705 +2024/03/13 16:03:57 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| 0.9627955 | 0.9889554 | 0.9953486 | 0.074003 | 1.5849035 | 0.0316879 | 0.1094913 | 9.6426039 | 0.1999298 | 1.1507299 | ++-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+ +2024/03/13 16:06:17 - patchstitcher - INFO - Epoch: [03/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.1554230451583862 - coarse_loss: 1.1554230451583862 +2024/03/13 16:08:25 - patchstitcher - INFO - Epoch: [03/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.0465468168258667 - coarse_loss: 1.0465468168258667 +2024/03/13 16:10:42 - patchstitcher - INFO - Epoch: [03/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.5575401782989502 - coarse_loss: 1.5575401782989502 +2024/03/13 16:12:59 - patchstitcher - INFO - Epoch: [03/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.9880474209785461 - coarse_loss: 0.9880474209785461 +2024/03/13 16:16:59 - patchstitcher - INFO - Epoch: [04/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.2483534812927246 - coarse_loss: 1.2483534812927246 +2024/03/13 16:19:07 - patchstitcher - INFO - Epoch: [04/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.958562970161438 - coarse_loss: 0.958562970161438 +2024/03/13 16:21:22 - patchstitcher - INFO - Epoch: [04/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.9945743083953857 - coarse_loss: 0.9945743083953857 +2024/03/13 16:23:39 - patchstitcher - INFO - Epoch: [04/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.5984330773353577 - coarse_loss: 0.5984330773353577 +2024/03/13 16:25:35 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+ +| 0.9611621 | 0.9898382 | 0.9954542 | 0.0702681 | 1.5167718 | 0.030248 | 0.1041869 | 9.1619216 | 0.2028615 | 1.1258394 | ++-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+ +2024/03/13 16:27:53 - patchstitcher - INFO - Epoch: [05/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.6511057615280151 - coarse_loss: 0.6511057615280151 +2024/03/13 16:30:04 - patchstitcher - INFO - Epoch: [05/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.5451534390449524 - coarse_loss: 0.5451534390449524 +2024/03/13 16:32:22 - patchstitcher - INFO - Epoch: [05/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.7816470265388489 - coarse_loss: 0.7816470265388489 +2024/03/13 16:34:35 - patchstitcher - INFO - Epoch: [05/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.8089712262153625 - coarse_loss: 0.8089712262153625 +2024/03/13 16:38:31 - patchstitcher - INFO - Epoch: [06/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.2738938331604004 - coarse_loss: 1.2738938331604004 +2024/03/13 16:40:46 - patchstitcher - INFO - Epoch: [06/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.8033538460731506 - coarse_loss: 0.8033538460731506 +2024/03/13 16:43:02 - patchstitcher - INFO - Epoch: [06/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.680912435054779 - coarse_loss: 0.680912435054779 +2024/03/13 16:45:14 - patchstitcher - INFO - Epoch: [06/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.7955048084259033 - coarse_loss: 0.7955048084259033 +2024/03/13 16:47:11 - patchstitcher - INFO - Evaluation Summary: ++-----------+----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| 0.9713826 | 0.990082 | 0.9955904 | 0.063581 | 1.4751952 | 0.0280818 | 0.0985967 | 8.4533564 | 0.1650279 | 1.0380057 | ++-----------+----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+ +2024/03/13 16:49:33 - patchstitcher - INFO - Epoch: [07/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.7122501134872437 - coarse_loss: 0.7122501134872437 +2024/03/13 16:51:42 - patchstitcher - INFO - Epoch: [07/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.0893974304199219 - coarse_loss: 1.0893974304199219 +2024/03/13 16:53:52 - patchstitcher - INFO - Epoch: [07/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.0466139316558838 - coarse_loss: 1.0466139316558838 +2024/03/13 16:56:05 - patchstitcher - INFO - Epoch: [07/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.6300845742225647 - coarse_loss: 0.6300845742225647 +2024/03/13 17:00:02 - patchstitcher - INFO - Epoch: [08/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.6965449452400208 - coarse_loss: 0.6965449452400208 +2024/03/13 17:02:15 - patchstitcher - INFO - Epoch: [08/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.9946849346160889 - coarse_loss: 0.9946849346160889 +2024/03/13 17:04:27 - patchstitcher - INFO - Epoch: [08/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.2835063934326172 - coarse_loss: 1.2835063934326172 +2024/03/13 17:06:43 - patchstitcher - INFO - Epoch: [08/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.366194486618042 - coarse_loss: 1.366194486618042 +2024/03/13 17:08:39 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| 0.9710541 | 0.9906279 | 0.996204 | 0.0594678 | 1.4010291 | 0.0256353 | 0.0933313 | 8.4669466 | 0.1635645 | 1.0337067 | ++-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +2024/03/13 17:10:59 - patchstitcher - INFO - Epoch: [09/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.9983483552932739 - coarse_loss: 0.9983483552932739 +2024/03/13 17:13:13 - patchstitcher - INFO - Epoch: [09/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.738366961479187 - coarse_loss: 1.738366961479187 +2024/03/13 17:15:25 - patchstitcher - INFO - Epoch: [09/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.8853738903999329 - coarse_loss: 0.8853738903999329 +2024/03/13 17:17:37 - patchstitcher - INFO - Epoch: [09/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.8385846614837646 - coarse_loss: 0.8385846614837646 +2024/03/13 17:21:36 - patchstitcher - INFO - Epoch: [10/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.44202721118927 - coarse_loss: 0.44202721118927 +2024/03/13 17:23:51 - patchstitcher - INFO - Epoch: [10/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.8443354368209839 - coarse_loss: 0.8443354368209839 +2024/03/13 17:26:06 - patchstitcher - INFO - Epoch: [10/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.4652352333068848 - coarse_loss: 1.4652352333068848 +2024/03/13 17:28:23 - patchstitcher - INFO - Epoch: [10/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.8553734421730042 - coarse_loss: 0.8553734421730042 +2024/03/13 17:30:15 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+ +| 0.9707531 | 0.9904648 | 0.9957918 | 0.0803878 | 1.431243 | 0.0361089 | 0.1142488 | 8.4251662 | 0.1654137 | 1.0413262 | ++-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+ +2024/03/13 17:32:34 - patchstitcher - INFO - Epoch: [11/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.7248023152351379 - coarse_loss: 0.7248023152351379 +2024/03/13 17:34:52 - patchstitcher - INFO - Epoch: [11/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.8598499298095703 - coarse_loss: 0.8598499298095703 +2024/03/13 17:37:02 - patchstitcher - INFO - Epoch: [11/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.0179355144500732 - coarse_loss: 1.0179355144500732 +2024/03/13 17:39:17 - patchstitcher - INFO - Epoch: [11/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.5394287109375 - coarse_loss: 0.5394287109375 +2024/03/13 17:43:14 - patchstitcher - INFO - Epoch: [12/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.7072930932044983 - coarse_loss: 0.7072930932044983 +2024/03/13 17:45:29 - patchstitcher - INFO - Epoch: [12/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.8514989018440247 - coarse_loss: 0.8514989018440247 +2024/03/13 17:47:41 - patchstitcher - INFO - Epoch: [12/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.8754427433013916 - coarse_loss: 0.8754427433013916 +2024/03/13 17:49:57 - patchstitcher - INFO - Epoch: [12/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.7960880994796753 - coarse_loss: 0.7960880994796753 +2024/03/13 17:51:48 - patchstitcher - INFO - Evaluation Summary: ++----------+-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++----------+-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+ +| 0.973872 | 0.9911502 | 0.9961429 | 0.0544657 | 1.3489431 | 0.0237886 | 0.088933 | 7.8991681 | 0.1474454 | 0.9941621 | ++----------+-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+ +2024/03/13 17:54:10 - patchstitcher - INFO - Epoch: [13/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.2369189262390137 - coarse_loss: 1.2369189262390137 +2024/03/13 17:56:20 - patchstitcher - INFO - Epoch: [13/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 1.072669506072998 - coarse_loss: 1.072669506072998 +2024/03/13 17:58:35 - patchstitcher - INFO - Epoch: [13/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.8937158584594727 - coarse_loss: 0.8937158584594727 +2024/03/13 18:00:47 - patchstitcher - INFO - Epoch: [13/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.7027549147605896 - coarse_loss: 0.7027549147605896 +2024/03/13 18:04:41 - patchstitcher - INFO - Epoch: [14/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.5427314043045044 - coarse_loss: 0.5427314043045044 +2024/03/13 18:06:59 - patchstitcher - INFO - Epoch: [14/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.738332211971283 - coarse_loss: 0.738332211971283 +2024/03/13 18:09:11 - patchstitcher - INFO - Epoch: [14/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.5939739942550659 - coarse_loss: 0.5939739942550659 +2024/03/13 18:11:25 - patchstitcher - INFO - Epoch: [14/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.5172966718673706 - coarse_loss: 0.5172966718673706 +2024/03/13 18:13:17 - patchstitcher - INFO - Evaluation Summary: ++----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| 0.972616 | 0.9911106 | 0.9959747 | 0.0618082 | 1.3711957 | 0.0270541 | 0.0955989 | 8.2175053 | 0.1579217 | 1.0465419 | ++----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +2024/03/13 18:15:38 - patchstitcher - INFO - Epoch: [15/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.5915038585662842 - coarse_loss: 0.5915038585662842 +2024/03/13 18:17:51 - patchstitcher - INFO - Epoch: [15/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.8162057399749756 - coarse_loss: 0.8162057399749756 +2024/03/13 18:20:04 - patchstitcher - INFO - Epoch: [15/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.027492880821228 - coarse_loss: 1.027492880821228 +2024/03/13 18:22:17 - patchstitcher - INFO - Epoch: [15/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.7036874294281006 - coarse_loss: 0.7036874294281006 +2024/03/13 18:26:12 - patchstitcher - INFO - Epoch: [16/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.9338802099227905 - coarse_loss: 0.9338802099227905 +2024/03/13 18:28:27 - patchstitcher - INFO - Epoch: [16/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.6914787888526917 - coarse_loss: 0.6914787888526917 +2024/03/13 18:30:42 - patchstitcher - INFO - Epoch: [16/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.9839471578598022 - coarse_loss: 0.9839471578598022 +2024/03/13 18:32:53 - patchstitcher - INFO - Epoch: [16/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.651856780052185 - coarse_loss: 1.651856780052185 +2024/03/13 18:34:45 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+ +| 0.9757517 | 0.9916734 | 0.9963179 | 0.0496801 | 1.2989587 | 0.021218 | 0.0832603 | 7.7315539 | 0.1475995 | 0.9686595 | ++-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+ +2024/03/13 18:37:03 - patchstitcher - INFO - Epoch: [17/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.5893704295158386 - coarse_loss: 0.5893704295158386 +2024/03/13 18:39:20 - patchstitcher - INFO - Epoch: [17/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.7406035661697388 - coarse_loss: 0.7406035661697388 +2024/03/13 18:41:32 - patchstitcher - INFO - Epoch: [17/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.0807702541351318 - coarse_loss: 1.0807702541351318 +2024/03/13 18:43:49 - patchstitcher - INFO - Epoch: [17/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.1311298608779907 - coarse_loss: 1.1311298608779907 +2024/03/13 18:47:42 - patchstitcher - INFO - Epoch: [18/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.6089723706245422 - coarse_loss: 0.6089723706245422 +2024/03/13 18:49:56 - patchstitcher - INFO - Epoch: [18/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.49315693974494934 - coarse_loss: 0.49315693974494934 +2024/03/13 18:52:07 - patchstitcher - INFO - Epoch: [18/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.765090823173523 - coarse_loss: 0.765090823173523 +2024/03/13 18:54:20 - patchstitcher - INFO - Epoch: [18/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.0061668157577515 - coarse_loss: 1.0061668157577515 +2024/03/13 18:56:15 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+----------+ +| 0.9775608 | 0.9917449 | 0.9962492 | 0.047275 | 1.2713183 | 0.0207713 | 0.0821434 | 7.4744742 | 0.1302942 | 0.914453 | ++-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+-----------+----------+ +2024/03/13 18:58:37 - patchstitcher - INFO - Epoch: [19/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.47735485434532166 - coarse_loss: 0.47735485434532166 +2024/03/13 19:00:47 - patchstitcher - INFO - Epoch: [19/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.477491170167923 - coarse_loss: 0.477491170167923 +2024/03/13 19:03:01 - patchstitcher - INFO - Epoch: [19/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.7433751821517944 - coarse_loss: 0.7433751821517944 +2024/03/13 19:05:14 - patchstitcher - INFO - Epoch: [19/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.4475404620170593 - coarse_loss: 0.4475404620170593 +2024/03/13 19:09:10 - patchstitcher - INFO - Epoch: [20/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 1.0900338888168335 - coarse_loss: 1.0900338888168335 +2024/03/13 19:11:24 - patchstitcher - INFO - Epoch: [20/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.5305348634719849 - coarse_loss: 0.5305348634719849 +2024/03/13 19:13:40 - patchstitcher - INFO - Epoch: [20/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.44258806109428406 - coarse_loss: 0.44258806109428406 +2024/03/13 19:15:51 - patchstitcher - INFO - Epoch: [20/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.8207988739013672 - coarse_loss: 0.8207988739013672 +2024/03/13 19:17:46 - patchstitcher - INFO - Evaluation Summary: ++----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +| 0.977787 | 0.9918466 | 0.9964245 | 0.0419112 | 1.2307948 | 0.0181534 | 0.0777722 | 7.2845711 | 0.1274987 | 0.9021566 | ++----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+ +2024/03/13 19:20:07 - patchstitcher - INFO - Epoch: [21/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.8148857355117798 - coarse_loss: 0.8148857355117798 +2024/03/13 19:22:16 - patchstitcher - INFO - Epoch: [21/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.5273571610450745 - coarse_loss: 0.5273571610450745 +2024/03/13 19:24:29 - patchstitcher - INFO - Epoch: [21/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.4554903507232666 - coarse_loss: 0.4554903507232666 +2024/03/13 19:26:44 - patchstitcher - INFO - Epoch: [21/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.5026299953460693 - coarse_loss: 1.5026299953460693 +2024/03/13 19:30:42 - patchstitcher - INFO - Epoch: [22/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.928625226020813 - coarse_loss: 0.928625226020813 +2024/03/13 19:32:55 - patchstitcher - INFO - Epoch: [22/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.8586921691894531 - coarse_loss: 0.8586921691894531 +2024/03/13 19:35:08 - patchstitcher - INFO - Epoch: [22/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.435793399810791 - coarse_loss: 0.435793399810791 +2024/03/13 19:37:20 - patchstitcher - INFO - Epoch: [22/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.6521605253219604 - coarse_loss: 0.6521605253219604 +2024/03/13 19:39:13 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+ +| 0.9780887 | 0.9918597 | 0.9964257 | 0.0401822 | 1.2286178 | 0.0173981 | 0.07643 | 7.2405782 | 0.1242689 | 0.8875719 | ++-----------+-----------+-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+ +2024/03/13 19:41:35 - patchstitcher - INFO - Epoch: [23/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.47807303071022034 - coarse_loss: 0.47807303071022034 +2024/03/13 19:43:50 - patchstitcher - INFO - Epoch: [23/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.9843131303787231 - coarse_loss: 0.9843131303787231 +2024/03/13 19:45:58 - patchstitcher - INFO - Epoch: [23/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 1.528859257698059 - coarse_loss: 1.528859257698059 +2024/03/13 19:48:15 - patchstitcher - INFO - Epoch: [23/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 1.305454969406128 - coarse_loss: 1.305454969406128 +2024/03/13 19:52:15 - patchstitcher - INFO - Epoch: [24/24] - Step: [00100/00475] - Time: [1/1] - Total Loss: 0.7509616613388062 - coarse_loss: 0.7509616613388062 +2024/03/13 19:54:26 - patchstitcher - INFO - Epoch: [24/24] - Step: [00200/00475] - Time: [1/1] - Total Loss: 0.48038068413734436 - coarse_loss: 0.48038068413734436 +2024/03/13 19:56:45 - patchstitcher - INFO - Epoch: [24/24] - Step: [00300/00475] - Time: [1/1] - Total Loss: 0.3761628270149231 - coarse_loss: 0.3761628270149231 +2024/03/13 19:58:57 - patchstitcher - INFO - Epoch: [24/24] - Step: [00400/00475] - Time: [1/1] - Total Loss: 0.3492358326911926 - coarse_loss: 0.3492358326911926 +2024/03/13 20:00:48 - patchstitcher - INFO - Evaluation Summary: ++-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+---------+ +| a1 | a2 | a3 | abs_rel | rmse | log_10 | rmse_log | silog | sq_rel | see | ++-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+---------+ +| 0.9781844 | 0.9919228 | 0.9964451 | 0.0402917 | 1.221662 | 0.0174114 | 0.0764684 | 7.2314185 | 0.1251314 | 0.89066 | ++-----------+-----------+-----------+-----------+----------+-----------+-----------+-----------+-----------+---------+ +2024/03/13 20:00:48 - patchstitcher - INFO - Saving ckp, but use the inner get_save_dict fuction to get model_dict +2024/03/13 20:00:48 - patchstitcher - INFO - For saving space. Would you like to save coarse model several times? :> +2024/03/13 20:00:52 - patchstitcher - INFO - save checkpoint_24.pth at ./work_dir/coarse_pretrain