# Training configuration for the 'PatchFusion' model: two 'DA-ZoeDepth'
# branches (coarse and fine) plus a 'GuidedFusionPatchFusion' module, with
# all dataloaders reading from ./data/u4k.
#
# Every top-level name below is a configuration entry. The ZoeDepth branch
# settings are defined once (`zoe_depth_config`) and deep-copied into both
# branches so the three copies cannot drift apart.
import copy

# ---------------------------------------------------------------------------
# Runtime / environment
# ---------------------------------------------------------------------------
collect_input_args = [
    'image_lr',
    'crops_image_hr',
    'depth_gt',
    'crop_depths',
    'bboxs',
    'image_hr',
]
convert_syncbn = True
debug = False
env_cfg = dict(
    cudnn_benchmark=True,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='forkserver'),
)
find_unused_parameters = True
launcher = 'pytorch'
log_name = 'patchfusion'
project = 'patchfusion'
resume = False
tags = [
    'patchfusion',
    'da',
    'vitb',
]
work_dir = './work_dir/depthanything_vitb_u4k/patchfusion'

# Depth range. The same two values are repeated literally inside the model
# and every dataset entry below; keep them in agreement when editing.
max_depth = 80
min_depth = 0.001

# ---------------------------------------------------------------------------
# ZoeDepth branch settings (single source of truth)
# ---------------------------------------------------------------------------
# NOTE(review): `dataset` is 'nyu' here although every dataloader in this
# file reads ./data/u4k - confirm this value is intentional.
zoe_depth_config = dict(
    attractor_alpha=1000,
    attractor_gamma=2,
    attractor_kind='mean',
    attractor_type='inv',
    aug=True,
    bin_centers_type='softplus',
    bin_embedding_dim=128,
    clip_grad=0.1,
    dataset='nyu',
    depth_anything=True,
    distributed=True,
    do_resize=False,
    force_keep_ar=True,
    freeze_midas_bn=True,
    gpu='NULL',
    img_size=[
        392,
        518,
    ],
    inverse_midas=False,
    log_images_every=0.1,
    max_depth=80,
    max_temp=50.0,
    max_translation=100,
    memory_efficient=True,
    midas_model_type='vitb',
    min_depth=0.001,
    min_temp=0.0212,
    model='zoedepth',
    n_attractors=[
        16,
        8,
        4,
        1,
    ],
    n_bins=64,
    name='ZoeDepth',
    notes='',
    output_distribution='logbinomial',
    prefetch=False,
    pretrained_resource='local::./work_dir/DepthAnything_vitb.pt',
    print_losses=False,
    project='ZoeDepth',
    random_crop=False,
    random_translate=False,
    root='.',
    save_dir='',
    shared_dict='NULL',
    tags='',
    train_midas=True,
    translate_prob=0.2,
    type='DA-ZoeDepth',
    uid='NULL',
    use_amp=False,
    use_pretrained_midas=True,
    use_shared_dict=False,
    validate_every=0.25,
    version_name='v1',
    workers=16,
)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
model = dict(
    # Each branch gets its own deep copy so that mutating one branch's
    # settings (including the nested lists) never affects the other branch
    # or `zoe_depth_config` itself.
    coarse_branch=copy.deepcopy(zoe_depth_config),
    fine_branch=copy.deepcopy(zoe_depth_config),
    guided_fusion=dict(
        g2l=True,
        in_channels=[
            32,
            128,
            128,
            128,
            128,
            128,
        ],
        n_channels=5,
        num_patches=[
            203056,
            66304,
            16576,
            4144,
            1036,
            266,
        ],
        patch_process_shape=(
            392,
            518,
        ),
        type='GuidedFusionPatchFusion',
    ),
    max_depth=80,
    min_depth=0.001,
    patch_process_shape=(
        392,
        518,
    ),
    # Two checkpoints: one under coarse_pretrain/, one under fine_pretrain/.
    pretrain_model=[
        './work_dir/depthanything_vitb_u4k/coarse_pretrain/checkpoint_24.pth',
        './work_dir/depthanything_vitb_u4k/fine_pretrain/checkpoint_24.pth',
    ],
    sigloss=dict(type='SILogLoss'),
    type='PatchFusion',
)

# ---------------------------------------------------------------------------
# Optimisation
# ---------------------------------------------------------------------------
optim_wrapper = dict(
    clip_grad=dict(max_norm=0.1, norm_type=2, type='norm'),
    optimizer=dict(lr=0.0001, type='AdamW', weight_decay=0.001),
    paramwise_cfg=dict(bypass_duplicate=True, custom_keys=dict()),
)
# NOTE(review): no `type` key is set here; the keys look like one-cycle
# learning-rate schedule arguments - verify against the code that consumes
# this entry.
param_scheduler = dict(
    base_momentum=0.85,
    cycle_momentum=True,
    div_factor=10,
    final_div_factor=10000,
    max_momentum=0.95,
    pct_start=0.25,
    three_phase=False,
)
train_cfg = dict(
    eval_start=0,
    log_interval=100,
    max_epochs=16,
    save_checkpoint_interval=16,
    train_log_img_interval=500,
    val_interval=2,
    val_log_img_interval=50,
    val_type='epoch_base',
)

# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
general_dataloader = dict(
    batch_size=1,
    dataset=dict(
        dataset_name='',
        gt_dir=None,
        rgb_image_dir='',
        type='ImageDataset',
    ),
    num_workers=2,
)
train_dataloader = dict(
    batch_size=4,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='train',
        resize_mode='depth-anything',
        split='./data/u4k/splits/train.txt',
        transform_cfg=dict(
            degree=1.0,
            network_process_size=[
                392,
                518,
            ],
            random_crop=True,
            random_crop_size=(
                540,
                960,
            ),
        ),
        type='UnrealStereo4kDataset',
    ),
    num_workers=4,
)
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        resize_mode='depth-anything',
        split='./data/u4k/splits/val.txt',
        transform_cfg=dict(
            degree=1.0,
            network_process_size=[
                392,
                518,
            ],
            random_crop_size=(
                540,
                960,
            ),
        ),
        type='UnrealStereo4kDataset',
    ),
    num_workers=2,
)
# NOTE(review): both test loaders use network_process_size [384, 512] and
# set no `resize_mode`, whereas the train/val loaders use [392, 518] with
# resize_mode='depth-anything' - confirm the difference is intended.
test_in_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        split='./data/u4k/splits/test.txt',
        transform_cfg=dict(
            network_process_size=[
                384,
                512,
            ],
        ),
        type='UnrealStereo4kDataset',
    ),
    num_workers=2,
)
test_out_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        split='./data/u4k/splits/test_out.txt',
        transform_cfg=dict(
            network_process_size=[
                384,
                512,
            ],
        ),
        type='UnrealStereo4kDataset',
    ),
    num_workers=2,
)