aliasz committed on
Commit
324cb75
·
verified ·
1 Parent(s): 8c88fbe

Uploaded Heidelberg_trichome_testset4

Browse files
Heidelberg_trichome_testset4/det_rein_dinov2_mask2former.py ADDED
@@ -0,0 +1,666 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""MMDetection/MMEngine config: Rein-adapted DINOv2-L backbone + Mask2Former head.

Instance segmentation of a single class ('trichome') on SAHI-sliced COCO-format
data. Dumped config (fully expanded, no _base_ inheritance); reconstructed here
from a diff-mangled paste — values are unchanged.
"""
# --- bookkeeping / scaling constants (informational copies of values used below)
ReduceOnPlateauLR_patience = 50
# Albumentations-style transforms; NOTE(review): not referenced by any pipeline
# below — presumably kept for experimentation. Confirm before deleting.
albu_train_transforms = [
    dict(
        p=0.25,
        transforms=[
            dict(
                alpha=20,
                approximate=True,
                border_mode=0,
                interpolation=4,
                mask_value=(
                    0,
                    0,
                    0,
                ),
                p=0.5,
                same_dxdy=True,
                sigma=15,
                type='ElasticTransform'),
            dict(
                alpha=40,
                approximate=True,
                border_mode=0,
                interpolation=4,
                mask_value=(
                    0,
                    0,
                    0,
                ),
                p=0.5,
                same_dxdy=False,
                sigma=15,
                type='ElasticTransform'),
        ],
        type='OneOf'),
    dict(p=0.05, type='AdvancedBlur'),
]
auto_scale_lr = dict(base_batch_size=16, enable=False)
batch_augments = [
    dict(
        img_pad_value=0,
        mask_pad_value=0,
        pad_mask=True,
        pad_seg=False,
        seg_pad_value=255,
        size=(
            512,
            512,
        ),
        type='BatchFixedSizePad'),
]
batch_size = 2
classes = ('trichome', )
crop_size = (
    1024,
    768,
)
custom_hooks = [
    dict(type='NumClassCheckHook'),
    dict(interval=200, type='MemoryProfilerHook'),
    dict(interval=200, type='CheckInvalidLossHook'),
    dict(type='EMAHook'),
]
custom_imports = dict(
    allow_failed_imports=False, imports=[
        'mmpretrain.models',
    ])
data_preprocessor = dict(
    batch_augments=[
        dict(
            img_pad_value=0,
            mask_pad_value=0,
            pad_mask=True,
            pad_seg=False,
            seg_pad_value=255,
            size=(
                512,
                512,
            ),
            type='BatchFixedSizePad'),
    ],
    bgr_to_rgb=True,
    mask_pad_value=0,
    mean=[
        123.675,
        116.28,
        103.53,
    ],
    pad_mask=True,
    pad_seg=False,
    pad_size_divisor=32,
    seg_pad_value=255,
    std=[
        58.395,
        57.12,
        57.375,
    ],
    type='DetDataPreprocessor')
data_root = 'train/data/Stomata_detection/'
dataset_type = 'CocoDataset'
default_hooks = dict(
    checkpoint=dict(
        by_epoch=True,
        # huge interval: effectively disables periodic saves; only save_best fires
        interval=9999999999,
        max_keep_ckpts=1,
        rule='greater',
        save_best='coco/segm_mAP',
        save_last=False,
        type='CheckpointHook'),
    early_stopping=dict(
        monitor='coco/segm_mAP',
        patience=150,
        rule='greater',
        type='EarlyStoppingHook'),
    logger=dict(interval=200, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(draw=True, interval=10, type='DetVisualizationHook'))
default_scope = 'mmdet'
dinov2_checkpoint = 'train/checkpoints/dinov2_converted.pth'
early_stopping_patience = 150
embed_multi = dict(decay_mult=0.0, lr_mult=1.0)
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
find_unused_parameters = True
fp16 = dict(loss_scale='dynamic')
image_size = (
    512,
    512,
)
launcher = 'pytorch'
load_pipeline = [
    dict(to_float32=True, type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(prob=0.5, type='RandomFlip'),
    dict(type='PhotoMetricDistortion'),
    dict(
        img_border_value=(
            0,
            0,
            0,
        ),
        interpolation='lanczos',
        prob=0.5,
        type='GeomTransform'),
    dict(keep_ratio=True, scale=(
        1024,
        768,
    ), type='Resize'),
]
log_level = 'INFO'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
# lr = original_lr * (n_gpus * batch_size) / original_batch_size = 1e-4 * 12/16... — TODO confirm derivation
lr = 9.375e-05
max_epochs = 300
# --- model: DINOv2-L (ViT-L/16) backbone with LoRA-Rein adapters feeding Mask2Former
model = dict(
    backbone=dict(
        block_chunks=0,
        depth=24,
        embed_dim=1024,
        ffn_bias=True,
        ffn_layer='mlp',
        img_size=512,
        init_cfg=dict(
            checkpoint='train/checkpoints/dinov2_converted.pth',
            type='Pretrained'),
        init_values=1e-05,
        mlp_ratio=4,
        num_heads=16,
        patch_size=16,
        proj_bias=True,
        qkv_bias=True,
        reins_config=dict(
            embed_dims=1024,
            link_token_to_query=True,
            lora_dim=16,
            num_layers=24,
            patch_size=16,
            token_length=100,
            type='LoRAReins'),
        type='ReinsDinoVisionTransformer'),
    data_preprocessor=dict(
        batch_augments=[
            dict(
                img_pad_value=0,
                mask_pad_value=0,
                pad_mask=True,
                pad_seg=False,
                seg_pad_value=255,
                size=(
                    512,
                    512,
                ),
                type='BatchFixedSizePad'),
        ],
        bgr_to_rgb=True,
        mask_pad_value=0,
        mean=[
            123.675,
            116.28,
            103.53,
        ],
        pad_mask=True,
        pad_seg=False,
        pad_size_divisor=32,
        seg_pad_value=255,
        std=[
            58.395,
            57.12,
            57.375,
        ],
        type='DetDataPreprocessor'),
    init_cfg=None,
    panoptic_fusion_head=dict(
        init_cfg=None,
        loss_panoptic=None,
        num_stuff_classes=0,
        num_things_classes=1,
        type='MaskFormerFusionHead'),
    panoptic_head=dict(
        enforce_decoder_input_project=False,
        feat_channels=256,
        # ViT gives a single-scale 1024-d feature, replicated to 4 levels
        in_channels=[
            1024,
            1024,
            1024,
            1024,
        ],
        loss_cls=dict(
            # class_weight: [thing, no-object]; no-object down-weighted to 0.1
            class_weight=[
                1.0,
                0.1,
            ],
            loss_weight=2.0,
            reduction='mean',
            type='CrossEntropyLoss',
            use_sigmoid=False),
        loss_dice=dict(
            activate=True,
            eps=1.0,
            loss_weight=5.0,
            naive_dice=True,
            reduction='mean',
            type='DiceLoss',
            use_sigmoid=True),
        loss_mask=dict(
            loss_weight=5.0,
            reduction='mean',
            type='CrossEntropyLoss',
            use_sigmoid=True),
        num_queries=100,
        num_stuff_classes=0,
        num_things_classes=1,
        num_transformer_feat_level=3,
        out_channels=256,
        pixel_decoder=dict(
            act_cfg=dict(type='ReLU'),
            encoder=dict(
                layer_cfg=dict(
                    ffn_cfg=dict(
                        act_cfg=dict(inplace=True, type='ReLU'),
                        embed_dims=256,
                        feedforward_channels=1024,
                        ffn_drop=0.0,
                        num_fcs=2),
                    self_attn_cfg=dict(
                        batch_first=True,
                        dropout=0.0,
                        embed_dims=256,
                        num_heads=8,
                        num_levels=3,
                        num_points=4)),
                num_layers=6),
            norm_cfg=dict(num_groups=32, type='GN'),
            num_outs=3,
            positional_encoding=dict(normalize=True, num_feats=128),
            type='MSDeformAttnPixelDecoder'),
        positional_encoding=dict(normalize=True, num_feats=128),
        strides=[
            4,
            8,
            16,
            32,
        ],
        transformer_decoder=dict(
            init_cfg=None,
            layer_cfg=dict(
                cross_attn_cfg=dict(
                    batch_first=True, dropout=0.0, embed_dims=256,
                    num_heads=8),
                ffn_cfg=dict(
                    act_cfg=dict(inplace=True, type='ReLU'),
                    embed_dims=256,
                    feedforward_channels=2048,
                    ffn_drop=0.0,
                    num_fcs=2),
                self_attn_cfg=dict(
                    batch_first=True, dropout=0.0, embed_dims=256,
                    num_heads=8)),
            num_layers=9,
            return_intermediate=True),
        type='ReinMask2FormerHead'),
    test_cfg=dict(
        filter_low_score=True,
        instance_on=True,
        iou_thr=0.8,
        max_per_image=100,
        panoptic_on=False,
        semantic_on=False),
    train_cfg=dict(
        assigner=dict(
            match_costs=[
                dict(type='ClassificationCost', weight=2.0),
                dict(
                    type='CrossEntropyLossCost', use_sigmoid=True, weight=5.0),
                dict(eps=1.0, pred_act=True, type='DiceCost', weight=5.0),
            ],
            type='HungarianAssigner'),
        importance_sample_ratio=0.75,
        num_points=12544,
        oversample_ratio=3.0,
        sampler=dict(type='MaskPseudoSampler')),
    type='Mask2Former')
n_gpus = 6
num_classes = 1
num_stuff_classes = 0
num_things_classes = 1
num_workers = 16
# --- optimization: AdamW, grad clipping, backbone at 0.1x lr, embeds without decay
optim_wrapper = dict(
    clip_grad=dict(max_norm=0.01, norm_type=2),
    constructor='PEFTOptimWrapperConstructor',
    optimizer=dict(
        betas=(
            0.9,
            0.999,
        ),
        eps=1e-08,
        lr=9.375e-05,
        type='AdamW',
        weight_decay=0.05),
    paramwise_cfg=dict(
        custom_keys=dict(
            backbone=dict(decay_mult=1.0, lr_mult=0.1),
            level_embed=dict(decay_mult=0.0, lr_mult=1.0),
            query_embed=dict(decay_mult=0.0, lr_mult=1.0),
            query_feat=dict(decay_mult=0.0, lr_mult=1.0)),
        norm_decay_mult=0.0),
    type='OptimWrapper')
optimizer_config = dict(
    cumulative_iters=4, type='GradientCumulativeOptimizerHook')
original_batch_size = 16
original_lr = 0.0001
original_n_gpus = 8
output_dir = '2025.04.14_Heidelberg_4testset4'
# --- schedule: 30-epoch linear warmup, cosine decay to epoch 300, plus plateau drops
param_scheduler = [
    dict(
        begin=0,
        by_epoch=True,
        convert_to_iter_based=True,
        end=30,
        end_factor=1.0,
        start_factor=0.001,
        type='LinearLR',
        verbose=False),
    dict(
        T_max=270,
        begin=30,
        by_epoch=True,
        convert_to_iter_based=True,
        end=300,
        eta_min=9.375e-07,
        eta_min_ratio=None,
        type='CosineAnnealingLR',
        verbose=False),
    dict(
        by_epoch=True,
        factor=0.75,
        monitor='coco/bbox_mAP',
        patience=50,
        rule='greater',
        type='ReduceOnPlateauLR',
        verbose=False),
]
randomness = dict(deterministic=False, seed=42)
resume = None
test_cfg = dict(type='ValLoop')
# NOTE(review): test split reuses the val_sahi annotations — confirm intended.
test_dataloader = dict(
    batch_size=2,
    dataset=dict(
        ann_file='val_sahi/sahi_coco.json',
        backend_args=None,
        data_prefix=dict(img='val_sahi/', seg='annotations/panoptic_val2017/'),
        data_root='train/data/Stomata_detection/',
        metainfo=dict(classes=('trichome', )),
        pipeline=[
            dict(to_float32=True, type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
            dict(keep_ratio=True, scale=(
                1024,
                768,
            ), type='Resize'),
            dict(
                meta_keys=(
                    'img_id',
                    'img_path',
                    'img',
                    'img_shape',
                    'ori_shape',
                    'scale_factor',
                    'gt_bboxes',
                    'gt_ignore_flags',
                    'gt_bboxes_labels',
                    'gt_masks',
                ),
                type='PackDetInputs'),
        ],
        test_mode=False,
        type='CocoDataset'),
    drop_last=False,
    num_workers=16,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    ann_file='train/data/Stomata_detection/val_sahi/sahi_coco.json',
    backend_args=None,
    format_only=False,
    metric=[
        'bbox',
        'segm',
    ],
    type='CocoMetric')
test_pipeline = [
    dict(to_float32=True, type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(keep_ratio=True, scale=(
        1024,
        768,
    ), type='Resize'),
    dict(
        meta_keys=(
            'img_id',
            'img_path',
            'img',
            'img_shape',
            'ori_shape',
            'scale_factor',
            'gt_bboxes',
            'gt_ignore_flags',
            'gt_bboxes_labels',
            'gt_masks',
        ),
        type='PackDetInputs'),
]
train_ann_file = 'train_sahi/sahi_coco.json'
train_cfg = dict(max_epochs=300, type='EpochBasedTrainLoop', val_interval=1)
# --- training data: CocoDataset wrapped in MultiImageMixDataset for CopyPaste aug
train_dataloader = dict(
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    batch_size=2,
    dataset=dict(
        dataset=dict(
            ann_file='train_sahi/sahi_coco.json',
            backend_args=None,
            data_prefix=dict(
                img='train_sahi/', seg='annotations/panoptic_train2017/'),
            data_root='train/data/Stomata_detection/',
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            metainfo=dict(classes=('trichome', )),
            pipeline=[
                dict(to_float32=True, type='LoadImageFromFile'),
                dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
                dict(prob=0.5, type='RandomFlip'),
                dict(type='PhotoMetricDistortion'),
                dict(
                    img_border_value=(
                        0,
                        0,
                        0,
                    ),
                    interpolation='lanczos',
                    prob=0.5,
                    type='GeomTransform'),
                dict(keep_ratio=True, scale=(
                    1024,
                    768,
                ), type='Resize'),
            ],
            type='CocoDataset'),
        pipeline=[
            dict(
                bbox_occluded_thr=50,
                mask_occluded_thr=1000,
                max_num_pasted=5,
                paste_by_box=False,
                selected=True,
                type='CopyPaste'),
            dict(
                by_mask=True,
                min_gt_bbox_wh=(
                    10,
                    10,
                ),
                min_gt_mask_area=10,
                type='FilterAnnotations'),
            dict(
                meta_keys=(
                    'img_path',
                    'img',
                    'gt_bboxes',
                    'gt_ignore_flags',
                    'gt_bboxes_labels',
                    'gt_masks',
                ),
                type='PackDetInputs'),
        ],
        type='MultiImageMixDataset'),
    num_workers=16,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))
train_dataset = dict(
    dataset=dict(
        ann_file='train_sahi/sahi_coco.json',
        backend_args=None,
        data_prefix=dict(
            img='train_sahi/', seg='annotations/panoptic_train2017/'),
        data_root='train/data/Stomata_detection/',
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        metainfo=dict(classes=('trichome', )),
        pipeline=[
            dict(to_float32=True, type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
            dict(prob=0.5, type='RandomFlip'),
            dict(type='PhotoMetricDistortion'),
            dict(
                img_border_value=(
                    0,
                    0,
                    0,
                ),
                interpolation='lanczos',
                prob=0.5,
                type='GeomTransform'),
            dict(keep_ratio=True, scale=(
                1024,
                768,
            ), type='Resize'),
        ],
        type='CocoDataset'),
    pipeline=[
        dict(
            bbox_occluded_thr=50,
            mask_occluded_thr=1000,
            max_num_pasted=5,
            paste_by_box=False,
            selected=True,
            type='CopyPaste'),
        dict(
            by_mask=True,
            min_gt_bbox_wh=(
                10,
                10,
            ),
            min_gt_mask_area=10,
            type='FilterAnnotations'),
        dict(
            meta_keys=(
                'img_path',
                'img',
                'gt_bboxes',
                'gt_ignore_flags',
                'gt_bboxes_labels',
                'gt_masks',
            ),
            type='PackDetInputs'),
    ],
    type='MultiImageMixDataset')
train_pipeline = [
    dict(
        bbox_occluded_thr=50,
        mask_occluded_thr=1000,
        max_num_pasted=5,
        paste_by_box=False,
        selected=True,
        type='CopyPaste'),
    dict(
        by_mask=True,
        min_gt_bbox_wh=(
            10,
            10,
        ),
        min_gt_mask_area=10,
        type='FilterAnnotations'),
    dict(
        meta_keys=(
            'img_path',
            'img',
            'gt_bboxes',
            'gt_ignore_flags',
            'gt_bboxes_labels',
            'gt_masks',
        ),
        type='PackDetInputs'),
]
val_ann_file = 'val_sahi/sahi_coco.json'
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=2,
    dataset=dict(
        ann_file='val_sahi/sahi_coco.json',
        backend_args=None,
        data_prefix=dict(img='val_sahi/', seg='annotations/panoptic_val2017/'),
        data_root='train/data/Stomata_detection/',
        metainfo=dict(classes=('trichome', )),
        pipeline=[
            dict(to_float32=True, type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
            dict(keep_ratio=True, scale=(
                1024,
                768,
            ), type='Resize'),
            dict(
                meta_keys=(
                    'img_id',
                    'img_path',
                    'img',
                    'img_shape',
                    'ori_shape',
                    'scale_factor',
                    'gt_bboxes',
                    'gt_ignore_flags',
                    'gt_bboxes_labels',
                    'gt_masks',
                ),
                type='PackDetInputs'),
        ],
        test_mode=False,
        type='CocoDataset'),
    drop_last=False,
    num_workers=16,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    ann_file='train/data/Stomata_detection/val_sahi/sahi_coco.json',
    backend_args=None,
    format_only=False,
    metric=[
        'bbox',
        'segm',
    ],
    type='CocoMetric')
val_interval = 1
visualizer = dict(
    name='visualizer',
    type='DetLocalVisualizer',
    vis_backends=[
        dict(type='LocalVisBackend'),
        dict(
            init_kwargs=dict(
                name='2025.04.14_Heidelberg_4testset4', project='StomataPy'),
            type='WandbVisBackend'),
    ])
wandb_project = 'StomataPy'
warmup_epochs = 30
with_cp = True
# double slash preserved from original dump; harmless on POSIX paths
work_dir = 'Models//2025.04.14_Heidelberg_4testset4'
Heidelberg_trichome_testset4/dinov2_detector.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:886170c3cf88b6ef444eedb0a251f670cb40c17a0c79930de2811deb4d9c36a9
3
+ size 1412415811