Spaces:

ECCV2022
/

PSG

Build error

App Files Files Community

Liangyu committed on Aug 9, 2022

Commit

c7f0cc1

1 Parent(s): 1e03b30

add functions

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

OpenPSG/checkpoints/epoch_60.pth +3 -0
OpenPSG/configs/_base_/custom_runtime.py +17 -0
OpenPSG/configs/_base_/datasets/psg.py +93 -0
OpenPSG/configs/_base_/datasets/psg_panoptic.py +72 -0
OpenPSG/configs/_base_/datasets/vg_detection.py +56 -0
OpenPSG/configs/_base_/datasets/vg_sg.py +57 -0
OpenPSG/configs/_base_/models/detr4seg_r101.py +64 -0
OpenPSG/configs/_base_/models/detr4seg_r101_psg.py +137 -0
OpenPSG/configs/_base_/models/detr4seg_r50.py +65 -0
OpenPSG/configs/_base_/models/detr4seg_r50_psg.py +152 -0
OpenPSG/configs/_base_/models/detr_r50.py +64 -0
OpenPSG/configs/_base_/models/mask_rcnn_r50_fpn.py +107 -0
OpenPSG/configs/_base_/models/panoptic_fpn_r101_fpn_psg.py +8 -0
OpenPSG/configs/_base_/models/panoptic_fpn_r50_fpn_psg.py +74 -0
OpenPSG/configs/_base_/models/psgtr_r101.py +5 -0
OpenPSG/configs/_base_/models/psgtr_r50.py +82 -0
OpenPSG/configs/_base_/schedules/schedule_1x.py +10 -0
OpenPSG/configs/_base_/schedules/schedule_3x.py +10 -0
OpenPSG/configs/gpsnet/panoptic_fpn_r101_fpn_1x_predcls_psg.py +26 -0
OpenPSG/configs/gpsnet/panoptic_fpn_r101_fpn_1x_sgdet_psg.py +26 -0
OpenPSG/configs/gpsnet/panoptic_fpn_r50_fpn_1x_predcls_psg.py +41 -0
OpenPSG/configs/gpsnet/panoptic_fpn_r50_fpn_1x_sgdet_psg.py +45 -0
OpenPSG/configs/imp/panoptic_fpn_r101_fpn_1x_predcls_psg.py +28 -0
OpenPSG/configs/imp/panoptic_fpn_r101_fpn_1x_sgdet_psg.py +26 -0
OpenPSG/configs/imp/panoptic_fpn_r50_fpn_1x_predcls_psg.py +44 -0
OpenPSG/configs/imp/panoptic_fpn_r50_fpn_1x_sgdet_psg.py +48 -0
OpenPSG/configs/motifs/panoptic_fpn_r101_fpn_1x_predcls_psg.py +28 -0
OpenPSG/configs/motifs/panoptic_fpn_r101_fpn_1x_sgdet_psg.py +28 -0
OpenPSG/configs/motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py +241 -0
OpenPSG/configs/motifs/panoptic_fpn_r50_fpn_1x_sgdet_psg.py +44 -0
OpenPSG/configs/psgformer/psgformer_r101_psg.py +16 -0
OpenPSG/configs/psgformer/psgformer_r50.py +96 -0
OpenPSG/configs/psgformer/psgformer_r50_psg.py +244 -0
OpenPSG/configs/psgformer/psgformer_r50_psg_inference.py +31 -0
OpenPSG/configs/psgtr/psgtr_r101_psg.py +231 -0
OpenPSG/configs/psgtr/psgtr_r50.py +82 -0
OpenPSG/configs/psgtr/psgtr_r50_psg.py +233 -0
OpenPSG/configs/psgtr/psgtr_r50_psg_inference.py +31 -0
OpenPSG/configs/vctree/panoptic_fpn_r101_fpn_1x_predcls_psg.py +28 -0
OpenPSG/configs/vctree/panoptic_fpn_r101_fpn_1x_sgdet_psg.py +28 -0
OpenPSG/configs/vctree/panoptic_fpn_r50_fpn_1x_predcls_psg.py +43 -0
OpenPSG/configs/vctree/panoptic_fpn_r50_fpn_1x_sgdet_psg.py +49 -0
README.md +3 -3
app.py +133 -13
fake_gan.py +56 -0
images/cooking.jpg +0 -0
images/forrest-gump.jpg +0 -0
images/friends.jpg +0 -0
images/mbappe.jpg +0 -0
images/messi.jpg +0 -0

OpenPSG/checkpoints/epoch_60.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c4ddcbda74686568b7e6b8145f7f33030407e27e390c37c23206f95c51829ed
+size 531751994

OpenPSG/configs/_base_/custom_runtime.py ADDED Viewed

	@@ -0,0 +1,17 @@

+# Shared runtime settings (checkpointing, logging, hooks, workflow). Other
+# configs in this commit pull this file in through their `_base_` list.
+# max_keep_ckpts=1: only the most recent checkpoint is retained.
+checkpoint_config = dict(interval=1, max_keep_ckpts=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+custom_hooks = [dict(type='NumClassCheckHook')]
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+# No checkpoint is loaded or resumed by default; child configs may override.
+load_from = None
+resume_from = None
+# Both a 'train' and a 'val' phase are listed, so the runner also iterates
+# the `val` dataset entry as part of the workflow.
+# NOTE(review): presumably intended to log validation loss - confirm.
+workflow = [('train', 1), ('val', 1)]

OpenPSG/configs/_base_/datasets/psg.py ADDED Viewed

	@@ -0,0 +1,93 @@

+# dataset settings
+# Base dataset config for PSG: annotation/image locations, the train and
+# test pipelines, and the train/val/test dataset entries built from them.
+dataset_type = 'PanopticSceneGraphDataset'
+# Paths are relative to the working directory the tools are launched from.
+ann_file = './data/psg/psg.json'
+coco_root = 'data/coco'
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
+                    std=[58.395, 57.12, 57.375],
+                    to_rgb=True)
+# Training pipeline: loads boxes, relations, masks and semantic segmentation,
+# then resizes, flips, normalizes and pads before bundling.
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='LoadPanopticSceneGraphAnnotations',
+        with_bbox=True,
+        with_rel=True,
+        with_mask=True,
+        with_seg=True,
+    ),
+    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='SegRescale', scale_factor=1 / 4),
+    dict(type='SceneGraphFormatBundle'),
+    dict(
+        type='Collect',
+        keys=[
+            'img',
+            'gt_bboxes',
+            'gt_labels',
+            'gt_rels',
+            'gt_relmaps',
+            'gt_masks',
+            'gt_semantic_seg',
+        ],
+    ),
+]
+# Test pipeline: single scale (1333, 800), no flipping. Unlike training, only
+# boxes and relations are loaded (no masks / semantic segmentation).
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    # Since the forward process may need gt info, annos must be loaded.
+    dict(type='LoadPanopticSceneGraphAnnotations',
+         with_bbox=True,
+         with_rel=True),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            # NOTE: Do not change the img to DC.
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='ToTensor', keys=['gt_bboxes', 'gt_labels']),
+            dict(
+                type='ToDataContainer',
+                fields=(dict(key='gt_bboxes'), dict(key='gt_labels')),
+            ),
+            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+        ],
+    ),
+]
+# `val` and `test` are configured identically: both read split='test' through
+# test_pipeline. All three entries share the same annotation file and use
+# coco_root for both images and segmentation maps.
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=ann_file,
+        img_prefix=coco_root,
+        seg_prefix=coco_root,
+        pipeline=train_pipeline,
+        split='train',
+        all_bboxes=True,
+    ),
+    val=dict(
+        type=dataset_type,
+        ann_file=ann_file,
+        img_prefix=coco_root,
+        seg_prefix=coco_root,
+        pipeline=test_pipeline,
+        split='test',
+        all_bboxes=True,
+    ),
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file,
+        img_prefix=coco_root,
+        seg_prefix=coco_root,
+        pipeline=test_pipeline,
+        split='test',
+        all_bboxes=True,
+    ),
+)

OpenPSG/configs/_base_/datasets/psg_panoptic.py ADDED Viewed

	@@ -0,0 +1,72 @@

+# dataset settings
+# Base dataset config for panoptic segmentation on PSG: no relation
+# annotations are requested, and evaluation uses the 'PQ' metric.
+dataset_type = 'PanopticSceneGraphDataset'
+# Paths are relative to the working directory the tools are launched from.
+ann_file = './data/psg/psg.json'
+coco_root = './data/coco'
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
+                    std=[58.395, 57.12, 57.375],
+                    to_rgb=True)
+# Training pipeline: boxes, masks and semantic segmentation only.
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='LoadPanopticSceneGraphAnnotations',
+        with_bbox=True,
+        with_mask=True,
+        with_seg=True,
+    ),
+    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='SegRescale', scale_factor=1 / 4),
+    dict(type='DefaultFormatBundle'),
+    dict(
+        type='Collect',
+        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg'],
+    ),
+]
+# Test pipeline: image only, single scale (1333, 800), no flipping; no
+# annotations are loaded.
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ],
+    ),
+]
+# `val` and `test` are configured identically (split='test', test_pipeline).
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=ann_file,
+        img_prefix=coco_root,
+        seg_prefix=coco_root,
+        pipeline=train_pipeline,
+        split='train',
+    ),
+    val=dict(
+        type=dataset_type,
+        ann_file=ann_file,
+        img_prefix=coco_root,
+        seg_prefix=coco_root,
+        pipeline=test_pipeline,
+        split='test',
+    ),
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file,
+        img_prefix=coco_root,
+        seg_prefix=coco_root,
+        pipeline=test_pipeline,
+        split='test',
+    ),
+)
+evaluation = dict(interval=1, metric='PQ')

OpenPSG/configs/_base_/datasets/vg_detection.py ADDED Viewed

	@@ -0,0 +1,56 @@

+# dataset settings
+# Base dataset config for box detection on Visual Genome: only bounding-box
+# annotations are loaded and evaluation uses the 'bbox' metric.
+# allow_failed_imports=False makes a missing openpsg module a hard error.
+custom_imports = dict(imports=[
+    'openpsg.datasets',
+    'openpsg.datasets.pipelines',
+],
+                      allow_failed_imports=False)
+dataset_type = 'SceneGraphDataset'
+# Paths are relative to the working directory the tools are launched from.
+ann_file = 'data/vg/data_openpsg.json'
+img_dir = 'data/vg/VG_100K'
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
+                    std=[58.395, 57.12, 57.375],
+                    to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadSceneGraphAnnotations', with_bbox=True),
+    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+]
+# Test pipeline: image only, single scale (1333, 800), no flipping.
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='MultiScaleFlipAug',
+         img_scale=(1333, 800),
+         flip=False,
+         transforms=[
+             dict(type='Resize', keep_ratio=True),
+             dict(type='RandomFlip'),
+             dict(type='Normalize', **img_norm_cfg),
+             dict(type='Pad', size_divisor=32),
+             dict(type='ImageToTensor', keys=['img']),
+             dict(type='Collect', keys=['img']),
+         ])
+]
+# `val` and `test` are configured identically (split='test', test_pipeline).
+data = dict(samples_per_gpu=2,
+            workers_per_gpu=2,
+            train=dict(type=dataset_type,
+                       ann_file=ann_file,
+                       img_prefix=img_dir,
+                       pipeline=train_pipeline,
+                       split='train'),
+            val=dict(type=dataset_type,
+                     ann_file=ann_file,
+                     img_prefix=img_dir,
+                     pipeline=test_pipeline,
+                     split='test'),
+            test=dict(type=dataset_type,
+                      ann_file=ann_file,
+                      img_prefix=img_dir,
+                      pipeline=test_pipeline,
+                      split='test'))
+evaluation = dict(interval=1, metric='bbox')

OpenPSG/configs/_base_/datasets/vg_sg.py ADDED Viewed

	@@ -0,0 +1,57 @@

+# dataset settings
+dataset_type = 'SceneGraphDataset'
+ann_file = '/mnt/ssd/gzj/data/VisualGenome/data_openpsg.json'
+img_dir = '/mnt/ssd/gzj/data/VisualGenome/VG_100K'
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
+                    std=[58.395, 57.12, 57.375],
+                    to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
+    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=32),
+    dict(type='SceneGraphFormatBundle'),
+    dict(type='Collect',
+         keys=['img', 'gt_bboxes', 'gt_labels', 'gt_rels', 'gt_relmaps']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    # Since the forward process may need gt info, annos must be loaded.
+    dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            # NOTE: Do not change the img to DC.
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='ToTensor', keys=['gt_bboxes', 'gt_labels']),
+            dict(type='ToDataContainer',
+                 fields=(dict(key='gt_bboxes'), dict(key='gt_labels'))),
+            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+        ])
+]
+data = dict(samples_per_gpu=2,
+            workers_per_gpu=2,
+            train=dict(type=dataset_type,
+                       ann_file=ann_file,
+                       img_prefix=img_dir,
+                       pipeline=train_pipeline,
+                       split='train'),
+            val=dict(type=dataset_type,
+                     ann_file=ann_file,
+                     img_prefix=img_dir,
+                     pipeline=test_pipeline,
+                     split='test'),
+            test=dict(type=dataset_type,
+                      ann_file=ann_file,
+                      img_prefix=img_dir,
+                      pipeline=test_pipeline,
+                      split='test'))

OpenPSG/configs/_base_/models/detr4seg_r101.py ADDED Viewed

	@@ -0,0 +1,64 @@

+# Base model config: 'DETR4seg' with a ResNet-101 backbone and a
+# 'detr4segHead' built on a 6-layer encoder / 6-layer decoder transformer.
+model = dict(
+    type='DETR4seg',
+    # All four ResNet stages are exposed (out_indices 0-3); BatchNorm layers
+    # are frozen (requires_grad=False, norm_eval=True).
+    backbone=dict(type='ResNet',
+                  depth=101,
+                  num_stages=4,
+                  out_indices=(0, 1, 2, 3),
+                  frozen_stages=1,
+                  norm_cfg=dict(type='BN', requires_grad=False),
+                  norm_eval=True,
+                  style='pytorch',
+                  init_cfg=dict(type='Pretrained',
+                                checkpoint='torchvision://resnet101')),
+    # num_classes=80 is only a default here; detr4seg_r101_psg.py overrides
+    # it with len(object_classes).
+    bbox_head=dict(type='detr4segHead',
+                   num_classes=80,
+                   in_channels=2048,
+                   transformer=dict(
+                       type='Transformer',
+                       encoder=dict(type='DetrTransformerEncoder',
+                                    num_layers=6,
+                                    transformerlayers=dict(
+                                        type='BaseTransformerLayer',
+                                        attn_cfgs=[
+                                            dict(type='MultiheadAttention',
+                                                 embed_dims=256,
+                                                 num_heads=8,
+                                                 dropout=0.1)
+                                        ],
+                                        feedforward_channels=2048,
+                                        ffn_dropout=0.1,
+                                        operation_order=('self_attn', 'norm',
+                                                         'ffn', 'norm'))),
+                       decoder=dict(
+                           type='DetrTransformerDecoder',
+                           return_intermediate=True,
+                           num_layers=6,
+                           transformerlayers=dict(
+                               type='DetrTransformerDecoderLayer',
+                               attn_cfgs=dict(type='MultiheadAttention',
+                                              embed_dims=256,
+                                              num_heads=8,
+                                              dropout=0.1),
+                               feedforward_channels=2048,
+                               ffn_dropout=0.1,
+                               operation_order=('self_attn', 'norm',
+                                                'cross_attn', 'norm', 'ffn',
+                                                'norm')),
+                       )),
+                   # num_feats=128 is half of the transformer embed_dims=256.
+                   positional_encoding=dict(type='SinePositionalEncoding',
+                                            num_feats=128,
+                                            normalize=True),
+                   loss_cls=dict(type='CrossEntropyLoss',
+                                 use_sigmoid=False,
+                                 loss_weight=1.0,
+                                 class_weight=1.0),
+                   loss_bbox=dict(type='L1Loss', loss_weight=5.0),
+                   loss_iou=dict(type='GIoULoss', loss_weight=2.0),
+                   # NOTE(review): the R50 sibling config additionally sets a
+                   # focal_loss and uses 'psgtrDiceLoss' here - confirm the
+                   # difference is intentional.
+                   dice_loss=dict(type='DiceLoss', loss_weight=1.0)),
+    # training and testing settings
+    # The assigner cost weights (1 / 5 / 2) equal the loss weights above.
+    train_cfg=dict(assigner=dict(
+        type='HungarianAssigner',
+        cls_cost=dict(type='ClassificationCost', weight=1.),
+        reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
+        iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
+    test_cfg=dict(max_per_img=100))

OpenPSG/configs/_base_/models/detr4seg_r101_psg.py ADDED Viewed

	@@ -0,0 +1,137 @@

+_base_ = [
+    '../_base_/models/detr4seg_r101.py', '../_base_/datasets/psg.py',
+    '../_base_/custom_runtime.py'
+]
+custom_imports = dict(imports=[
+    'openpsg.models.frameworks.detr4seg',
+    'openpsg.models.relation_heads.detr4seg_head', 'openpsg.datasets',
+    'openpsg.datasets.pipelines.loading',
+    'openpsg.datasets.pipelines.rel_randomcrop',
+    'openpsg.models.relation_heads.approaches.matcher',
+    'openpsg.models.losses.seg_losses'
+],
+                      allow_failed_imports=False)
+object_classes = [
+    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
+    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
+    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
+    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
+    'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
+    'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
+    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
+    'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+    'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
+    'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
+    'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
+    'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
+    'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
+    'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
+    'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
+    'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
+    'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
+    'food-other-merged', 'building-other-merged', 'rock-merged',
+    'wall-other-merged', 'rug-merged'
+]
+model = dict(bbox_head=dict(
+    num_classes=len(object_classes),
+    object_classes=object_classes,
+))
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
+                    std=[58.395, 57.12, 57.375],
+                    to_rgb=True)
+# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
+# from the default setting in mmdet.
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(
+        type='AutoAugment',
+        policies=[
+            [
+                dict(type='Resize',
+                     img_scale=[(480, 1333), (512, 1333), (544, 1333),
+                                (576, 1333), (608, 1333), (640, 1333),
+                                (672, 1333), (704, 1333), (736, 1333),
+                                (768, 1333), (800, 1333)],
+                     multiscale_mode='value',
+                     keep_ratio=True)
+            ],
+            [
+                dict(type='Resize',
+                     img_scale=[(400, 1333), (500, 1333), (600, 1333)],
+                     multiscale_mode='value',
+                     keep_ratio=True),
+                dict(type='RandomCrop',
+                     crop_type='absolute_range',
+                     crop_size=(384, 600),
+                     allow_negative_crop=False),  # no empty relations
+                dict(type='Resize',
+                     img_scale=[(480, 1333), (512, 1333), (544, 1333),
+                                (576, 1333), (608, 1333), (640, 1333),
+                                (672, 1333), (704, 1333), (736, 1333),
+                                (768, 1333), (800, 1333)],
+                     multiscale_mode='value',
+                     override=True,
+                     keep_ratio=True)
+            ]
+        ]),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=1),
+    dict(type='RelsFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+# test_pipeline, NOTE the Pad's size_divisor is different from the default
+# setting (size_divisor=32). While there is little effect on the performance
+# whether we use the default setting or use size_divisor=1.
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='MultiScaleFlipAug',
+         img_scale=(1333, 800),
+         flip=False,
+         transforms=[
+             dict(type='Resize', keep_ratio=True),
+             dict(type='RandomFlip'),
+             dict(type='Normalize', **img_norm_cfg),
+             dict(type='Pad', size_divisor=1),
+             dict(type='ImageToTensor', keys=['img']),
+             dict(type='Collect', keys=['img'])
+         ])
+]
+data = dict(samples_per_gpu=2,
+            workers_per_gpu=2,
+            train=dict(pipeline=train_pipeline),
+            val=dict(pipeline=test_pipeline),
+            test=dict(pipeline=test_pipeline))
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=0.0001,
+    weight_decay=0.0001,
+    paramwise_cfg=dict(
+        custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}))
+optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
+# learning policy
+lr_config = dict(policy='step', step=110)
+runner = dict(type='EpochBasedRunner', max_epochs=150)
+project_name = 'detr4seg'
+expt_name = 'detr4seg_r101_coco'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[dict(type='TextLoggerHook'),
+           dict(type='TensorboardLoggerHook')],
+)
+load_from = '/mnt/ssd/gzj/test/OpenPSG/detr_r50_fb_origin.pth'

OpenPSG/configs/_base_/models/detr4seg_r50.py ADDED Viewed

	@@ -0,0 +1,65 @@

+# Base model config: 'DETR4seg' with a ResNet-50 backbone and a
+# 'detr4segHead' built on a 6-layer encoder / 6-layer decoder transformer.
+model = dict(
+    type='DETR4seg',
+    # All four ResNet stages are exposed (out_indices 0-3); BatchNorm layers
+    # are frozen (requires_grad=False, norm_eval=True).
+    backbone=dict(type='ResNet',
+                  depth=50,
+                  num_stages=4,
+                  out_indices=(0, 1, 2, 3),
+                  frozen_stages=1,
+                  norm_cfg=dict(type='BN', requires_grad=False),
+                  norm_eval=True,
+                  style='pytorch',
+                  init_cfg=dict(type='Pretrained',
+                                checkpoint='torchvision://resnet50')),
+    # num_classes=80 is only a default here; detr4seg_r50_psg.py overrides
+    # it with len(object_classes).
+    bbox_head=dict(type='detr4segHead',
+                   num_classes=80,
+                   in_channels=2048,
+                   transformer=dict(
+                       type='Transformer',
+                       encoder=dict(type='DetrTransformerEncoder',
+                                    num_layers=6,
+                                    transformerlayers=dict(
+                                        type='BaseTransformerLayer',
+                                        attn_cfgs=[
+                                            dict(type='MultiheadAttention',
+                                                 embed_dims=256,
+                                                 num_heads=8,
+                                                 dropout=0.1)
+                                        ],
+                                        feedforward_channels=2048,
+                                        ffn_dropout=0.1,
+                                        operation_order=('self_attn', 'norm',
+                                                         'ffn', 'norm'))),
+                       decoder=dict(
+                           type='DetrTransformerDecoder',
+                           return_intermediate=True,
+                           num_layers=6,
+                           transformerlayers=dict(
+                               type='DetrTransformerDecoderLayer',
+                               attn_cfgs=dict(type='MultiheadAttention',
+                                              embed_dims=256,
+                                              num_heads=8,
+                                              dropout=0.1),
+                               feedforward_channels=2048,
+                               ffn_dropout=0.1,
+                               operation_order=('self_attn', 'norm',
+                                                'cross_attn', 'norm', 'ffn',
+                                                'norm')),
+                       )),
+                   # num_feats=128 is half of the transformer embed_dims=256.
+                   positional_encoding=dict(type='SinePositionalEncoding',
+                                            num_feats=128,
+                                            normalize=True),
+                   loss_cls=dict(type='CrossEntropyLoss',
+                                 use_sigmoid=False,
+                                 loss_weight=1.0,
+                                 class_weight=1.0),
+                   loss_bbox=dict(type='L1Loss', loss_weight=5.0),
+                   loss_iou=dict(type='GIoULoss', loss_weight=2.0),
+                   # Two additional losses, both with loss_weight=1.0.
+                   # NOTE(review): presumably these supervise the predicted
+                   # masks - confirm against the head implementation.
+                   focal_loss=dict(type='BCEFocalLoss', loss_weight=1.0),
+                   dice_loss=dict(type='psgtrDiceLoss', loss_weight=1.0)),
+    # training and testing settings
+    # The assigner cost weights (1 / 5 / 2) equal the cls / bbox / iou loss
+    # weights above.
+    train_cfg=dict(assigner=dict(
+        type='HungarianAssigner',
+        cls_cost=dict(type='ClassificationCost', weight=1.),
+        reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
+        iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
+    test_cfg=dict(max_per_img=100))

OpenPSG/configs/_base_/models/detr4seg_r50_psg.py ADDED Viewed

	@@ -0,0 +1,152 @@

+# DETR4seg (ResNet-50) on PSG: inherits the R50 model, the PSG dataset and
+# the shared runtime, then overrides classes, pipelines, optimizer, schedule
+# and logging.
+_base_ = ['./detr4seg_r50.py', '../datasets/psg.py', '../custom_runtime.py']
+custom_imports = dict(imports=[
+    'openpsg.models.frameworks.detr4seg',
+    'openpsg.models.relation_heads.detr4seg_head', 'openpsg.datasets',
+    'openpsg.datasets.pipelines.loading',
+    'openpsg.datasets.pipelines.rel_randomcrop',
+    'openpsg.models.relation_heads.approaches.matcher',
+    'openpsg.models.losses.seg_losses'
+],
+                      allow_failed_imports=False)
+# Category names handed to the head; len(object_classes) replaces the base
+# model's num_classes.
+object_classes = [
+    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
+    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
+    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
+    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
+    'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
+    'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
+    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
+    'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+    'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
+    'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
+    'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
+    'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
+    'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
+    'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
+    'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
+    'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
+    'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
+    'food-other-merged', 'building-other-merged', 'rock-merged',
+    'wall-other-merged', 'rug-merged'
+]
+model = dict(bbox_head=dict(
+    num_classes=len(object_classes),
+    object_classes=object_classes,
+))
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
+                    std=[58.395, 57.12, 57.375],
+                    to_rgb=True)
+# train_pipeline, NOTE the img_scale and the Pad's size_divisor are different
+# from the default setting in mmdet.
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadPanopticSceneGraphAnnotations',
+         with_bbox=True,
+         with_mask=True,
+         with_seg=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    # Two augmentation policies: plain multi-scale resize, or
+    # resize -> random crop -> multi-scale resize.
+    dict(
+        type='AutoAugment',
+        policies=[
+            [
+                dict(type='Resize',
+                     img_scale=[(480, 1333), (512, 1333), (544, 1333),
+                                (576, 1333), (608, 1333), (640, 1333),
+                                (672, 1333), (704, 1333), (736, 1333),
+                                (768, 1333), (800, 1333)],
+                     multiscale_mode='value',
+                     keep_ratio=True)
+            ],
+            [
+                dict(type='Resize',
+                     img_scale=[(400, 1333), (500, 1333), (600, 1333)],
+                     multiscale_mode='value',
+                     keep_ratio=True),
+                dict(type='RandomCrop',
+                     crop_type='absolute_range',
+                     crop_size=(384, 600),
+                     allow_negative_crop=False),  # no empty relations
+                dict(type='Resize',
+                     img_scale=[(480, 1333), (512, 1333), (544, 1333),
+                                (576, 1333), (608, 1333), (640, 1333),
+                                (672, 1333), (704, 1333), (736, 1333),
+                                (768, 1333), (800, 1333)],
+                     multiscale_mode='value',
+                     override=True,
+                     keep_ratio=True)
+            ]
+        ]),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=1),
+    dict(type='RelsFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
+]
+# test_pipeline, NOTE the Pad's size_divisor is different from the default
+# setting (size_divisor=32), although it has little effect on performance
+# whether we use the default setting or size_divisor=1.
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='MultiScaleFlipAug',
+         img_scale=(1333, 800),
+         flip=False,
+         transforms=[
+             dict(type='Resize', keep_ratio=True),
+             dict(type='RandomFlip'),
+             dict(type='Normalize', **img_norm_cfg),
+             dict(type='Pad', size_divisor=1),
+             dict(type='ImageToTensor', keys=['img']),
+             dict(type='Collect', keys=['img'])
+         ])
+]
+# Only the pipelines and loader sizes are overridden; dataset type, paths and
+# splits come from the inherited PSG dataset config.
+data = dict(samples_per_gpu=1,
+            workers_per_gpu=1,
+            train=dict(pipeline=train_pipeline),
+            val=dict(pipeline=test_pipeline),
+            test=dict(pipeline=test_pipeline))
+# optimizer
+# Per-parameter-group multipliers relative to the base lr: backbone x0.1,
+# 'bbox_attention' and 'mask_head' x10.
+optimizer = dict(type='AdamW',
+                 lr=0.00001,
+                 weight_decay=0.0001,
+                 paramwise_cfg=dict(
+                     custom_keys={
+                         'backbone': dict(lr_mult=0.1, decay_mult=1.0),
+                         'bbox_attention': dict(lr_mult=10.0, decay_mult=1.0),
+                         'mask_head': dict(lr_mult=10.0, decay_mult=1.0)
+                     }))
+optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
+# learning policy
+# Single step at epoch 8 of 10.
+lr_config = dict(policy='step', step=8)
+runner = dict(type='EpochBasedRunner', max_epochs=10)
+evaluation = dict(interval=1, metric='PQ')
+# Overrides the shared runtime's max_keep_ckpts=1 so that up to 10
+# checkpoints are retained.
+checkpoint_config = dict(interval=1, max_keep_ckpts=10)
+project_name = 'detr4seg'
+expt_name = 'test_detr4seg_r50_psg'
+work_dir = f'./work_dirs/{expt_name}'
+# Logs to text, TensorBoard and Weights & Biases; the W&B run is named after
+# expt_name under project_name.
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(type='TensorboardLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ))
+    ],
+)
+# Relative path: the checkpoint is expected in the launch directory.
+load_from = 'detr_pan_r50.pth'

OpenPSG/configs/_base_/models/detr_r50.py ADDED Viewed

	@@ -0,0 +1,64 @@

+model = dict(
+    type='DETR',
+    backbone=dict(type='ResNet',
+                  depth=50,
+                  num_stages=4,
+                  out_indices=(3, ),
+                  frozen_stages=1,
+                  norm_cfg=dict(type='BN', requires_grad=False),
+                  norm_eval=True,
+                  style='pytorch',
+                  init_cfg=dict(type='Pretrained',
+                                checkpoint='torchvision://resnet50')),
+    bbox_head=dict(type='DETRHead',
+                   num_classes=80,
+                   in_channels=2048,
+                   transformer=dict(
+                       type='Transformer',
+                       encoder=dict(type='DetrTransformerEncoder',
+                                    num_layers=6,
+                                    transformerlayers=dict(
+                                        type='BaseTransformerLayer',
+                                        attn_cfgs=[
+                                            dict(type='MultiheadAttention',
+                                                 embed_dims=256,
+                                                 num_heads=8,
+                                                 dropout=0.1)
+                                        ],
+                                        feedforward_channels=2048,
+                                        ffn_dropout=0.1,
+                                        operation_order=('self_attn', 'norm',
+                                                         'ffn', 'norm'))),
+                       decoder=dict(
+                           type='DetrTransformerDecoder',
+                           return_intermediate=True,
+                           num_layers=6,
+                           transformerlayers=dict(
+                               type='DetrTransformerDecoderLayer',
+                               attn_cfgs=dict(type='MultiheadAttention',
+                                              embed_dims=256,
+                                              num_heads=8,
+                                              dropout=0.1),
+                               feedforward_channels=2048,
+                               ffn_dropout=0.1,
+                               operation_order=('self_attn', 'norm',
+                                                'cross_attn', 'norm', 'ffn',
+                                                'norm')),
+                       )),
+                   positional_encoding=dict(type='SinePositionalEncoding',
+                                            num_feats=128,
+                                            normalize=True),
+                   loss_cls=dict(type='CrossEntropyLoss',
+                                 bg_cls_weight=0.1,
+                                 use_sigmoid=False,
+                                 loss_weight=1.0,
+                                 class_weight=1.0),
+                   loss_bbox=dict(type='L1Loss', loss_weight=5.0),
+                   loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
+    # training and testing settings
+    train_cfg=dict(assigner=dict(
+        type='HungarianAssigner',
+        cls_cost=dict(type='ClassificationCost', weight=1.),
+        reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
+        iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
+    test_cfg=dict(max_per_img=100))

OpenPSG/configs/_base_/models/mask_rcnn_r50_fpn.py ADDED Viewed

	@@ -0,0 +1,107 @@

+# model settings
+model = dict(
+    type='MaskRCNN',
+    backbone=dict(type='ResNet',
+                  depth=50,
+                  num_stages=4,
+                  out_indices=(0, 1, 2, 3),
+                  frozen_stages=1,
+                  norm_cfg=dict(type='BN', requires_grad=True),
+                  norm_eval=True,
+                  style='pytorch',
+                  init_cfg=dict(type='Pretrained',
+                                checkpoint='torchvision://resnet50')),
+    neck=dict(type='FPN',
+              in_channels=[256, 512, 1024, 2048],
+              out_channels=256,
+              num_outs=5),
+    rpn_head=dict(type='RPNHead',
+                  in_channels=256,
+                  feat_channels=256,
+                  anchor_generator=dict(type='AnchorGenerator',
+                                        scales=[8],
+                                        ratios=[0.5, 1.0, 2.0],
+                                        strides=[4, 8, 16, 32, 64]),
+                  bbox_coder=dict(type='DeltaXYWHBBoxCoder',
+                                  target_means=[.0, .0, .0, .0],
+                                  target_stds=[1.0, 1.0, 1.0, 1.0]),
+                  loss_cls=dict(type='CrossEntropyLoss',
+                                use_sigmoid=True,
+                                loss_weight=1.0),
+                  loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+    roi_head=dict(type='StandardRoIHead',
+                  bbox_roi_extractor=dict(type='SingleRoIExtractor',
+                                          roi_layer=dict(type='RoIAlign',
+                                                         output_size=7,
+                                                         sampling_ratio=0),
+                                          out_channels=256,
+                                          featmap_strides=[4, 8, 16, 32]),
+                  bbox_head=dict(
+                      type='Shared2FCBBoxHead',
+                      in_channels=256,
+                      fc_out_channels=1024,
+                      roi_feat_size=7,
+                      num_classes=80,
+                      bbox_coder=dict(type='DeltaXYWHBBoxCoder',
+                                      target_means=[0., 0., 0., 0.],
+                                      target_stds=[0.1, 0.1, 0.2, 0.2]),
+                      reg_class_agnostic=False,
+                      loss_cls=dict(type='CrossEntropyLoss',
+                                    use_sigmoid=False,
+                                    loss_weight=1.0),
+                      loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+                  mask_roi_extractor=dict(type='SingleRoIExtractor',
+                                          roi_layer=dict(type='RoIAlign',
+                                                         output_size=14,
+                                                         sampling_ratio=0),
+                                          out_channels=256,
+                                          featmap_strides=[4, 8, 16, 32]),
+                  mask_head=dict(type='FCNMaskHead',
+                                 num_convs=4,
+                                 in_channels=256,
+                                 conv_out_channels=256,
+                                 num_classes=80,
+                                 loss_mask=dict(type='CrossEntropyLoss',
+                                                use_mask=True,
+                                                loss_weight=1.0))),
+    # model training and testing settings
+    train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner',
+                                          pos_iou_thr=0.7,
+                                          neg_iou_thr=0.3,
+                                          min_pos_iou=0.3,
+                                          match_low_quality=True,
+                                          ignore_iof_thr=-1),
+                            sampler=dict(type='RandomSampler',
+                                         num=256,
+                                         pos_fraction=0.5,
+                                         neg_pos_ub=-1,
+                                         add_gt_as_proposals=False),
+                            allowed_border=-1,
+                            pos_weight=-1,
+                            debug=False),
+                   rpn_proposal=dict(nms_pre=2000,
+                                     max_per_img=1000,
+                                     nms=dict(type='nms', iou_threshold=0.7),
+                                     min_bbox_size=0),
+                   rcnn=dict(assigner=dict(type='MaxIoUAssigner',
+                                           pos_iou_thr=0.5,
+                                           neg_iou_thr=0.5,
+                                           min_pos_iou=0.5,
+                                           match_low_quality=True,
+                                           ignore_iof_thr=-1),
+                             sampler=dict(type='RandomSampler',
+                                          num=512,
+                                          pos_fraction=0.25,
+                                          neg_pos_ub=-1,
+                                          add_gt_as_proposals=True),
+                             mask_size=28,
+                             pos_weight=-1,
+                             debug=False)),
+    test_cfg=dict(rpn=dict(nms_pre=1000,
+                           max_per_img=1000,
+                           nms=dict(type='nms', iou_threshold=0.7),
+                           min_bbox_size=0),
+                  rcnn=dict(score_thr=0.05,
+                            nms=dict(type='nms', iou_threshold=0.5),
+                            max_per_img=100,
+                            mask_thr_binary=0.5)))

OpenPSG/configs/_base_/models/panoptic_fpn_r101_fpn_psg.py ADDED Viewed

	@@ -0,0 +1,8 @@

+_base_ = './panoptic_fpn_r50_fpn_psg.py'
+model = dict(backbone=dict(
+    depth=101,
+    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
+expt_name = 'panoptic_fpn_r101_fpn_psg'
+load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'

OpenPSG/configs/_base_/models/panoptic_fpn_r50_fpn_psg.py ADDED Viewed

	@@ -0,0 +1,74 @@

+_base_ = [
+    '../models/mask_rcnn_r50_fpn.py',
+    '../datasets/psg_panoptic.py',
+    '../schedules/schedule_1x.py',
+    '../custom_runtime.py',
+]
+model = dict(
+    type='PanopticFPN',
+    semantic_head=dict(
+        type='PanopticFPNHead',
+        num_things_classes=80,
+        num_stuff_classes=53,
+        in_channels=256,
+        inner_channels=128,
+        start_level=0,
+        end_level=4,
+        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
+        conv_cfg=None,
+        loss_seg=dict(type='CrossEntropyLoss',
+                      ignore_index=255,
+                      loss_weight=0.5),
+    ),
+    panoptic_fusion_head=dict(type='HeuristicFusionHead',
+                              num_things_classes=80,
+                              num_stuff_classes=53),
+    test_cfg=dict(panoptic=dict(
+        score_thr=0.6,
+        max_per_img=100,
+        mask_thr_binary=0.5,
+        mask_overlap=0.5,
+        nms=dict(type='nms', iou_threshold=0.5, class_agnostic=True),
+        stuff_area_limit=4096,
+    )),
+)
+custom_hooks = []
+# Change batch size and learning rate
+data = dict(samples_per_gpu=8,
+            # workers_per_gpu=2
+            )
+# optimizer = dict(lr=0.02)
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(_delete_=True,
+                        grad_clip=dict(max_norm=35, norm_type=2))
+lr_config = dict(policy='step',
+                 warmup='linear',
+                 warmup_iters=500,
+                 warmup_ratio=1.0 / 3,
+                 step=[8, 11])
+project_name = 'openpsg'
+expt_name = 'panoptic_fpn_r50_fpn_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ),
+        ),
+    ],
+)
+load_from = 'work_dirs/checkpoints/panoptic_fpn_r50_fpn_1x_coco_20210821_101153-9668fd13.pth'

OpenPSG/configs/_base_/models/psgtr_r101.py ADDED Viewed

	@@ -0,0 +1,5 @@

+_base_ = './psgtr_r50.py'
+model = dict(backbone=dict(
+    depth=101,
+    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))

OpenPSG/configs/_base_/models/psgtr_r50.py ADDED Viewed

	@@ -0,0 +1,82 @@

+model = dict(
+    type='PSGTr',
+    backbone=dict(type='ResNet',
+                  depth=50,
+                  num_stages=4,
+                  out_indices=(0, 1, 2, 3),
+                  frozen_stages=1,
+                  norm_cfg=dict(type='BN', requires_grad=False),
+                  norm_eval=True,
+                  style='pytorch',
+                  init_cfg=dict(type='Pretrained',
+                                checkpoint='torchvision://resnet50')),
+    bbox_head=dict(type='PSGTrHead',
+                   num_classes=80,
+                   num_relations=117,
+                   in_channels=2048,
+                   transformer=dict(
+                       type='Transformer',
+                       encoder=dict(type='DetrTransformerEncoder',
+                                    num_layers=6,
+                                    transformerlayers=dict(
+                                        type='BaseTransformerLayer',
+                                        attn_cfgs=[
+                                            dict(type='MultiheadAttention',
+                                                 embed_dims=256,
+                                                 num_heads=8,
+                                                 dropout=0.1)
+                                        ],
+                                        feedforward_channels=2048,
+                                        ffn_dropout=0.1,
+                                        operation_order=('self_attn', 'norm',
+                                                         'ffn', 'norm'))),
+                       decoder=dict(
+                           type='DetrTransformerDecoder',
+                           return_intermediate=True,
+                           num_layers=6,
+                           transformerlayers=dict(
+                               type='DetrTransformerDecoderLayer',
+                               attn_cfgs=dict(type='MultiheadAttention',
+                                              embed_dims=256,
+                                              num_heads=8,
+                                              dropout=0.1),
+                               feedforward_channels=2048,
+                               ffn_dropout=0.1,
+                               operation_order=('self_attn', 'norm',
+                                                'cross_attn', 'norm', 'ffn',
+                                                'norm')),
+                       )),
+                   positional_encoding=dict(type='SinePositionalEncoding',
+                                            num_feats=128,
+                                            normalize=True),
+                   sub_loss_cls=dict(type='CrossEntropyLoss',
+                                     use_sigmoid=False,
+                                     loss_weight=1.0,
+                                     class_weight=1.0),
+                   sub_loss_bbox=dict(type='L1Loss', loss_weight=5.0),
+                   sub_loss_iou=dict(type='GIoULoss', loss_weight=2.0),
+                   sub_focal_loss=dict(type='BCEFocalLoss', loss_weight=1.0),
+                   sub_dice_loss=dict(type='psgtrDiceLoss', loss_weight=1.0),
+                   obj_loss_cls=dict(type='CrossEntropyLoss',
+                                     use_sigmoid=False,
+                                     loss_weight=1.0,
+                                     class_weight=1.0),
+                   obj_loss_bbox=dict(type='L1Loss', loss_weight=5.0),
+                   obj_loss_iou=dict(type='GIoULoss', loss_weight=2.0),
+                   obj_focal_loss=dict(type='BCEFocalLoss', loss_weight=1.0),
+                   obj_dice_loss=dict(type='psgtrDiceLoss', loss_weight=1.0),
+                   rel_loss_cls=dict(type='CrossEntropyLoss',
+                                     use_sigmoid=False,
+                                     loss_weight=2.0,
+                                     class_weight=1.0)),
+    # training and testing settings
+    train_cfg=dict(assigner=dict(
+        type='HTriMatcher',
+        s_cls_cost=dict(type='ClassificationCost', weight=1.),
+        s_reg_cost=dict(type='BBoxL1Cost', weight=5.0),
+        s_iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0),
+        o_cls_cost=dict(type='ClassificationCost', weight=1.),
+        o_reg_cost=dict(type='BBoxL1Cost', weight=5.0),
+        o_iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0),
+        r_cls_cost=dict(type='ClassificationCost', weight=2.))),
+    test_cfg=dict(max_per_img=100))

OpenPSG/configs/_base_/schedules/schedule_1x.py ADDED Viewed

	@@ -0,0 +1,10 @@

+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(policy='step',
+                 warmup='linear',
+                 warmup_iters=500,
+                 warmup_ratio=0.001,
+                 step=[8, 11])
+runner = dict(type='EpochBasedRunner', max_epochs=12)

OpenPSG/configs/_base_/schedules/schedule_3x.py ADDED Viewed

	@@ -0,0 +1,10 @@

+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+# learning policy
+lr_config = dict(policy='step',
+                 warmup='linear',
+                 warmup_iters=1000,
+                 warmup_ratio=0.001,
+                 step=[27, 33])
+runner = dict(type='EpochBasedRunner', max_epochs=36)

OpenPSG/configs/gpsnet/panoptic_fpn_r101_fpn_1x_predcls_psg.py ADDED Viewed

	@@ -0,0 +1,26 @@

+_base_ = './panoptic_fpn_r50_fpn_1x_predcls_psg.py'
+model = dict(backbone=dict(
+    depth=101,
+    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
+# Log config
+project_name = 'openpsg'
+expt_name = 'gpsnet_panoptic_fpn_r101_fpn_1x_predcls_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+            ),
+        ),
+    ],
+)
+load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'

OpenPSG/configs/gpsnet/panoptic_fpn_r101_fpn_1x_sgdet_psg.py ADDED Viewed

	@@ -0,0 +1,26 @@

+_base_ = './panoptic_fpn_r50_fpn_1x_sgdet_psg.py'
+model = dict(backbone=dict(
+    depth=101,
+    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
+# Log config
+project_name = 'openpsg'
+expt_name = 'gpsnet_panoptic_fpn_r101_fpn_1x_sgdet_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+            ),
+        ),
+    ],
+)
+load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'

OpenPSG/configs/gpsnet/panoptic_fpn_r50_fpn_1x_predcls_psg.py ADDED Viewed

	@@ -0,0 +1,41 @@

+_base_ = [
+    '../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py',
+]
+model = dict(relation_head=dict(
+    type='GPSHead',
+    head_config=dict(
+        # NOTE: Evaluation type
+        use_gt_box=True,
+        use_gt_label=True,
+    ),
+))
+evaluation = dict(interval=1,
+                  metric='predcls',
+                  relation_mode=True,
+                  classwise=True,
+                  detection_method='pan_seg')
+# Change batch size and learning rate
+data = dict(samples_per_gpu=16, workers_per_gpu=0)
+optimizer = dict(type='SGD', lr=0.03, momentum=0.9, weight_decay=0.0001)
+# Log config
+project_name = 'openpsg'
+expt_name = 'gpsnet_panoptic_fpn_r50_fpn_1x_predcls_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+            ),
+        ),
+    ],
+)

OpenPSG/configs/gpsnet/panoptic_fpn_r50_fpn_1x_sgdet_psg.py ADDED Viewed

	@@ -0,0 +1,45 @@

+_base_ = [
+    '../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py',
+]
+model = dict(
+    relation_head=dict(
+        type='GPSHead',
+        head_config=dict(
+            # NOTE: Evaluation type
+            use_gt_box=False,
+            use_gt_label=False,
+        ),
+    ),
+    roi_head=dict(bbox_head=dict(type='SceneGraphBBoxHead'), ),
+)
+evaluation = dict(
+    interval=1,
+    metric='sgdet',
+    relation_mode=True,
+    classwise=True,
+    iou_thrs=0.5,
+    detection_method='pan_seg',
+)
+data = dict(samples_per_gpu=16)
+# Log config
+project_name = 'openpsg'
+expt_name = 'gpsnet_panoptic_fpn_r50_fpn_1x_sgdet_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+            ),
+        ),
+    ],
+)

OpenPSG/configs/imp/panoptic_fpn_r101_fpn_1x_predcls_psg.py ADDED Viewed

	@@ -0,0 +1,28 @@

+_base_ = './panoptic_fpn_r50_fpn_1x_predcls_psg.py'
+model = dict(backbone=dict(
+    depth=101,
+    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
+# Log config
+project_name = 'openpsg'
+expt_name = 'imp_panoptic_fpn_r101_fpn_1x_predcls_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ),
+        ),
+    ],
+)
+load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'

OpenPSG/configs/imp/panoptic_fpn_r101_fpn_1x_sgdet_psg.py ADDED Viewed

	@@ -0,0 +1,26 @@

+_base_ = './panoptic_fpn_r50_fpn_1x_sgdet_psg.py'
+model = dict(backbone=dict(
+    depth=101,
+    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
+# Log config
+project_name = 'openpsg'
+expt_name = 'imp_panoptic_fpn_r101_fpn_1x_sgdet_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+            ),
+        ),
+    ],
+)
+load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'

OpenPSG/configs/imp/panoptic_fpn_r50_fpn_1x_predcls_psg.py ADDED Viewed

	@@ -0,0 +1,44 @@

+_base_ = [
+    '../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py',
+]
+model = dict(relation_head=dict(
+    type='IMPHead',
+    head_config=dict(
+        # NOTE: Evaluation type
+        use_gt_box=True,
+        use_gt_label=True,
+        num_iter=2,
+    ),
+))
+evaluation = dict(interval=1,
+                  metric='predcls',
+                  relation_mode=True,
+                  classwise=True)
+# Change batch size and learning rate
+data = dict(samples_per_gpu=16, )
+# NOTE: workers_per_gpu=0 is commented out.  FIXME: Is this the problem?
+optimizer = dict(type='SGD', lr=0.001, momentum=0.9)
+# Log config
+project_name = 'openpsg'
+expt_name = 'imp_panoptic_fpn_r50_fpn_1x_predcls_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ),
+        ),
+    ],
+)

OpenPSG/configs/imp/panoptic_fpn_r50_fpn_1x_sgdet_psg.py ADDED Viewed

	@@ -0,0 +1,48 @@

+_base_ = [
+    '../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py',
+]
+model = dict(relation_head=dict(
+    type='IMPHead',
+    head_config=dict(
+        # NOTE: Evaluation type
+        use_gt_box=False,
+        use_gt_label=False,
+        num_iter=2,
+    ),
+))
+evaluation = dict(
+    interval=1,
+    metric='sgdet',
+    relation_mode=True,
+    classwise=True,
+    iou_thrs=0.5,
+    detection_method='pan_seg',
+)
+# Change batch size and learning rate
+data = dict(samples_per_gpu=16, )
+# NOTE: workers_per_gpu=0 is commented out.  FIXME: Is this the problem?
+optimizer = dict(type='SGD', lr=0.001, momentum=0.9)
+# Log config
+project_name = 'openpsg'
+expt_name = 'imp_panoptic_fpn_r50_fpn_1x_sgdet_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ),
+        ),
+    ],
+)

OpenPSG/configs/motifs/panoptic_fpn_r101_fpn_1x_predcls_psg.py ADDED Viewed

	@@ -0,0 +1,28 @@

+_base_ = './panoptic_fpn_r50_fpn_1x_predcls_psg.py'
+model = dict(backbone=dict(
+    depth=101,
+    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
+# Log config
+project_name = 'openpsg'
+expt_name = 'motifs_panoptic_fpn_r101_fpn_1x_predcls_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ),
+        ),
+    ],
+)
+load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'

OpenPSG/configs/motifs/panoptic_fpn_r101_fpn_1x_sgdet_psg.py ADDED Viewed

	@@ -0,0 +1,28 @@

+_base_ = './panoptic_fpn_r50_fpn_1x_sgdet_psg.py'
+model = dict(backbone=dict(
+    depth=101,
+    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
+# Log config
+project_name = 'openpsg'
+expt_name = 'motifs_panoptic_fpn_r101_fpn_1x_sgdet_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ),
+        ),
+    ],
+)
+load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'

OpenPSG/configs/motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py ADDED Viewed

	@@ -0,0 +1,241 @@

+_base_ = [
+    '../_base_/models/mask_rcnn_r50_fpn.py',
+    '../_base_/datasets/psg.py',
+    '../_base_/schedules/schedule_1x.py',
+    '../_base_/custom_runtime.py',
+]
+find_unused_parameters = True
+dataset_type = 'PanopticSceneGraphDataset'
+# HACK: object and predicate class names are hard-coded in this config.
+object_classes = [
+    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
+    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
+    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
+    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
+    'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
+    'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
+    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
+    'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+    'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
+    'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
+    'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
+    'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
+    'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
+    'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
+    'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
+    'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
+    'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
+    'food-other-merged', 'building-other-merged', 'rock-merged',
+    'wall-other-merged', 'rug-merged'
+]
+predicate_classes = [
+    'over',
+    'in front of',
+    'beside',
+    'on',
+    'in',
+    'attached to',
+    'hanging from',
+    'on back of',
+    'falling off',
+    'going down',
+    'painted on',
+    'walking on',
+    'running on',
+    'crossing',
+    'standing on',
+    'lying on',
+    'sitting on',
+    'flying over',
+    'jumping over',
+    'jumping from',
+    'wearing',
+    'holding',
+    'carrying',
+    'looking at',
+    'guiding',
+    'kissing',
+    'eating',
+    'drinking',
+    'feeding',
+    'biting',
+    'catching',
+    'picking',
+    'playing with',
+    'chasing',
+    'climbing',
+    'cleaning',
+    'playing',
+    'touching',
+    'pushing',
+    'pulling',
+    'opening',
+    'cooking',
+    'talking to',
+    'throwing',
+    'slicing',
+    'driving',
+    'riding',
+    'parked on',
+    'driving on',
+    'about to hit',
+    'kicking',
+    'swinging',
+    'entering',
+    'exiting',
+    'enclosing',
+    'leaning on',
+]
+model = dict(  # SceneGraphPanopticFPN settings with a MotifHead relation head
+    type='SceneGraphPanopticFPN',
+    semantic_head=dict(
+        type='PanopticFPNHead',
+        num_things_classes=80,
+        num_stuff_classes=53,
+        in_channels=256,
+        inner_channels=128,
+        start_level=0,
+        end_level=4,
+        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
+        conv_cfg=None,
+        loss_seg=dict(type='CrossEntropyLoss',
+                      ignore_index=255,
+                      loss_weight=0.5),
+    ),
+    panoptic_fusion_head=dict(type='HeuristicFusionHead',
+                              num_things_classes=80,  # same things/stuff split as semantic_head
+                              num_stuff_classes=53),
+    test_cfg=dict(panoptic=dict(
+        score_thr=0.6,
+        max_per_img=100,
+        mask_thr_binary=0.5,
+        mask_overlap=0.5,
+        nms=dict(type='nms', iou_threshold=0.5, class_agnostic=True),
+        stuff_area_limit=4096,
+    )),
+    relation_head=dict(
+        type='MotifHead',
+        object_classes=object_classes,
+        predicate_classes=predicate_classes,
+        num_classes=len(object_classes) + 1,  # with background class
+        num_predicates=len(predicate_classes) + 1,  # one extra slot too (presumably "no relation"; confirm)
+        use_bias=False,  # NOTE: whether to use frequency bias
+        head_config=dict(
+            # NOTE: Evaluation type (ground-truth boxes/labels used here; the sgdet config sets both to False)
+            use_gt_box=True,
+            use_gt_label=True,
+            use_vision=True,
+            embed_dim=200,
+            hidden_dim=512,
+            roi_dim=1024,
+            context_pooling_dim=4096,
+            dropout_rate=0.2,
+            context_object_layer=1,
+            context_edge_layer=1,
+            glove_dir='data/glove/',
+            causal_effect_analysis=False,
+        ),
+        bbox_roi_extractor=dict(
+            type='VisualSpatialExtractor',
+            bbox_roi_layer=dict(type='RoIAlign',
+                                output_size=7,
+                                sampling_ratio=2),
+            with_visual_bbox=True,
+            with_visual_mask=False,
+            with_visual_point=False,
+            with_spatial=False,
+            in_channels=256,
+            fc_out_channels=1024,
+            featmap_strides=[4, 8, 16, 32],
+        ),
+        relation_roi_extractor=dict(  # same as bbox_roi_extractor except for the spatial options
+            type='VisualSpatialExtractor',
+            bbox_roi_layer=dict(type='RoIAlign',
+                                output_size=7,
+                                sampling_ratio=2),
+            with_visual_bbox=True,
+            with_visual_mask=False,
+            with_visual_point=False,
+            with_spatial=True,  # bbox_roi_extractor sets this to False
+            separate_spatial=False,
+            in_channels=256,
+            fc_out_channels=1024,
+            featmap_strides=[4, 8, 16, 32],
+        ),
+        relation_sampler=dict(
+            type='Motif',
+            pos_iou_thr=0.5,
+            require_overlap=False,  # overlap is not required for sgdet training
+            num_sample_per_gt_rel=4,
+            num_rel_per_image=1024,
+            pos_fraction=0.25,
+            # NOTE: To only include overlapping bboxes?
+            test_overlap=False,  # for testing
+        ),
+        loss_object=dict(type='CrossEntropyLoss',
+                         use_sigmoid=False,
+                         loss_weight=1.0),
+        loss_relation=dict(type='CrossEntropyLoss',
+                           use_sigmoid=False,
+                           loss_weight=1.0),
+    ),
+)
+custom_hooks = []  # empty list: no custom hooks are set by this config
+# To freeze modules (NOTE(review): the code that consumes this list is not in this file)
+freeze_modules = [
+    'backbone',
+    'neck',
+    'rpn_head',
+    'roi_head',
+    'semantic_head',
+    'panoptic_fusion_head',
+]
+evaluation = dict(interval=1,
+                  metric='predcls',  # the sgdet variant of this config uses metric='sgdet'
+                  relation_mode=True,
+                  classwise=True)
+# Change batch size and learning rate
+data = dict(samples_per_gpu=16, )
+# optimizer = dict(lr=0.003)
+optimizer = dict(type='SGD', lr=0.03, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(_delete_=True,  # mmcv Config: replace the inherited dict instead of merging into it
+                        grad_clip=dict(max_norm=35, norm_type=2))
+lr_config = dict(policy='step',
+                 warmup='linear',
+                 warmup_iters=500,
+                 warmup_ratio=1.0 / 3,
+                 step=[7, 10])  # milestones for the 'step' policy
+# Log config: experiment naming, output directory and logger hooks
+project_name = 'openpsg'
+expt_name = 'motifs_panoptic_fpn_r50_fpn_1x_predcls_psg'
+work_dir = f'./work_dirs/{expt_name}'  # output directory is derived from expt_name
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(  # the W&B run uses the project/experiment names defined above
+                project=project_name,
+                name=expt_name,
+            ),
+        ),
+    ],
+)
+load_from = 'work_dirs/checkpoints/panoptic_fpn_r50_fpn_1x_coco_20210821_101153-9668fd13.pth'  # checkpoint path

OpenPSG/configs/motifs/panoptic_fpn_r50_fpn_1x_sgdet_psg.py ADDED Viewed

	@@ -0,0 +1,44 @@

+_base_ = [  # inherit from the predcls config in the same directory
+    './panoptic_fpn_r50_fpn_1x_predcls_psg.py',
+]
+model = dict(
+    relation_head=dict(
+        head_config=dict(
+            # NOTE: Evaluation type (sgdet: ground-truth boxes and labels are not used)
+            use_gt_box=False,
+            use_gt_label=False,
+        ), ),
+    roi_head=dict(bbox_head=dict(type='SceneGraphBBoxHead'), ),
+)
+evaluation = dict(interval=1,
+                  metric='sgdet',  # the base config uses metric='predcls'
+                  relation_mode=True,
+                  classwise=True,
+                  iou_thrs=0.5,
+                  detection_method='pan_seg')
+# Change batch size (8 here, 16 in the base config); no learning-rate override is set in this file
+data = dict(samples_per_gpu=8,
+            # workers_per_gpu=2
+            )
+# Log config: same hooks as the base config, with this experiment's name
+project_name = 'openpsg'
+expt_name = 'motifs_panoptic_fpn_r50_fpn_1x_sgdet_psg'
+work_dir = f'./work_dirs/{expt_name}'  # output directory is derived from expt_name
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+            ),
+        ),
+    ],
+)

OpenPSG/configs/psgformer/psgformer_r101_psg.py ADDED Viewed

	@@ -0,0 +1,16 @@

+_base_ = './psgformer_r50_psg.py'  # inherit the ResNet-50 PSGFormer PSG config
+model = dict(backbone=dict(  # override backbone depth and its pretrained checkpoint
+    depth=101,
+    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
+# learning policy
+lr_config = dict(policy='step', step=48)  # the base config uses step=40
+runner = dict(type='EpochBasedRunner', max_epochs=60)
+project_name = 'psgformer'
+expt_name = 'psgformer_r101_psg'
+work_dir = f'./work_dirs/{expt_name}'  # output directory is derived from expt_name
+checkpoint_config = dict(interval=12, max_keep_ckpts=10)  # base config: interval=1, max_keep_ckpts=15
+load_from = './work_dirs/checkpoints/detr4psgformer_r101.pth'  # replaces the base config's r50 checkpoint path

OpenPSG/configs/psgformer/psgformer_r50.py ADDED Viewed

	@@ -0,0 +1,96 @@

+model = dict(  # PSGFormer model: ResNet-50 backbone + PSGFormerHead with a DualTransformer
+    type='PSGTr',
+    backbone=dict(type='ResNet',
+                  depth=50,
+                  num_stages=4,
+                  out_indices=(0, 1, 2, 3),
+                  frozen_stages=1,
+                  norm_cfg=dict(type='BN', requires_grad=False),
+                  norm_eval=True,
+                  style='pytorch',
+                  init_cfg=dict(type='Pretrained',
+                                checkpoint='torchvision://resnet50')),
+    bbox_head=dict(
+        type='PSGFormerHead',
+        num_classes=80,  # overridden with len(object_classes) in psgformer_r50_psg.py
+        num_relations=117,  # overridden with len(predicate_classes) in psgformer_r50_psg.py
+        in_channels=2048,
+        transformer=dict(
+            type='DualTransformer',  # one encoder and two decoders (decoder1, decoder2)
+            encoder=dict(type='DetrTransformerEncoder',
+                         num_layers=6,
+                         transformerlayers=dict(
+                             type='BaseTransformerLayer',
+                             attn_cfgs=[
+                                 dict(type='MultiheadAttention',
+                                      embed_dims=256,
+                                      num_heads=8,
+                                      dropout=0.1)
+                             ],
+                             feedforward_channels=2048,
+                             ffn_dropout=0.1,
+                             operation_order=('self_attn', 'norm', 'ffn',
+                                              'norm'))),
+            decoder1=dict(type='DetrTransformerDecoder',
+                          return_intermediate=True,
+                          num_layers=6,
+                          transformerlayers=dict(
+                              type='DetrTransformerDecoderLayer',
+                              attn_cfgs=dict(type='MultiheadAttention',
+                                             embed_dims=256,
+                                             num_heads=8,
+                                             dropout=0.1),
+                              feedforward_channels=2048,
+                              ffn_dropout=0.1,
+                              operation_order=('self_attn', 'norm',
+                                               'cross_attn', 'norm', 'ffn',
+                                               'norm'))),
+            decoder2=dict(type='DetrTransformerDecoder',  # configured identically to decoder1
+                          return_intermediate=True,
+                          num_layers=6,
+                          transformerlayers=dict(
+                              type='DetrTransformerDecoderLayer',
+                              attn_cfgs=dict(type='MultiheadAttention',
+                                             embed_dims=256,
+                                             num_heads=8,
+                                             dropout=0.1),
+                              feedforward_channels=2048,
+                              ffn_dropout=0.1,
+                              operation_order=('self_attn', 'norm',
+                                               'cross_attn', 'norm', 'ffn',
+                                               'norm'))),
+        ),
+        positional_encoding=dict(type='SinePositionalEncoding',
+                                 num_feats=128,
+                                 normalize=True),
+        rel_loss_cls=dict(type='CrossEntropyLoss',
+                          use_sigmoid=False,
+                          loss_weight=2.0,
+                          class_weight=1.0),
+        sub_id_loss=dict(type='MultilabelCrossEntropy', loss_weight=2.0),
+        obj_id_loss=dict(type='MultilabelCrossEntropy', loss_weight=2.0),
+        loss_cls=dict(type='CrossEntropyLoss',
+                      use_sigmoid=False,
+                      loss_weight=4.0,
+                      class_weight=1.0),
+        loss_bbox=dict(type='L1Loss', loss_weight=3.0),
+        loss_iou=dict(type='GIoULoss', loss_weight=2.0),
+        focal_loss=dict(type='BCEFocalLoss', loss_weight=1.0),
+        dice_loss=dict(type='psgtrDiceLoss', loss_weight=1.0)),
+    # training and testing settings
+    train_cfg=dict(id_assigner=dict(type='IdMatcher',
+                                    sub_id_cost=dict(type='ClassificationCost',
+                                                     weight=1.),
+                                    obj_id_cost=dict(type='ClassificationCost',
+                                                     weight=1.),
+                                    r_cls_cost=dict(type='ClassificationCost',
+                                                    weight=1.)),
+                   bbox_assigner=dict(type='HungarianAssigner',  # cost weights equal the loss_cls/loss_bbox/loss_iou weights above
+                                      cls_cost=dict(type='ClassificationCost',
+                                                    weight=4.0),
+                                      reg_cost=dict(type='BBoxL1Cost',
+                                                    weight=3.0),
+                                      iou_cost=dict(type='IoUCost',
+                                                    iou_mode='giou',
+                                                    weight=2.0))),
+    test_cfg=dict(max_per_img=100))

OpenPSG/configs/psgformer/psgformer_r50_psg.py ADDED Viewed

	@@ -0,0 +1,244 @@

+_base_ = [  # model, dataset and runtime configs this file builds on
+    './psgformer_r50.py', '../_base_/datasets/psg.py',
+    '../_base_/custom_runtime.py'
+]
+find_unused_parameters = True
+custom_imports = dict(imports=[  # openpsg modules for mmcv to import when the config is loaded
+    'openpsg.models.frameworks.psgtr', 'openpsg.models.losses.seg_losses',
+    'openpsg.models.frameworks.dual_transformer',
+    'openpsg.models.relation_heads.psgformer_head', 'openpsg.datasets',
+    'openpsg.datasets.pipelines.loading',
+    'openpsg.datasets.pipelines.rel_randomcrop',
+    'openpsg.models.relation_heads.approaches.matcher', 'openpsg.utils'
+],
+                      allow_failed_imports=False)  # a failed import is an error rather than being ignored
+dataset_type = 'PanopticSceneGraphDataset'
+# HACK:
+object_classes = [  # 133 object class names, passed to bbox_head below
+    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
+    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
+    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
+    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
+    'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
+    'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
+    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
+    'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+    'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
+    'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
+    'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
+    'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
+    'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
+    'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
+    'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
+    'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
+    'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
+    'food-other-merged', 'building-other-merged', 'rock-merged',
+    'wall-other-merged', 'rug-merged'
+]
+predicate_classes = [  # 56 predicate label strings, passed to bbox_head below
+    'over',
+    'in front of',
+    'beside',
+    'on',
+    'in',
+    'attached to',
+    'hanging from',
+    'on back of',
+    'falling off',
+    'going down',
+    'painted on',
+    'walking on',
+    'running on',
+    'crossing',
+    'standing on',
+    'lying on',
+    'sitting on',
+    'flying over',
+    'jumping over',
+    'jumping from',
+    'wearing',
+    'holding',
+    'carrying',
+    'looking at',
+    'guiding',
+    'kissing',
+    'eating',
+    'drinking',
+    'feeding',
+    'biting',
+    'catching',
+    'picking',
+    'playing with',
+    'chasing',
+    'climbing',
+    'cleaning',
+    'playing',
+    'touching',
+    'pushing',
+    'pulling',
+    'opening',
+    'cooking',
+    'talking to',
+    'throwing',
+    'slicing',
+    'driving',
+    'riding',
+    'parked on',
+    'driving on',
+    'about to hit',
+    'kicking',
+    'swinging',
+    'entering',
+    'exiting',
+    'enclosing',
+    'leaning on',
+]
+model = dict(bbox_head=dict(  # overrides for the bbox_head declared in ./psgformer_r50.py
+    num_classes=len(object_classes),  # 133, replacing the base value of 80
+    num_relations=len(predicate_classes),  # 56, replacing the base value of 117
+    object_classes=object_classes,
+    predicate_classes=predicate_classes,
+    num_obj_query=100,
+    num_rel_query=100,
+), )
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],  # unpacked into the Normalize steps of both pipelines below
+                    std=[58.395, 57.12, 57.375],
+                    to_rgb=True)
+# train_pipeline, NOTE: the img_scale and the Pad's size_divisor are different
+# from the default settings in mmdet.
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadPanopticSceneGraphAnnotations',
+         with_bbox=True,
+         with_rel=True,
+         with_mask=True,
+         with_seg=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(
+        type='AutoAugment',
+        policies=[  # two policies: multi-scale resize only, or resize -> crop -> resize
+            [
+                dict(type='Resize',
+                     img_scale=[(480, 1333), (512, 1333), (544, 1333),
+                                (576, 1333), (608, 1333), (640, 1333),
+                                (672, 1333), (704, 1333), (736, 1333),
+                                (768, 1333), (800, 1333)],
+                     multiscale_mode='value',
+                     keep_ratio=True)
+            ],
+            [
+                dict(type='Resize',
+                     img_scale=[(400, 1333), (500, 1333), (600, 1333)],
+                     multiscale_mode='value',
+                     keep_ratio=True),
+                dict(type='RelRandomCrop',
+                     crop_type='absolute_range',
+                     crop_size=(384, 600),
+                     allow_negative_crop=False),  # no empty relations
+                dict(type='Resize',
+                     img_scale=[(480, 1333), (512, 1333), (544, 1333),
+                                (576, 1333), (608, 1333), (640, 1333),
+                                (672, 1333), (704, 1333), (736, 1333),
+                                (768, 1333), (800, 1333)],
+                     multiscale_mode='value',
+                     override=True,
+                     keep_ratio=True)
+            ]
+        ]),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=1),
+    dict(type='RelsFormatBundle'),
+    dict(type='Collect',
+         keys=['img', 'gt_bboxes', 'gt_labels', 'gt_rels', 'gt_masks'])
+]
+# test_pipeline, NOTE: the Pad's size_divisor is different from the default
+# setting (size_divisor=32), although using the default setting or
+# size_divisor=1 has little effect on the performance.
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
+    dict(type='MultiScaleFlipAug',
+         img_scale=(1333, 800),
+         flip=False,
+         transforms=[
+             dict(type='Resize', keep_ratio=True),
+             dict(type='RandomFlip'),
+             dict(type='Normalize', **img_norm_cfg),
+             dict(type='Pad', size_divisor=1),
+             dict(type='ImageToTensor', keys=['img']),
+             dict(type='ToTensor', keys=['gt_bboxes', 'gt_labels']),
+             dict(type='ToDataContainer',
+                  fields=(dict(key='gt_bboxes'), dict(key='gt_labels'))),
+             dict(type='Collect', keys=['img']),  # only 'img' is in keys; the gt_* fields above are not
+         ])
+]
+evaluation = dict(  # 'sgdet' metric settings
+    interval=1,
+    metric='sgdet',
+    relation_mode=True,
+    classwise=True,
+    iou_thrs=0.5,
+    detection_method='pan_seg',
+)
+data = dict(samples_per_gpu=1,
+            workers_per_gpu=2,
+            train=dict(pipeline=train_pipeline),
+            val=dict(pipeline=test_pipeline),  # val and test share test_pipeline
+            test=dict(pipeline=test_pipeline))
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=0.001,
+    weight_decay=0.0001,
+    paramwise_cfg=dict(
+        custom_keys={  # every listed key gets lr_mult=0.1 and decay_mult=1.0
+            'backbone': dict(lr_mult=0.1, decay_mult=1.0),
+            'transformer.encoder': dict(lr_mult=0.1, decay_mult=1.0),
+            'transformer.decoder1': dict(lr_mult=0.1, decay_mult=1.0),
+            'obj_query_embed': dict(lr_mult=0.1, decay_mult=1.0),
+            'input_proj': dict(lr_mult=0.1, decay_mult=1.0),
+            'class_embed': dict(lr_mult=0.1, decay_mult=1.0),
+            'box_embed': dict(lr_mult=0.1, decay_mult=1.0),
+            'bbox_attention': dict(lr_mult=0.1, decay_mult=1.0),
+            'mask_head': dict(lr_mult=0.1, decay_mult=1.0),
+        }))
+optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
+# learning policy
+lr_config = dict(policy='step', step=40)
+runner = dict(type='EpochBasedRunner', max_epochs=60)
+project_name = 'psgformer'  # experiment naming, reused by work_dir and the W&B hook below
+expt_name = 'psgformer_r50_psg'
+work_dir = f'./work_dirs/{expt_name}'  # output directory is derived from expt_name
+checkpoint_config = dict(interval=1, max_keep_ckpts=15)
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+            ),
+        )
+    ],
+)
+load_from = './work_dirs/checkpoints/detr4psgformer_r50.pth'  # checkpoint path

OpenPSG/configs/psgformer/psgformer_r50_psg_inference.py ADDED Viewed

	@@ -0,0 +1,31 @@

+_base_ = [  # inherit the PSGFormer R50 config; only data.test.pipeline is overridden below
+    './psgformer_r50_psg.py'
+]
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],  # same values as in the base config
+                    std=[58.395, 57.12, 57.375],
+                    to_rgb=True)
+pipeline = [  # image-only pipeline: no annotation loading step
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),  # the base config's test_pipeline uses size_divisor=1
+            # NOTE: Do not change the img to DC (presumably mmcv's DataContainer; confirm).
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ],
+    ),
+]
+data = dict(
+    test=dict(
+        pipeline=pipeline,
+    ),
+)

OpenPSG/configs/psgtr/psgtr_r101_psg.py ADDED Viewed

	@@ -0,0 +1,231 @@

+_base_ = [  # model, dataset and runtime configs this file builds on
+    '../_base_/models/psgtr_r101.py', '../_base_/datasets/psg.py',
+    '../_base_/custom_runtime.py'
+]
+custom_imports = dict(imports=[  # openpsg modules for mmcv to import when the config is loaded
+    'openpsg.models.frameworks.psgtr', 'openpsg.models.losses.seg_losses',
+    'openpsg.models.relation_heads.psgtr_head', 'openpsg.datasets',
+    'openpsg.datasets.pipelines.loading',
+    'openpsg.datasets.pipelines.rel_randomcrop',
+    'openpsg.models.relation_heads.approaches.matcher', 'openpsg.utils'
+],
+                      allow_failed_imports=False)  # a failed import is an error rather than being ignored
+dataset_type = 'PanopticSceneGraphDataset'
+# HACK:
+object_classes = [  # 133 object class names, passed to bbox_head below
+    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
+    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
+    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
+    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
+    'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
+    'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
+    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
+    'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+    'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
+    'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
+    'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
+    'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
+    'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
+    'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
+    'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
+    'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
+    'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
+    'food-other-merged', 'building-other-merged', 'rock-merged',
+    'wall-other-merged', 'rug-merged'
+]
+predicate_classes = [  # 56 predicate label strings, passed to bbox_head below
+    'over',
+    'in front of',
+    'beside',
+    'on',
+    'in',
+    'attached to',
+    'hanging from',
+    'on back of',
+    'falling off',
+    'going down',
+    'painted on',
+    'walking on',
+    'running on',
+    'crossing',
+    'standing on',
+    'lying on',
+    'sitting on',
+    'flying over',
+    'jumping over',
+    'jumping from',
+    'wearing',
+    'holding',
+    'carrying',
+    'looking at',
+    'guiding',
+    'kissing',
+    'eating',
+    'drinking',
+    'feeding',
+    'biting',
+    'catching',
+    'picking',
+    'playing with',
+    'chasing',
+    'climbing',
+    'cleaning',
+    'playing',
+    'touching',
+    'pushing',
+    'pulling',
+    'opening',
+    'cooking',
+    'talking to',
+    'throwing',
+    'slicing',
+    'driving',
+    'riding',
+    'parked on',
+    'driving on',
+    'about to hit',
+    'kicking',
+    'swinging',
+    'entering',
+    'exiting',
+    'enclosing',
+    'leaning on',
+]
+model = dict(bbox_head=dict(  # bbox_head overrides; class counts follow the lists above
+    num_classes=len(object_classes),  # 133
+    num_relations=len(predicate_classes),  # 56
+    object_classes=object_classes,
+    predicate_classes=predicate_classes,
+    use_mask=True,
+    num_query=100,
+), )
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],  # unpacked into the Normalize steps of both pipelines below
+                    std=[58.395, 57.12, 57.375],
+                    to_rgb=True)
+# train_pipeline, NOTE: the img_scale and the Pad's size_divisor are different
+# from the default settings in mmdet.
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadPanopticSceneGraphAnnotations',
+         with_bbox=True,
+         with_rel=True,
+         with_mask=True,
+         with_seg=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(
+        type='AutoAugment',
+        policies=[  # two policies: multi-scale resize only, or resize -> crop -> resize
+            [
+                dict(type='Resize',
+                     img_scale=[(480, 1333), (512, 1333), (544, 1333),
+                                (576, 1333), (608, 1333), (640, 1333),
+                                (672, 1333), (704, 1333), (736, 1333),
+                                (768, 1333), (800, 1333)],
+                     multiscale_mode='value',
+                     keep_ratio=True)
+            ],
+            [
+                dict(type='Resize',
+                     img_scale=[(400, 1333), (500, 1333), (600, 1333)],
+                     multiscale_mode='value',
+                     keep_ratio=True),
+                dict(type='RelRandomCrop',
+                     crop_type='absolute_range',
+                     crop_size=(384, 600),
+                     allow_negative_crop=False),  # no empty relations
+                dict(type='Resize',
+                     img_scale=[(480, 1333), (512, 1333), (544, 1333),
+                                (576, 1333), (608, 1333), (640, 1333),
+                                (672, 1333), (704, 1333), (736, 1333),
+                                (768, 1333), (800, 1333)],
+                     multiscale_mode='value',
+                     override=True,
+                     keep_ratio=True)
+            ]
+        ]),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=1),
+    dict(type='RelsFormatBundle'),
+    dict(type='Collect',
+         keys=['img', 'gt_bboxes', 'gt_labels', 'gt_rels', 'gt_masks'])
+]
+# test_pipeline, NOTE: the Pad's size_divisor is different from the default
+# setting (size_divisor=32), although using the default setting or
+# size_divisor=1 has little effect on the performance.
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    # dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=1),
+            dict(type='ImageToTensor', keys=['img']),
+            # dict(type='ToTensor', keys=['gt_bboxes', 'gt_labels']),
+            # dict(type='ToDataContainer', fields=(dict(key='gt_bboxes'), dict(key='gt_labels'))),
+            dict(type='Collect', keys=['img']),  # only the image is collected at test time
+        ])
+]
+evaluation = dict(  # 'sgdet' metric settings
+    interval=1,
+    metric='sgdet',
+    relation_mode=True,
+    classwise=True,
+    iou_thrs=0.5,
+    detection_method='pan_seg',
+)
+data = dict(samples_per_gpu=1,
+            workers_per_gpu=2,
+            train=dict(pipeline=train_pipeline),
+            val=dict(pipeline=test_pipeline),  # val and test share test_pipeline
+            test=dict(pipeline=test_pipeline))
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=0.0001,
+    weight_decay=0.0001,
+    paramwise_cfg=dict(custom_keys={  # only the 'backbone' key gets a reduced lr_mult here
+        'backbone': dict(lr_mult=0.1, decay_mult=1.0),
+    }))
+optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
+# learning policy
+lr_config = dict(policy='step', step=40)
+runner = dict(type='EpochBasedRunner', max_epochs=60)
+project_name = 'psgtr'  # experiment naming, reused by work_dir and the W&B hook below
+expt_name = 'psgtr_r101_psg'
+work_dir = f'./work_dirs/{expt_name}'  # output directory is derived from expt_name
+checkpoint_config = dict(interval=2, max_keep_ckpts=10)
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+            ),
+        )
+    ],
+)
+load_from = 'work_dirs/checkpoints/detr_pan_r101.pth'  # checkpoint path

OpenPSG/configs/psgtr/psgtr_r50.py ADDED Viewed

	@@ -0,0 +1,82 @@

+model = dict(  # PSGTr model: ResNet-50 backbone + PSGTrHead with a single encoder/decoder Transformer
+    type='PSGTr',
+    backbone=dict(type='ResNet',
+                  depth=50,
+                  num_stages=4,
+                  out_indices=(0, 1, 2, 3),
+                  frozen_stages=1,
+                  norm_cfg=dict(type='BN', requires_grad=False),
+                  norm_eval=True,
+                  style='pytorch',
+                  init_cfg=dict(type='Pretrained',
+                                checkpoint='torchvision://resnet50')),
+    bbox_head=dict(type='PSGTrHead',
+                   num_classes=80,
+                   num_relations=117,
+                   in_channels=2048,
+                   transformer=dict(
+                       type='Transformer',
+                       encoder=dict(type='DetrTransformerEncoder',
+                                    num_layers=6,
+                                    transformerlayers=dict(
+                                        type='BaseTransformerLayer',
+                                        attn_cfgs=[
+                                            dict(type='MultiheadAttention',
+                                                 embed_dims=256,
+                                                 num_heads=8,
+                                                 dropout=0.1)
+                                        ],
+                                        feedforward_channels=2048,
+                                        ffn_dropout=0.1,
+                                        operation_order=('self_attn', 'norm',
+                                                         'ffn', 'norm'))),
+                       decoder=dict(
+                           type='DetrTransformerDecoder',
+                           return_intermediate=True,
+                           num_layers=6,
+                           transformerlayers=dict(
+                               type='DetrTransformerDecoderLayer',
+                               attn_cfgs=dict(type='MultiheadAttention',
+                                              embed_dims=256,
+                                              num_heads=8,
+                                              dropout=0.1),
+                               feedforward_channels=2048,
+                               ffn_dropout=0.1,
+                               operation_order=('self_attn', 'norm',
+                                                'cross_attn', 'norm', 'ffn',
+                                                'norm')),
+                       )),
+                   positional_encoding=dict(type='SinePositionalEncoding',
+                                            num_feats=128,
+                                            normalize=True),
+                   sub_loss_cls=dict(type='CrossEntropyLoss',  # sub_* losses; the obj_* losses below use the same types and weights
+                                     use_sigmoid=False,
+                                     loss_weight=1.0,
+                                     class_weight=1.0),
+                   sub_loss_bbox=dict(type='L1Loss', loss_weight=5.0),
+                   sub_loss_iou=dict(type='GIoULoss', loss_weight=2.0),
+                   sub_focal_loss=dict(type='BCEFocalLoss', loss_weight=2.0),
+                   sub_dice_loss=dict(type='psgtrDiceLoss', loss_weight=2.0),
+                   obj_loss_cls=dict(type='CrossEntropyLoss',
+                                     use_sigmoid=False,
+                                     loss_weight=1.0,
+                                     class_weight=1.0),
+                   obj_loss_bbox=dict(type='L1Loss', loss_weight=5.0),
+                   obj_loss_iou=dict(type='GIoULoss', loss_weight=2.0),
+                   obj_focal_loss=dict(type='BCEFocalLoss', loss_weight=2.0),
+                   obj_dice_loss=dict(type='psgtrDiceLoss', loss_weight=2.0),
+                   rel_loss_cls=dict(type='CrossEntropyLoss',
+                                     use_sigmoid=False,
+                                     loss_weight=2.0,
+                                     class_weight=1.0)),
+    # training and testing settings
+    train_cfg=dict(assigner=dict(  # cost weights equal the cls/bbox/iou loss weights above
+        type='HTriMatcher',
+        s_cls_cost=dict(type='ClassificationCost', weight=1.),
+        s_reg_cost=dict(type='BBoxL1Cost', weight=5.0),
+        s_iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0),
+        o_cls_cost=dict(type='ClassificationCost', weight=1.),
+        o_reg_cost=dict(type='BBoxL1Cost', weight=5.0),
+        o_iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0),
+        r_cls_cost=dict(type='ClassificationCost', weight=2.))),
+    test_cfg=dict(max_per_img=100))

OpenPSG/configs/psgtr/psgtr_r50_psg.py ADDED Viewed

	@@ -0,0 +1,233 @@

+_base_ = [  # base configs this file inherits from; everything below overrides them
+    '../_base_/models/psgtr_r50.py', '../_base_/datasets/psg.py',
+    '../_base_/custom_runtime.py'
+]
+custom_imports = dict(imports=[  # project modules to import when the config is loaded
+    'openpsg.models.frameworks.psgtr', 'openpsg.models.losses.seg_losses',
+    'openpsg.models.relation_heads.psgtr_head', 'openpsg.datasets',
+    'openpsg.datasets.pipelines.loading',
+    'openpsg.datasets.pipelines.rel_randomcrop',
+    'openpsg.models.relation_heads.approaches.matcher', 'openpsg.utils'
+],
+                      allow_failed_imports=False)  # do not tolerate a module that fails to import
+dataset_type = 'PanopticSceneGraphDataset'
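+# HACK: the object and predicate class names are hard-coded in this config and passed to model.bbox_head below.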
+object_classes = [
+    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
+    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
+    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
+    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
+    'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
+    'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
+    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
+    'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+    'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
+    'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
+    'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
+    'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
+    'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
+    'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
+    'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
+    'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
+    'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
+    'food-other-merged', 'building-other-merged', 'rock-merged',
+    'wall-other-merged', 'rug-merged'
+]
+predicate_classes = [
+    'over',
+    'in front of',
+    'beside',
+    'on',
+    'in',
+    'attached to',
+    'hanging from',
+    'on back of',
+    'falling off',
+    'going down',
+    'painted on',
+    'walking on',
+    'running on',
+    'crossing',
+    'standing on',
+    'lying on',
+    'sitting on',
+    'flying over',
+    'jumping over',
+    'jumping from',
+    'wearing',
+    'holding',
+    'carrying',
+    'looking at',
+    'guiding',
+    'kissing',
+    'eating',
+    'drinking',
+    'feeding',
+    'biting',
+    'catching',
+    'picking',
+    'playing with',
+    'chasing',
+    'climbing',
+    'cleaning',
+    'playing',
+    'touching',
+    'pushing',
+    'pulling',
+    'opening',
+    'cooking',
+    'talking to',
+    'throwing',
+    'slicing',
+    'driving',
+    'riding',
+    'parked on',
+    'driving on',
+    'about to hit',
+    'kicking',
+    'swinging',
+    'entering',
+    'exiting',
+    'enclosing',
+    'leaning on',
+]
+model = dict(bbox_head=dict(  # overrides for the bbox_head declared in the base model config
+    num_classes=len(object_classes),  # derived from the list so the count cannot drift from the names
+    num_relations=len(predicate_classes),  # likewise derived from the predicate list
+    object_classes=object_classes,
+    predicate_classes=predicate_classes,
+    use_mask=True,
+    num_query=100,
+), )
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
+                    std=[58.395, 57.12, 57.375],
+                    to_rgb=True)
+# train_pipeline. NOTE: the img_scale and the Pad's size_divisor are different
+# from the default settings in mmdet.
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadPanopticSceneGraphAnnotations',  # project loader imported via custom_imports
+         with_bbox=True,
+         with_rel=True,
+         with_mask=True,
+         with_seg=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(
+        type='AutoAugment',
+        policies=[  # two policies: a plain multi-scale resize, or resize -> relation-aware crop -> resize
+            [
+                dict(type='Resize',
+                     img_scale=[(480, 1333), (512, 1333), (544, 1333),
+                                (576, 1333), (608, 1333), (640, 1333),
+                                (672, 1333), (704, 1333), (736, 1333),
+                                (768, 1333), (800, 1333)],
+                     multiscale_mode='value',
+                     keep_ratio=True)
+            ],
+            [
+                dict(type='Resize',
+                     img_scale=[(400, 1333), (500, 1333), (600, 1333)],
+                     multiscale_mode='value',
+                     keep_ratio=True),
+                dict(type='RelRandomCrop',
+                     crop_type='absolute_range',
+                     crop_size=(384, 600),
+                     allow_negative_crop=False),  # no empty relations
+                dict(type='Resize',
+                     img_scale=[(480, 1333), (512, 1333), (544, 1333),
+                                (576, 1333), (608, 1333), (640, 1333),
+                                (672, 1333), (704, 1333), (736, 1333),
+                                (768, 1333), (800, 1333)],
+                     multiscale_mode='value',
+                     override=True,  # second Resize in this policy; same scale set as the first policy
+                     keep_ratio=True)
+            ]
+        ]),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size_divisor=1),  # differs from the mmdet default, see the note above train_pipeline
+    dict(type='RelsFormatBundle'),
+    dict(type='Collect',
+         keys=['img', 'gt_bboxes', 'gt_labels', 'gt_rels', 'gt_masks'])  # fields collected for training
+]
+# test_pipeline. NOTE: the Pad's size_divisor is different from the default
+# setting (size_divisor=32), although it makes little difference to performance
+# whether we use the default setting or size_divisor=1.
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    # dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),  # a single scale is listed
+        flip=False,  # flipping disabled
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=1),
+            dict(type='ImageToTensor', keys=['img']),
+            # dict(type='ToTensor', keys=['gt_bboxes', 'gt_labels']),
+            # dict(type='ToDataContainer', fields=(dict(key='gt_bboxes'), dict(key='gt_labels'))),
+            dict(type='Collect', keys=['img']),  # only the image is collected; no annotations are loaded above
+        ])
+]
+evaluation = dict(
+    interval=1,
+    metric='sgdet',  # scene-graph detection metric name
+    relation_mode=True,
+    classwise=True,
+    iou_thrs=0.5,
+    detection_method='pan_seg',
+)
+data = dict(samples_per_gpu=1,
+            workers_per_gpu=2,
+            train=dict(pipeline=train_pipeline),
+            val=dict(pipeline=test_pipeline),  # val and test share the same pipeline
+            test=dict(pipeline=test_pipeline))
+# optimizer: AdamW; parameters matching 'backbone' get a 0.1 learning-rate multiplier
+optimizer = dict(
+    type='AdamW',
+    lr=0.0001,
+    weight_decay=0.0001,
+    paramwise_cfg=dict(custom_keys={
+        'backbone': dict(lr_mult=0.1, decay_mult=1.0),
+    }))
+optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))  # clip gradients by L2 norm
+# learning policy: step schedule (step=40) with training capped at 60 epochs
+lr_config = dict(policy='step', step=40)
+runner = dict(type='EpochBasedRunner', max_epochs=60)
+project_name = 'psgformer'  # used as the W&B project below
+expt_name = 'psgtr_r50_psg_0.5_scale_mask'  # used as the W&B run name and the work_dir suffix
+work_dir = f'./work_dirs/{expt_name}'
+checkpoint_config = dict(interval=2, max_keep_ckpts=10)
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ),
+        )
+    ],
+)
+load_from = 'work_dirs/checkpoints/detr_pan_r50.pth'  # relative checkpoint path used to initialise weights

OpenPSG/configs/psgtr/psgtr_r50_psg_inference.py ADDED Viewed

	@@ -0,0 +1,31 @@

+_base_ = [
+    './psgtr_r50_psg.py'  # inherit the PSGTR R50 config from this directory
+]
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],  # same values as img_norm_cfg in psgtr_r50_psg.py
+                    std=[58.395, 57.12, 57.375],
+                    to_rgb=True)
+pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),  # NOTE(review): psgtr_r50_psg.py's test_pipeline uses size_divisor=1 — confirm 32 is intended
+            # NOTE: Do not change the img to DC.
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ],
+    ),
+]
+data = dict(
+    test=dict(
+        pipeline=pipeline,  # only the test split's pipeline is overridden in this file
+    ),
+)

OpenPSG/configs/vctree/panoptic_fpn_r101_fpn_1x_predcls_psg.py ADDED Viewed

	@@ -0,0 +1,28 @@

+_base_ = './panoptic_fpn_r50_fpn_1x_predcls_psg.py'  # start from the R50 predcls config in this directory
+model = dict(backbone=dict(
+    depth=101,  # override the backbone depth; paired with the resnet101 checkpoint below
+    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
+# Log config: W&B project/run names and the output directory for this experiment
+project_name = 'openpsg'
+expt_name = 'vctree_panoptic_fpn_r101_fpn_1x_predcls_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ),
+        ),
+    ],
+)
+load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'  # relative checkpoint path

OpenPSG/configs/vctree/panoptic_fpn_r101_fpn_1x_sgdet_psg.py ADDED Viewed

	@@ -0,0 +1,28 @@

+_base_ = './panoptic_fpn_r50_fpn_1x_sgdet_psg.py'  # start from the R50 sgdet config in this directory
+model = dict(backbone=dict(
+    depth=101,  # override the backbone depth; paired with the resnet101 checkpoint below
+    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
+# Log config: W&B project/run names and the output directory for this experiment
+project_name = 'openpsg'
+expt_name = 'vctree_panoptic_fpn_r101_fpn_1x_sgdet_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ),
+        ),
+    ],
+)
+load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'  # relative checkpoint path

OpenPSG/configs/vctree/panoptic_fpn_r50_fpn_1x_predcls_psg.py ADDED Viewed

	@@ -0,0 +1,43 @@

+_base_ = [
+    '../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py',  # reuse the motifs predcls config; only the head type changes below
+]
+model = dict(relation_head=dict(
+    type='VCTreeHead',
+    head_config=dict(
+        # NOTE: Evaluation type
+        use_gt_box=True,
+        use_gt_label=True,
+    ),
+))
+evaluation = dict(interval=1,
+                  metric='predcls',
+                  relation_mode=True,
+                  classwise=True)
+# Change batch size and learning rate
+data = dict(samples_per_gpu=16,
+            workers_per_gpu=0)  # FIXME: Is this the problem?
+# optimizer = dict(lr=0.001)
+# Log config: W&B project/run names and the output directory for this experiment
+project_name = 'openpsg'
+expt_name = 'vctree_panoptic_fpn_r50_fpn_1x_predcls_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ),
+        ),
+    ],
+)

OpenPSG/configs/vctree/panoptic_fpn_r50_fpn_1x_sgdet_psg.py ADDED Viewed

	@@ -0,0 +1,49 @@

+_base_ = [
+    '../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py',  # NOTE(review): this sgdet config inherits the motifs *predcls* base and switches mode via head_config/evaluation below — confirm intended
+]
+model = dict(
+    relation_head=dict(
+        type='VCTreeHead',
+        head_config=dict(
+            # NOTE: Evaluation type
+            use_gt_box=False,
+            use_gt_label=False,
+        ),
+    ),
+    roi_head=dict(bbox_head=dict(type='SceneGraphBBoxHead'), ),
+)
+evaluation = dict(interval=1,
+                  metric='sgdet',
+                  relation_mode=True,
+                  classwise=True,
+                  iou_thrs=0.5,
+                  detection_method='pan_seg')
+# Change batch size and learning rate
+data = dict(samples_per_gpu=16,
+            # workers_per_gpu=2
+            )
+# optimizer = dict(lr=0.003)
+# Log config: W&B project/run names and the output directory for this experiment
+project_name = 'openpsg'
+expt_name = 'vctree_panoptic_fpn_r50_fpn_1x_sgdet_psg'
+work_dir = f'./work_dirs/{expt_name}'
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project=project_name,
+                name=expt_name,
+                # config=work_dir + "/cfg.yaml"
+            ),
+        ),
+    ],
+)

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 title: OpenPSG
-emoji: 🐠
-colorFrom: green
-colorTo: gray
 sdk: gradio
 sdk_version: 3.1.4
 app_file: app.py

 ---
 title: OpenPSG
+emoji: 🖼️🏙️🌄🌉
+colorFrom: yellow
+colorTo: blue
 sdk: gradio
 sdk_version: 3.1.4
 app_file: app.py

app.py CHANGED Viewed

@@ -1,15 +1,135 @@
-import numpy as np
 import gradio as gr
-def sepia(input_img):
-    sepia_filter = np.array([
-        [0.393, 0.769, 0.189],
-        [0.349, 0.686, 0.168],
-        [0.272, 0.534, 0.131]
-    ])
-    sepia_img = input_img.dot(sepia_filter.T)
-    sepia_img /= sepia_img.max()
-    return sepia_img
-demo = gr.Interface(sepia, gr.Image(shape=(200, 200)), "image")
-demo.launch(share=True)

+#!/usr/bin/env python
+from __future__ import annotations
+import argparse
+import os
+import pathlib
+import subprocess
+import tarfile
+if os.getenv('SYSTEM') == 'spaces':
+    import mim
+    mim.uninstall('mmcv-full', confirm_yes=True)
+    mim.install('mmcv-full==1.5.2', is_yes=True)
+    subprocess.call('pip uninstall -y opencv-python'.split())
+    subprocess.call('pip uninstall -y opencv-python-headless'.split())
+    subprocess.call('pip install opencv-python-headless==4.5.5.64'.split())
+import cv2
 import gradio as gr
+import numpy as np
+from mmdet.apis import init_detector, inference_detector
+from utils import show_result
+import mmcv
+from mmcv import Config
+import os.path as osp
+DESCRIPTION = '''# OpenPSG
+This is an official demo for [OpenPSG](https://github.com/Jingkang50/OpenPSG).
+<img id="overview" alt="overview" src="https://camo.githubusercontent.com/880346b66831a8212074787ba9a2301b4d700bd8f765ca11e4845ac0ab34c230/68747470733a2f2f6c6976652e737461746963666c69636b722e636f6d2f36353533352f35323139333837393637375f373531613465306237395f6b2e6a7067" />
+'''
+FOOTER = '<img id="visitor-badge" src="https://visitor-badge.glitch.me/badge?page_id=c-liangyu.openpsg" alt="visitor badge" />'
+def parse_args() -> argparse.Namespace:  # Parse the demo's command-line options and return them as a namespace.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--device', type=str, default='cpu')  # passed to init_detector(device=...) in main()
+    parser.add_argument('--theme', type=str)  # passed to gr.Blocks(theme=...); None when omitted
+    parser.add_argument('--share', action='store_true')  # passed to demo.launch(share=...)
+    parser.add_argument('--port', type=int)  # passed to demo.launch(server_port=...); None when omitted
+    parser.add_argument('--disable-queue',
+                        dest='enable_queue',
+                        action='store_false')  # flag stores False in args.enable_queue; it is True when the flag is absent
+    return parser.parse_args()
+def update_input_image(image: np.ndarray) -> dict:  # Shrink an oversized input so the larger of its first two dims is about 1500; return a gr.Image update.
+    if image is None:  # nothing to resize; the update carries value=None
+        return gr.Image.update(value=None)
+    scale = 1500 / max(image.shape[:2])  # ratio of the 1500 cap to the larger of the first two dimensions
+    if scale < 1:  # only ever downscale; smaller images are passed through unchanged
+        image = cv2.resize(image, None, fx=scale, fy=scale)
+    return gr.Image.update(value=image)
+def set_example_image(example: list) -> dict:  # Return a gr.Image update whose value is the first element of the clicked example row.
+    return gr.Image.update(value=example[0])  # main() builds each sample as a one-element list holding an image path
+def infer(model, input_image, num_rel):  # Run the detector on one image and return whatever show_result produces for it.
+    result = inference_detector(model, input_image)  # model is the object returned by init_detector() in main()
+    return show_result(input_image,
+                        result,
+                        is_one_stage=True,
+                        num_rel=num_rel,  # presumably the number of relations to visualise — confirm in utils.show_result
+                        show=True  # NOTE(review): confirm show=True is safe on a headless server
+                        )
+def main():
+    args = parse_args()
+    model_ckt ='OpenPSG/checkpoints/epoch_60.pth'
+    cfg = Config.fromfile('OpenPSG/configs/psgtr/psgtr_r50_psg_inference.py')
+    model = init_detector(cfg, model_ckt, device=args.device)
+    with gr.Blocks(theme=args.theme, css='style.css') as demo:
+        gr.Markdown(DESCRIPTION)
+        with gr.Row():
+            with gr.Column():
+                with gr.Row():
+                    input_image = gr.Image(label='Input Image', type='numpy')
+                with gr.Group():
+                    with gr.Row():
+                        num_rel = gr.Slider(
+                            5,
+                            100,
+                            step=5,
+                            value=20,
+                            label='Number of Relations')
+                with gr.Row():
+                    run_button = gr.Button(value='Run')
+                    # prediction_results = gr.Variable()
+            with gr.Column():
+                with gr.Row():
+                    # visualization = gr.Image(label='Result', type='numpy')
+                    result = gr.Gallery(label='Result', type='numpy')
+        with gr.Row():
+            paths = sorted(pathlib.Path('images').rglob('*.jpg'))
+            example_images = gr.Dataset(components=[input_image],
+                                        samples=[[path.as_posix()]
+                                                 for path in paths])
+        gr.Markdown(FOOTER)
+        input_image.change(fn=update_input_image,
+                           inputs=input_image,
+                           outputs=input_image)
+        run_button.click(fn=infer,
+                         inputs=[
+                             model, input_image
+                         ],
+                         outputs=result)
+        example_images.click(fn=set_example_image,
+                             inputs=example_images,
+                             outputs=input_image)
+    demo.launch(
+        enable_queue=args.enable_queue,
+        server_port=args.port,
+        share=args.share,
+    )
+if __name__ == '__main__':
+    main()

fake_gan.py ADDED Viewed

	@@ -0,0 +1,56 @@

+# another demo
+# https://huggingface.co/spaces/dalle-mini/dalle-mini/blob/21944e2a8508568387951fc66a30e90f1d58819d/app/gradio/app.py
+# This demo needs to be run from the repo folder.
+# python demo/fake_gan/run.py  (path as given in the original demo; in this commit the file is added as fake_gan.py)
+import os
+import random
+import time
+import gradio as gr
+def fake_gan(count, *args):  # Return int(count) image URLs drawn at random (with replacement) from a fixed list; *args is ignored.
+    time.sleep(1)  # fixed one-second pause, presumably to mimic model latency
+    images = [
+        random.choice(
+            [
+                "https://images.unsplash.com/photo-1507003211169-0a1dd7228f2d?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=387&q=80",
+                "https://images.unsplash.com/photo-1554151228-14d9def656e4?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=386&q=80",
+                "https://images.unsplash.com/photo-1542909168-82c3e7fdca5c?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxzZWFyY2h8MXx8aHVtYW4lMjBmYWNlfGVufDB8fDB8fA%3D%3D&w=1000&q=80",
+                "https://images.unsplash.com/photo-1546456073-92b9f0a8d413?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=387&q=80",
+                "https://images.unsplash.com/photo-1601412436009-d964bd02edbc?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=464&q=80",
+            ]
+        )
+        for _ in range(int(count))  # count is cast to int before use
+    ]
+    return images
+cheetah = os.path.join(os.path.dirname(__file__), "files/cheetah1.jpg")  # example image path resolved next to this script
+demo = gr.Interface(
+    fn=fake_gan,
+    inputs=[  # six inputs; fake_gan uses only the first (count), the rest land in *args
+        gr.Number(label="Generation Count"),
+        gr.Image(label="Initial Image (optional)"),
+        gr.Slider(0, 50, 25, label="TV_scale (for smoothness)"),
+        gr.Slider(0, 50, 25, label="Range_Scale (out of range RBG)"),
+        gr.Number(label="Seed"),
+        gr.Number(label="Respacing"),
+    ],
+    outputs=gr.Gallery(label="Generated Images"),
+    title="FD-GAN",
+    description="This is a fake demo of a GAN. In reality, the images are randomly chosen from Unsplash.",
+    examples=[  # each row has one value per input above, in the same order
+        [2, cheetah, 12, None, None, None],
+        [1, cheetah, 2, None, None, None],
+        [4, cheetah, 42, None, None, None],
+        [5, cheetah, 23, None, None, None],
+        [4, cheetah, 11, None, None, None],
+        [3, cheetah, 1, None, None, None],
+    ],
+)
+if __name__ == "__main__":
+    demo.launch()

images/cooking.jpg ADDED Viewed

images/forrest-gump.jpg ADDED Viewed

images/friends.jpg ADDED Viewed

images/mbappe.jpg ADDED Viewed

images/messi.jpg ADDED Viewed