Spaces:

test1444
/

Pose_Video

Running

App Files Community

hnthai committed on Mar 14, 2023

Commit

a9bd37f

•

1 Parent(s): 6ccaa09

first commit

Browse files

Files changed (8) hide show

Pose_Video +1 -0
app.py +75 -0
configs/faster_rcnn_r50_fpn_1x_coco.py +228 -0
configs/topdown_heatmap_hrnet_w48_coco_256x192.py +1129 -0
examples/000001_mpiinew_test.mp4 +0 -0
faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth +3 -0
hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth +3 -0
requirements.txt +7 -0

Pose_Video ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit 6ccaa09c24051b0d9d4e8a9b9297e06dbc9102e6

app.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import cv2
+import gradio as gr
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                         vis_pose_result, process_mmdet_results)
+from mmdet.apis import inference_detector, init_detector
+import mediapy
+pose_config = 'configs/topdown_heatmap_hrnet_w48_coco_256x192.py'  # pose config file added in this commit
+pose_checkpoint = 'hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth'  # weights file added at the repo root in this commit
+det_config = 'configs/faster_rcnn_r50_fpn_1x_coco.py'  # detector config file added in this commit
+det_checkpoint = 'faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'  # weights file added at the repo root in this commit
+# Build the pose model once at import time; predict() reuses it for every request.
+pose_model = init_pose_model(pose_config, pose_checkpoint, device='cpu')
+# Build the detector once at import time; predict() feeds its boxes to the pose model.
+det_model = init_detector(det_config, det_checkpoint, device='cpu')
+max_num_frames=120  # upper bound on the number of frames predict() reads from a video
+def predict(video_path):
+    cap = cv2.VideoCapture(video_path)
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    preds_all = []
+    # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    # out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
+    # writer = cv2.VideoWriter(out_file.name, fourcc, fps, (width, height))
+    frames = []
+    for _ in range(max_num_frames):
+        ok, frame = cap.read()
+        if not ok:
+            break
+        rgb_frame = frame[:,:,::-1]
+        mmdet_results = inference_detector(det_model, rgb_frame)
+        person_results = process_mmdet_results(mmdet_results, cat_id=1)
+        pose_results, returned_outputs = inference_top_down_pose_model(
+          pose_model,
+          rgb_frame,
+          person_results,
+          bbox_thr=0.3,
+          format='xyxy',
+          dataset=pose_model.cfg.data.test.type)
+        vis_result = vis_pose_result(
+          pose_model,
+          rgb_frame,
+          pose_results,
+          dataset=pose_model.cfg.data.test.type,
+          show=False)
+        frames.append(vis_result)
+    cap.release()
+    # writer.release()
+    mediapy.write_video("out.mp4", frames, fps=fps)
+    return "out.mp4"
+title = "Pose Estimation video"
+description = ""
+article = ""
+example_list = ['examples/000001_mpiinew_test.mp4']
+# Create the Gradio demo
+demo = gr.Interface(fn=predict,
+                    inputs=gr.Video(label='Input Video'),
+                    outputs=gr.Video(label='Result'),
+                    examples=example_list,
+                    title=title,
+                    description=description,
+                    article=article)
+# Launch the demo!
+demo.queue().launch(show_api=False)

configs/faster_rcnn_r50_fpn_1x_coco.py ADDED Viewed

	@@ -0,0 +1,228 @@

+model = dict(
+    type='FasterRCNN',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=True,
+        style='pytorch',
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            scales=[8],
+            ratios=[0.5, 1.0, 2.0],
+            strides=[4, 8, 16, 32, 64]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[0.0, 0.0, 0.0, 0.0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
+        loss_cls=dict(
+            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+    roi_head=dict(
+        type='StandardRoIHead',
+        bbox_roi_extractor=dict(
+            type='SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+            out_channels=256,
+            featmap_strides=[4, 8, 16, 32]),
+        bbox_head=dict(
+            type='Shared2FCBBoxHead',
+            in_channels=256,
+            fc_out_channels=1024,
+            roi_feat_size=7,
+            num_classes=80,
+            bbox_coder=dict(
+                type='DeltaXYWHBBoxCoder',
+                target_means=[0.0, 0.0, 0.0, 0.0],
+                target_stds=[0.1, 0.1, 0.2, 0.2]),
+            reg_class_agnostic=False,
+            loss_cls=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+    train_cfg=dict(
+        rpn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.3,
+                min_pos_iou=0.3,
+                match_low_quality=True,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=256,
+                pos_fraction=0.5,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=False),
+            allowed_border=-1,
+            pos_weight=-1,
+            debug=False),
+        rpn_proposal=dict(
+            nms_pre=2000,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                match_low_quality=False,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=512,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            pos_weight=-1,
+            debug=False)),
+    test_cfg=dict(
+        rpn=dict(
+            nms_pre=1000,
+            max_per_img=1000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            score_thr=0.05,
+            nms=dict(type='nms', iou_threshold=0.5),
+            max_per_img=100)))
+dataset_type = 'CocoDataset'  # same type string as data.train / data.val / data.test below
+data_root = 'data/coco/'  # common prefix of the ann_file / img_prefix paths in `data` below
+img_norm_cfg = dict(  # same mean/std/to_rgb values as every Normalize step in the pipelines below
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True),
+    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(
+        type='Normalize',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        to_rgb=True),
+    dict(type='Pad', size_divisor=32),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(
+                type='Normalize',
+                mean=[123.675, 116.28, 103.53],
+                std=[58.395, 57.12, 57.375],
+                to_rgb=True),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type='CocoDataset',
+        ann_file='data/coco/annotations/instances_train2017.json',
+        img_prefix='data/coco/train2017/',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='LoadAnnotations', with_bbox=True),
+            dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
+            dict(type='RandomFlip', flip_ratio=0.5),
+            dict(
+                type='Normalize',
+                mean=[123.675, 116.28, 103.53],
+                std=[58.395, 57.12, 57.375],
+                to_rgb=True),
+            dict(type='Pad', size_divisor=32),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+        ]),
+    val=dict(
+        type='CocoDataset',
+        ann_file='data/coco/annotations/instances_val2017.json',
+        img_prefix='data/coco/val2017/',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(1333, 800),
+                flip=False,
+                transforms=[
+                    dict(type='Resize', keep_ratio=True),
+                    dict(type='RandomFlip'),
+                    dict(
+                        type='Normalize',
+                        mean=[123.675, 116.28, 103.53],
+                        std=[58.395, 57.12, 57.375],
+                        to_rgb=True),
+                    dict(type='Pad', size_divisor=32),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]),
+    test=dict(
+        type='CocoDataset',
+        ann_file='data/coco/annotations/instances_val2017.json',
+        img_prefix='data/coco/val2017/',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(1333, 800),
+                flip=False,
+                transforms=[
+                    dict(type='Resize', keep_ratio=True),
+                    dict(type='RandomFlip'),
+                    dict(
+                        type='Normalize',
+                        mean=[123.675, 116.28, 103.53],
+                        std=[58.395, 57.12, 57.375],
+                        to_rgb=True),
+                    dict(type='Pad', size_divisor=32),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]))
+evaluation = dict(interval=1, metric='bbox')  # NOTE(review): evaluation/training settings from here on; app.py only runs inference, so presumably unused there - confirm
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=None)
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=0.001,
+    step=[8, 11])  # both milestones fall below runner.max_epochs (12) set on the next line
+runner = dict(type='EpochBasedRunner', max_epochs=12)
+checkpoint_config = dict(interval=1)
+log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
+custom_hooks = [dict(type='NumClassCheckHook')]
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None  # no checkpoint path here; app.py passes the checkpoint to init_detector directly
+resume_from = None
+workflow = [('train', 1)]
+opencv_num_threads = 0
+mp_start_method = 'fork'
+auto_scale_lr = dict(enable=False, base_batch_size=16)

configs/topdown_heatmap_hrnet_w48_coco_256x192.py ADDED Viewed

	@@ -0,0 +1,1129 @@

+checkpoint_config = dict(interval=10)
+log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
+log_level = 'INFO'
+load_from = None
+resume_from = None
+dist_params = dict(backend='nccl')
+workflow = [('train', 1)]
+opencv_num_threads = 0
+mp_start_method = 'fork'
+dataset_info = dict(
+    dataset_name='coco',
+    paper_info=dict(
+        author=
+        'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence',
+        title='Microsoft coco: Common objects in context',
+        container='European conference on computer vision',
+        year='2014',
+        homepage='http://cocodataset.org/'),
+    keypoint_info=dict({
+        0:
+        dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+        1:
+        dict(
+            name='left_eye',
+            id=1,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_eye'),
+        2:
+        dict(
+            name='right_eye',
+            id=2,
+            color=[51, 153, 255],
+            type='upper',
+            swap='left_eye'),
+        3:
+        dict(
+            name='left_ear',
+            id=3,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_ear'),
+        4:
+        dict(
+            name='right_ear',
+            id=4,
+            color=[51, 153, 255],
+            type='upper',
+            swap='left_ear'),
+        5:
+        dict(
+            name='left_shoulder',
+            id=5,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_shoulder'),
+        6:
+        dict(
+            name='right_shoulder',
+            id=6,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_shoulder'),
+        7:
+        dict(
+            name='left_elbow',
+            id=7,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_elbow'),
+        8:
+        dict(
+            name='right_elbow',
+            id=8,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_elbow'),
+        9:
+        dict(
+            name='left_wrist',
+            id=9,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_wrist'),
+        10:
+        dict(
+            name='right_wrist',
+            id=10,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_wrist'),
+        11:
+        dict(
+            name='left_hip',
+            id=11,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_hip'),
+        12:
+        dict(
+            name='right_hip',
+            id=12,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_hip'),
+        13:
+        dict(
+            name='left_knee',
+            id=13,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_knee'),
+        14:
+        dict(
+            name='right_knee',
+            id=14,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_knee'),
+        15:
+        dict(
+            name='left_ankle',
+            id=15,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_ankle'),
+        16:
+        dict(
+            name='right_ankle',
+            id=16,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_ankle')
+    }),
+    skeleton_info=dict({
+        0:
+        dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+        1:
+        dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+        2:
+        dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+        3:
+        dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+        4:
+        dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+        5:
+        dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+        6:
+        dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+        7:
+        dict(
+            link=('left_shoulder', 'right_shoulder'),
+            id=7,
+            color=[51, 153, 255]),
+        8:
+        dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+        9:
+        dict(
+            link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+        10:
+        dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+        11:
+        dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+        12:
+        dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+        13:
+        dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+        14:
+        dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+        15:
+        dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+        16:
+        dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+        17:
+        dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+        18:
+        dict(
+            link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
+    }),
+    joint_weights=[
+        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0, 1.0, 1.2,
+        1.2, 1.5, 1.5
+    ],
+    sigmas=[
+        0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+        0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+    ])
+evaluation = dict(interval=10, metric='mAP', save_best='AP')  # NOTE(review): evaluation/training settings; app.py only runs inference, so presumably unused there - confirm
+optimizer = dict(type='Adam', lr=0.0005)
+optimizer_config = dict(grad_clip=None)
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=0.001,
+    step=[170, 200])  # both milestones fall below total_epochs (210) set on the next line
+total_epochs = 210
+channel_cfg = dict(
+    num_output_channels=17,
+    dataset_joints=17,
+    dataset_channel=[[
+        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+    ]],
+    inference_channel=[
+        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+    ])
+model = dict(
+    type='TopDown',
+    pretrained=
+    'https://download.openmmlab.com/mmpose/pretrain_models/hrnet_w48-8ef0771d.pth',
+    backbone=dict(
+        type='HRNet',
+        in_channels=3,
+        extra=dict(
+            stage1=dict(
+                num_modules=1,
+                num_branches=1,
+                block='BOTTLENECK',
+                num_blocks=(4, ),
+                num_channels=(64, )),
+            stage2=dict(
+                num_modules=1,
+                num_branches=2,
+                block='BASIC',
+                num_blocks=(4, 4),
+                num_channels=(48, 96)),
+            stage3=dict(
+                num_modules=4,
+                num_branches=3,
+                block='BASIC',
+                num_blocks=(4, 4, 4),
+                num_channels=(48, 96, 192)),
+            stage4=dict(
+                num_modules=3,
+                num_branches=4,
+                block='BASIC',
+                num_blocks=(4, 4, 4, 4),
+                num_channels=(48, 96, 192, 384)))),
+    keypoint_head=dict(
+        type='TopdownHeatmapSimpleHead',
+        in_channels=48,
+        out_channels=17,
+        num_deconv_layers=0,
+        extra=dict(final_conv_kernel=1),
+        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
+    train_cfg=dict(),
+    test_cfg=dict(
+        flip_test=True,
+        post_process='default',
+        shift_heatmap=True,
+        modulate_kernel=11))
+data_cfg = dict(
+    image_size=[192, 256],
+    heatmap_size=[48, 64],
+    num_output_channels=17,
+    num_joints=17,
+    dataset_channel=[[
+        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+    ]],
+    inference_channel=[
+        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+    ],
+    soft_nms=False,
+    nms_thr=1.0,
+    oks_thr=0.9,
+    vis_thr=0.2,
+    use_gt_bbox=False,
+    det_bbox_thr=0.0,
+    bbox_file=
+    'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
+)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='TopDownRandomFlip', flip_prob=0.5),
+    dict(
+        type='TopDownHalfBodyTransform',
+        num_joints_half_body=8,
+        prob_half_body=0.3),
+    dict(
+        type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
+    dict(type='TopDownAffine'),
+    dict(type='ToTensor'),
+    dict(
+        type='NormalizeTensor',
+        mean=[0.485, 0.456, 0.406],
+        std=[0.229, 0.224, 0.225]),
+    dict(type='TopDownGenerateTarget', sigma=2),
+    dict(
+        type='Collect',
+        keys=['img', 'target', 'target_weight'],
+        meta_keys=[
+            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
+            'rotation', 'bbox_score', 'flip_pairs'
+        ])
+]
+val_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='TopDownAffine'),
+    dict(type='ToTensor'),
+    dict(
+        type='NormalizeTensor',
+        mean=[0.485, 0.456, 0.406],
+        std=[0.229, 0.224, 0.225]),
+    dict(
+        type='Collect',
+        keys=['img'],
+        meta_keys=[
+            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+            'flip_pairs'
+        ])
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='TopDownAffine'),
+    dict(type='ToTensor'),
+    dict(
+        type='NormalizeTensor',
+        mean=[0.485, 0.456, 0.406],
+        std=[0.229, 0.224, 0.225]),
+    dict(
+        type='Collect',
+        keys=['img'],
+        meta_keys=[
+            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+            'flip_pairs'
+        ])
+]
+data_root = 'data/coco'
+data = dict(
+    samples_per_gpu=32,
+    workers_per_gpu=2,
+    val_dataloader=dict(samples_per_gpu=32),
+    test_dataloader=dict(samples_per_gpu=32),
+    train=dict(
+        type='TopDownCocoDataset',
+        ann_file='data/coco/annotations/person_keypoints_train2017.json',
+        img_prefix='data/coco/train2017/',
+        data_cfg=dict(
+            image_size=[192, 256],
+            heatmap_size=[48, 64],
+            num_output_channels=17,
+            num_joints=17,
+            dataset_channel=[[
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+            ]],
+            inference_channel=[
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+            ],
+            soft_nms=False,
+            nms_thr=1.0,
+            oks_thr=0.9,
+            vis_thr=0.2,
+            use_gt_bbox=False,
+            det_bbox_thr=0.0,
+            bbox_file=
+            'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
+        ),
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='TopDownRandomFlip', flip_prob=0.5),
+            dict(
+                type='TopDownHalfBodyTransform',
+                num_joints_half_body=8,
+                prob_half_body=0.3),
+            dict(
+                type='TopDownGetRandomScaleRotation',
+                rot_factor=40,
+                scale_factor=0.5),
+            dict(type='TopDownAffine'),
+            dict(type='ToTensor'),
+            dict(
+                type='NormalizeTensor',
+                mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225]),
+            dict(type='TopDownGenerateTarget', sigma=2),
+            dict(
+                type='Collect',
+                keys=['img', 'target', 'target_weight'],
+                meta_keys=[
+                    'image_file', 'joints_3d', 'joints_3d_visible', 'center',
+                    'scale', 'rotation', 'bbox_score', 'flip_pairs'
+                ])
+        ],
+        dataset_info=dict(
+            dataset_name='coco',
+            paper_info=dict(
+                author=
+                'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence',
+                title='Microsoft coco: Common objects in context',
+                container='European conference on computer vision',
+                year='2014',
+                homepage='http://cocodataset.org/'),
+            keypoint_info=dict({
+                0:
+                dict(
+                    name='nose',
+                    id=0,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap=''),
+                1:
+                dict(
+                    name='left_eye',
+                    id=1,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='right_eye'),
+                2:
+                dict(
+                    name='right_eye',
+                    id=2,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='left_eye'),
+                3:
+                dict(
+                    name='left_ear',
+                    id=3,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='right_ear'),
+                4:
+                dict(
+                    name='right_ear',
+                    id=4,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='left_ear'),
+                5:
+                dict(
+                    name='left_shoulder',
+                    id=5,
+                    color=[0, 255, 0],
+                    type='upper',
+                    swap='right_shoulder'),
+                6:
+                dict(
+                    name='right_shoulder',
+                    id=6,
+                    color=[255, 128, 0],
+                    type='upper',
+                    swap='left_shoulder'),
+                7:
+                dict(
+                    name='left_elbow',
+                    id=7,
+                    color=[0, 255, 0],
+                    type='upper',
+                    swap='right_elbow'),
+                8:
+                dict(
+                    name='right_elbow',
+                    id=8,
+                    color=[255, 128, 0],
+                    type='upper',
+                    swap='left_elbow'),
+                9:
+                dict(
+                    name='left_wrist',
+                    id=9,
+                    color=[0, 255, 0],
+                    type='upper',
+                    swap='right_wrist'),
+                10:
+                dict(
+                    name='right_wrist',
+                    id=10,
+                    color=[255, 128, 0],
+                    type='upper',
+                    swap='left_wrist'),
+                11:
+                dict(
+                    name='left_hip',
+                    id=11,
+                    color=[0, 255, 0],
+                    type='lower',
+                    swap='right_hip'),
+                12:
+                dict(
+                    name='right_hip',
+                    id=12,
+                    color=[255, 128, 0],
+                    type='lower',
+                    swap='left_hip'),
+                13:
+                dict(
+                    name='left_knee',
+                    id=13,
+                    color=[0, 255, 0],
+                    type='lower',
+                    swap='right_knee'),
+                14:
+                dict(
+                    name='right_knee',
+                    id=14,
+                    color=[255, 128, 0],
+                    type='lower',
+                    swap='left_knee'),
+                15:
+                dict(
+                    name='left_ankle',
+                    id=15,
+                    color=[0, 255, 0],
+                    type='lower',
+                    swap='right_ankle'),
+                16:
+                dict(
+                    name='right_ankle',
+                    id=16,
+                    color=[255, 128, 0],
+                    type='lower',
+                    swap='left_ankle')
+            }),
+            skeleton_info=dict({
+                0:
+                dict(
+                    link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+                1:
+                dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+                2:
+                dict(
+                    link=('right_ankle', 'right_knee'),
+                    id=2,
+                    color=[255, 128, 0]),
+                3:
+                dict(
+                    link=('right_knee', 'right_hip'),
+                    id=3,
+                    color=[255, 128, 0]),
+                4:
+                dict(
+                    link=('left_hip', 'right_hip'), id=4, color=[51, 153,
+                                                                 255]),
+                5:
+                dict(
+                    link=('left_shoulder', 'left_hip'),
+                    id=5,
+                    color=[51, 153, 255]),
+                6:
+                dict(
+                    link=('right_shoulder', 'right_hip'),
+                    id=6,
+                    color=[51, 153, 255]),
+                7:
+                dict(
+                    link=('left_shoulder', 'right_shoulder'),
+                    id=7,
+                    color=[51, 153, 255]),
+                8:
+                dict(
+                    link=('left_shoulder', 'left_elbow'),
+                    id=8,
+                    color=[0, 255, 0]),
+                9:
+                dict(
+                    link=('right_shoulder', 'right_elbow'),
+                    id=9,
+                    color=[255, 128, 0]),
+                10:
+                dict(
+                    link=('left_elbow', 'left_wrist'),
+                    id=10,
+                    color=[0, 255, 0]),
+                11:
+                dict(
+                    link=('right_elbow', 'right_wrist'),
+                    id=11,
+                    color=[255, 128, 0]),
+                12:
+                dict(
+                    link=('left_eye', 'right_eye'),
+                    id=12,
+                    color=[51, 153, 255]),
+                13:
+                dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+                14:
+                dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+                15:
+                dict(
+                    link=('left_eye', 'left_ear'), id=15, color=[51, 153,
+                                                                 255]),
+                16:
+                dict(
+                    link=('right_eye', 'right_ear'),
+                    id=16,
+                    color=[51, 153, 255]),
+                17:
+                dict(
+                    link=('left_ear', 'left_shoulder'),
+                    id=17,
+                    color=[51, 153, 255]),
+                18:
+                dict(
+                    link=('right_ear', 'right_shoulder'),
+                    id=18,
+                    color=[51, 153, 255])
+            }),
+            joint_weights=[
+                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0,
+                1.0, 1.2, 1.2, 1.5, 1.5
+            ],
+            sigmas=[
+                0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
+                0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+            ])),
+    val=dict(
+        type='TopDownCocoDataset',
+        ann_file='data/coco/annotations/person_keypoints_val2017.json',
+        img_prefix='data/coco/val2017/',
+        data_cfg=dict(
+            image_size=[192, 256],
+            heatmap_size=[48, 64],
+            num_output_channels=17,
+            num_joints=17,
+            dataset_channel=[[
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+            ]],
+            inference_channel=[
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+            ],
+            soft_nms=False,
+            nms_thr=1.0,
+            oks_thr=0.9,
+            vis_thr=0.2,
+            use_gt_bbox=False,
+            det_bbox_thr=0.0,
+            bbox_file=
+            'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
+        ),
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='TopDownAffine'),
+            dict(type='ToTensor'),
+            dict(
+                type='NormalizeTensor',
+                mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225]),
+            dict(
+                type='Collect',
+                keys=['img'],
+                meta_keys=[
+                    'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+                    'flip_pairs'
+                ])
+        ],
+        dataset_info=dict(
+            dataset_name='coco',
+            paper_info=dict(
+                author=
+                'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence',
+                title='Microsoft coco: Common objects in context',
+                container='European conference on computer vision',
+                year='2014',
+                homepage='http://cocodataset.org/'),
+            keypoint_info=dict({
+                0:
+                dict(
+                    name='nose',
+                    id=0,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap=''),
+                1:
+                dict(
+                    name='left_eye',
+                    id=1,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='right_eye'),
+                2:
+                dict(
+                    name='right_eye',
+                    id=2,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='left_eye'),
+                3:
+                dict(
+                    name='left_ear',
+                    id=3,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='right_ear'),
+                4:
+                dict(
+                    name='right_ear',
+                    id=4,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='left_ear'),
+                5:
+                dict(
+                    name='left_shoulder',
+                    id=5,
+                    color=[0, 255, 0],
+                    type='upper',
+                    swap='right_shoulder'),
+                6:
+                dict(
+                    name='right_shoulder',
+                    id=6,
+                    color=[255, 128, 0],
+                    type='upper',
+                    swap='left_shoulder'),
+                7:
+                dict(
+                    name='left_elbow',
+                    id=7,
+                    color=[0, 255, 0],
+                    type='upper',
+                    swap='right_elbow'),
+                8:
+                dict(
+                    name='right_elbow',
+                    id=8,
+                    color=[255, 128, 0],
+                    type='upper',
+                    swap='left_elbow'),
+                9:
+                dict(
+                    name='left_wrist',
+                    id=9,
+                    color=[0, 255, 0],
+                    type='upper',
+                    swap='right_wrist'),
+                10:
+                dict(
+                    name='right_wrist',
+                    id=10,
+                    color=[255, 128, 0],
+                    type='upper',
+                    swap='left_wrist'),
+                11:
+                dict(
+                    name='left_hip',
+                    id=11,
+                    color=[0, 255, 0],
+                    type='lower',
+                    swap='right_hip'),
+                12:
+                dict(
+                    name='right_hip',
+                    id=12,
+                    color=[255, 128, 0],
+                    type='lower',
+                    swap='left_hip'),
+                13:
+                dict(
+                    name='left_knee',
+                    id=13,
+                    color=[0, 255, 0],
+                    type='lower',
+                    swap='right_knee'),
+                14:
+                dict(
+                    name='right_knee',
+                    id=14,
+                    color=[255, 128, 0],
+                    type='lower',
+                    swap='left_knee'),
+                15:
+                dict(
+                    name='left_ankle',
+                    id=15,
+                    color=[0, 255, 0],
+                    type='lower',
+                    swap='right_ankle'),
+                16:
+                dict(
+                    name='right_ankle',
+                    id=16,
+                    color=[255, 128, 0],
+                    type='lower',
+                    swap='left_ankle')
+            }),
+            skeleton_info=dict({
+                0:
+                dict(
+                    link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+                1:
+                dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+                2:
+                dict(
+                    link=('right_ankle', 'right_knee'),
+                    id=2,
+                    color=[255, 128, 0]),
+                3:
+                dict(
+                    link=('right_knee', 'right_hip'),
+                    id=3,
+                    color=[255, 128, 0]),
+                4:
+                dict(
+                    link=('left_hip', 'right_hip'), id=4, color=[51, 153,
+                                                                 255]),
+                5:
+                dict(
+                    link=('left_shoulder', 'left_hip'),
+                    id=5,
+                    color=[51, 153, 255]),
+                6:
+                dict(
+                    link=('right_shoulder', 'right_hip'),
+                    id=6,
+                    color=[51, 153, 255]),
+                7:
+                dict(
+                    link=('left_shoulder', 'right_shoulder'),
+                    id=7,
+                    color=[51, 153, 255]),
+                8:
+                dict(
+                    link=('left_shoulder', 'left_elbow'),
+                    id=8,
+                    color=[0, 255, 0]),
+                9:
+                dict(
+                    link=('right_shoulder', 'right_elbow'),
+                    id=9,
+                    color=[255, 128, 0]),
+                10:
+                dict(
+                    link=('left_elbow', 'left_wrist'),
+                    id=10,
+                    color=[0, 255, 0]),
+                11:
+                dict(
+                    link=('right_elbow', 'right_wrist'),
+                    id=11,
+                    color=[255, 128, 0]),
+                12:
+                dict(
+                    link=('left_eye', 'right_eye'),
+                    id=12,
+                    color=[51, 153, 255]),
+                13:
+                dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+                14:
+                dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+                15:
+                dict(
+                    link=('left_eye', 'left_ear'), id=15, color=[51, 153,
+                                                                 255]),
+                16:
+                dict(
+                    link=('right_eye', 'right_ear'),
+                    id=16,
+                    color=[51, 153, 255]),
+                17:
+                dict(
+                    link=('left_ear', 'left_shoulder'),
+                    id=17,
+                    color=[51, 153, 255]),
+                18:
+                dict(
+                    link=('right_ear', 'right_shoulder'),
+                    id=18,
+                    color=[51, 153, 255])
+            }),
+            joint_weights=[
+                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0,
+                1.0, 1.2, 1.2, 1.5, 1.5
+            ],
+            sigmas=[
+                0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
+                0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+            ])),
+    test=dict(
+        type='TopDownCocoDataset',
+        ann_file='data/coco/annotations/person_keypoints_val2017.json',
+        img_prefix='data/coco/val2017/',
+        data_cfg=dict(
+            image_size=[192, 256],
+            heatmap_size=[48, 64],
+            num_output_channels=17,
+            num_joints=17,
+            dataset_channel=[[
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+            ]],
+            inference_channel=[
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+            ],
+            soft_nms=False,
+            nms_thr=1.0,
+            oks_thr=0.9,
+            vis_thr=0.2,
+            use_gt_bbox=False,
+            det_bbox_thr=0.0,
+            bbox_file=
+            'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
+        ),
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='TopDownAffine'),
+            dict(type='ToTensor'),
+            dict(
+                type='NormalizeTensor',
+                mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225]),
+            dict(
+                type='Collect',
+                keys=['img'],
+                meta_keys=[
+                    'image_file', 'center', 'scale', 'rotation', 'bbox_score',
+                    'flip_pairs'
+                ])
+        ],
+        dataset_info=dict(
+            dataset_name='coco',
+            paper_info=dict(
+                author=
+                'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence',
+                title='Microsoft coco: Common objects in context',
+                container='European conference on computer vision',
+                year='2014',
+                homepage='http://cocodataset.org/'),
+            keypoint_info=dict({
+                0:
+                dict(
+                    name='nose',
+                    id=0,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap=''),
+                1:
+                dict(
+                    name='left_eye',
+                    id=1,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='right_eye'),
+                2:
+                dict(
+                    name='right_eye',
+                    id=2,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='left_eye'),
+                3:
+                dict(
+                    name='left_ear',
+                    id=3,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='right_ear'),
+                4:
+                dict(
+                    name='right_ear',
+                    id=4,
+                    color=[51, 153, 255],
+                    type='upper',
+                    swap='left_ear'),
+                5:
+                dict(
+                    name='left_shoulder',
+                    id=5,
+                    color=[0, 255, 0],
+                    type='upper',
+                    swap='right_shoulder'),
+                6:
+                dict(
+                    name='right_shoulder',
+                    id=6,
+                    color=[255, 128, 0],
+                    type='upper',
+                    swap='left_shoulder'),
+                7:
+                dict(
+                    name='left_elbow',
+                    id=7,
+                    color=[0, 255, 0],
+                    type='upper',
+                    swap='right_elbow'),
+                8:
+                dict(
+                    name='right_elbow',
+                    id=8,
+                    color=[255, 128, 0],
+                    type='upper',
+                    swap='left_elbow'),
+                9:
+                dict(
+                    name='left_wrist',
+                    id=9,
+                    color=[0, 255, 0],
+                    type='upper',
+                    swap='right_wrist'),
+                10:
+                dict(
+                    name='right_wrist',
+                    id=10,
+                    color=[255, 128, 0],
+                    type='upper',
+                    swap='left_wrist'),
+                11:
+                dict(
+                    name='left_hip',
+                    id=11,
+                    color=[0, 255, 0],
+                    type='lower',
+                    swap='right_hip'),
+                12:
+                dict(
+                    name='right_hip',
+                    id=12,
+                    color=[255, 128, 0],
+                    type='lower',
+                    swap='left_hip'),
+                13:
+                dict(
+                    name='left_knee',
+                    id=13,
+                    color=[0, 255, 0],
+                    type='lower',
+                    swap='right_knee'),
+                14:
+                dict(
+                    name='right_knee',
+                    id=14,
+                    color=[255, 128, 0],
+                    type='lower',
+                    swap='left_knee'),
+                15:
+                dict(
+                    name='left_ankle',
+                    id=15,
+                    color=[0, 255, 0],
+                    type='lower',
+                    swap='right_ankle'),
+                16:
+                dict(
+                    name='right_ankle',
+                    id=16,
+                    color=[255, 128, 0],
+                    type='lower',
+                    swap='left_ankle')
+            }),
+            skeleton_info=dict({
+                0:
+                dict(
+                    link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+                1:
+                dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+                2:
+                dict(
+                    link=('right_ankle', 'right_knee'),
+                    id=2,
+                    color=[255, 128, 0]),
+                3:
+                dict(
+                    link=('right_knee', 'right_hip'),
+                    id=3,
+                    color=[255, 128, 0]),
+                4:
+                dict(
+                    link=('left_hip', 'right_hip'), id=4, color=[51, 153,
+                                                                 255]),
+                5:
+                dict(
+                    link=('left_shoulder', 'left_hip'),
+                    id=5,
+                    color=[51, 153, 255]),
+                6:
+                dict(
+                    link=('right_shoulder', 'right_hip'),
+                    id=6,
+                    color=[51, 153, 255]),
+                7:
+                dict(
+                    link=('left_shoulder', 'right_shoulder'),
+                    id=7,
+                    color=[51, 153, 255]),
+                8:
+                dict(
+                    link=('left_shoulder', 'left_elbow'),
+                    id=8,
+                    color=[0, 255, 0]),
+                9:
+                dict(
+                    link=('right_shoulder', 'right_elbow'),
+                    id=9,
+                    color=[255, 128, 0]),
+                10:
+                dict(
+                    link=('left_elbow', 'left_wrist'),
+                    id=10,
+                    color=[0, 255, 0]),
+                11:
+                dict(
+                    link=('right_elbow', 'right_wrist'),
+                    id=11,
+                    color=[255, 128, 0]),
+                12:
+                dict(
+                    link=('left_eye', 'right_eye'),
+                    id=12,
+                    color=[51, 153, 255]),
+                13:
+                dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+                14:
+                dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+                15:
+                dict(
+                    link=('left_eye', 'left_ear'), id=15, color=[51, 153,
+                                                                 255]),
+                16:
+                dict(
+                    link=('right_eye', 'right_ear'),
+                    id=16,
+                    color=[51, 153, 255]),
+                17:
+                dict(
+                    link=('left_ear', 'left_shoulder'),
+                    id=17,
+                    color=[51, 153, 255]),
+                18:
+                dict(
+                    link=('right_ear', 'right_shoulder'),
+                    id=18,
+                    color=[51, 153, 255])
+            }),
+            joint_weights=[
+                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0,
+                1.0, 1.2, 1.2, 1.5, 1.5
+            ],
+            sigmas=[
+                0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
+                0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+            ])))

examples/000001_mpiinew_test.mp4 ADDED Viewed

Binary file (159 kB). View file

faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:047c8118fc5ca88ba5ae1fab72f2cd6b070501fe3af2f3cba5cfa9a89b44b03e
+size 167287506

hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9e0b3ab0439cb68e166c7543e59d2587cd8d7e9acf5ea62a8378eeb82fb50e5
+size 255011654

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+mediapy
+numpy==1.23.5
+torch==1.11.0
+torchvision==0.12.0
+openmim==0.1.5
+mmdet==2.24.1
+mmpose==0.25.1