hysts (HF staff) committed
Commit 9b1e028
1 parent: 5de7473
Files changed (6)
  1. .pre-commit-config.yaml +59 -36
  2. .style.yapf +0 -5
  3. README.md +1 -1
  4. app.py +88 -108
  5. model.py +84 -102
  6. style.css +1 -4
.pre-commit-config.yaml CHANGED
@@ -1,37 +1,60 @@
- exclude: ^(ViTPose/|mmdet_configs/configs/)
  repos:
- - repo: https://github.com/pre-commit/pre-commit-hooks
-   rev: v4.2.0
-   hooks:
-   - id: check-executables-have-shebangs
-   - id: check-json
-   - id: check-merge-conflict
-   - id: check-shebang-scripts-are-executable
-   - id: check-toml
-   - id: check-yaml
-   - id: double-quote-string-fixer
-   - id: end-of-file-fixer
-   - id: mixed-line-ending
-     args: ['--fix=lf']
-   - id: requirements-txt-fixer
-   - id: trailing-whitespace
- - repo: https://github.com/myint/docformatter
-   rev: v1.4
-   hooks:
-   - id: docformatter
-     args: ['--in-place']
- - repo: https://github.com/pycqa/isort
-   rev: 5.12.0
-   hooks:
-   - id: isort
- - repo: https://github.com/pre-commit/mirrors-mypy
-   rev: v0.991
-   hooks:
-   - id: mypy
-     args: ['--ignore-missing-imports']
-     additional_dependencies: ['types-python-slugify']
- - repo: https://github.com/google/yapf
-   rev: v0.32.0
-   hooks:
-   - id: yapf
-     args: ['--parallel', '--in-place']

  repos:
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v4.6.0
+     hooks:
+       - id: check-executables-have-shebangs
+       - id: check-json
+       - id: check-merge-conflict
+       - id: check-shebang-scripts-are-executable
+       - id: check-toml
+       - id: check-yaml
+       - id: end-of-file-fixer
+       - id: mixed-line-ending
+         args: ["--fix=lf"]
+       - id: requirements-txt-fixer
+       - id: trailing-whitespace
+   - repo: https://github.com/myint/docformatter
+     rev: v1.7.5
+     hooks:
+       - id: docformatter
+         args: ["--in-place"]
+   - repo: https://github.com/pycqa/isort
+     rev: 5.13.2
+     hooks:
+       - id: isort
+         args: ["--profile", "black"]
+   - repo: https://github.com/pre-commit/mirrors-mypy
+     rev: v1.10.0
+     hooks:
+       - id: mypy
+         args: ["--ignore-missing-imports"]
+         additional_dependencies:
+           [
+             "types-python-slugify",
+             "types-requests",
+             "types-PyYAML",
+             "types-pytz",
+           ]
+   - repo: https://github.com/psf/black
+     rev: 24.4.2
+     hooks:
+       - id: black
+         language_version: python3.10
+         args: ["--line-length", "119"]
+   - repo: https://github.com/kynan/nbstripout
+     rev: 0.7.1
+     hooks:
+       - id: nbstripout
+         args:
+           [
+             "--extra-keys",
+             "metadata.interpreter metadata.kernelspec cell.metadata.pycharm",
+           ]
+   - repo: https://github.com/nbQA-dev/nbQA
+     rev: 1.8.5
+     hooks:
+       - id: nbqa-black
+       - id: nbqa-pyupgrade
+         args: ["--py37-plus"]
+       - id: nbqa-isort
+         args: ["--float-to-top"]
.style.yapf DELETED
@@ -1,5 +0,0 @@
- [style]
- based_on_style = pep8
- blank_line_before_nested_class_or_def = false
- spaces_before_comment = 2
- split_before_logical_operator = true
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 📊
  colorFrom: yellow
  colorTo: indigo
  sdk: gradio
- sdk_version: 3.36.1
  app_file: app.py
  pinned: false
  suggested_hardware: t4-small

  colorFrom: yellow
  colorTo: indigo
  sdk: gradio
+ sdk_version: 4.36.1
  app_file: app.py
  pinned: false
  suggested_hardware: t4-small
app.py CHANGED
@@ -9,14 +9,14 @@ import gradio as gr

  from model import AppDetModel, AppPoseModel

- DESCRIPTION = '# [ViTPose](https://github.com/ViTAE-Transformer/ViTPose)'


  def extract_tar() -> None:
-     if pathlib.Path('mmdet_configs/configs').exists():
          return
-     with tarfile.open('mmdet_configs/configs.tar') as f:
-         f.extractall('mmdet_configs')


  extract_tar()
@@ -24,135 +24,115 @@ extract_tar()
  det_model = AppDetModel()
  pose_model = AppPoseModel()

- with gr.Blocks(css='style.css') as demo:
      gr.Markdown(DESCRIPTION)

-     with gr.Box():
-         gr.Markdown('## Step 1')
          with gr.Row():
              with gr.Column():
                  with gr.Row():
-                     input_image = gr.Image(label='Input Image', type='numpy')
                  with gr.Row():
                      detector_name = gr.Dropdown(
-                         label='Detector',
-                         choices=list(det_model.MODEL_DICT.keys()),
-                         value=det_model.model_name)
                  with gr.Row():
-                     detect_button = gr.Button('Detect')
-                     det_preds = gr.Variable()
              with gr.Column():
                  with gr.Row():
-                     detection_visualization = gr.Image(
-                         label='Detection Result',
-                         type='numpy',
-                         elem_id='det-result')
                  with gr.Row():
                      vis_det_score_threshold = gr.Slider(
-                         label='Visualization Score Threshold',
-                         minimum=0,
-                         maximum=1,
-                         step=0.05,
-                         value=0.5)
                  with gr.Row():
-                     redraw_det_button = gr.Button(value='Redraw')

          with gr.Row():
-             paths = sorted(pathlib.Path('images').rglob('*.jpg'))
-             example_images = gr.Examples(examples=[[path.as_posix()]
-                                                    for path in paths],
-                                          inputs=input_image)

-     with gr.Box():
-         gr.Markdown('## Step 2')
          with gr.Row():
              with gr.Column():
                  with gr.Row():
                      pose_model_name = gr.Dropdown(
-                         label='Pose Model',
-                         choices=list(pose_model.MODEL_DICT.keys()),
-                         value=pose_model.model_name)
-                     det_score_threshold = gr.Slider(label='Box Score Threshold',
-                                                     minimum=0,
-                                                     maximum=1,
-                                                     step=0.05,
-                                                     value=0.5)
                  with gr.Row():
-                     predict_button = gr.Button('Predict')
-                     pose_preds = gr.Variable()
              with gr.Column():
                  with gr.Row():
-                     pose_visualization = gr.Image(label='Result',
-                                                   type='numpy',
-                                                   elem_id='pose-result')
                  with gr.Row():
                      vis_kpt_score_threshold = gr.Slider(
-                         label='Visualization Score Threshold',
-                         minimum=0,
-                         maximum=1,
-                         step=0.05,
-                         value=0.3)
                  with gr.Row():
-                     vis_dot_radius = gr.Slider(label='Dot Radius',
-                                                minimum=1,
-                                                maximum=10,
-                                                step=1,
-                                                value=4)
                  with gr.Row():
-                     vis_line_thickness = gr.Slider(label='Line Thickness',
-                                                    minimum=1,
-                                                    maximum=10,
-                                                    step=1,
-                                                    value=2)
                  with gr.Row():
-                     redraw_pose_button = gr.Button('Redraw')
-
-     detector_name.change(fn=det_model.set_model,
-                          inputs=detector_name,
-                          outputs=None)
-     detect_button.click(fn=det_model.run,
-                         inputs=[
-                             detector_name,
-                             input_image,
-                             vis_det_score_threshold,
-                         ],
-                         outputs=[
-                             det_preds,
-                             detection_visualization,
-                         ])
-     redraw_det_button.click(fn=det_model.visualize_detection_results,
-                             inputs=[
-                                 input_image,
-                                 det_preds,
-                                 vis_det_score_threshold,
-                             ],
-                             outputs=detection_visualization)
-
-     pose_model_name.change(fn=pose_model.set_model,
-                            inputs=pose_model_name,
-                            outputs=None)
-     predict_button.click(fn=pose_model.run,
-                          inputs=[
-                              pose_model_name,
-                              input_image,
-                              det_preds,
-                              det_score_threshold,
-                              vis_kpt_score_threshold,
-                              vis_dot_radius,
-                              vis_line_thickness,
-                          ],
-                          outputs=[
-                              pose_preds,
-                              pose_visualization,
-                          ])
-     redraw_pose_button.click(fn=pose_model.visualize_pose_results,
-                              inputs=[
-                                  input_image,
-                                  pose_preds,
-                                  vis_kpt_score_threshold,
-                                  vis_dot_radius,
-                                  vis_line_thickness,
-                              ],
-                              outputs=pose_visualization)
-
-     demo.queue(max_size=10).launch()

  from model import AppDetModel, AppPoseModel

+ DESCRIPTION = "# [ViTPose](https://github.com/ViTAE-Transformer/ViTPose)"


  def extract_tar() -> None:
+     if pathlib.Path("mmdet_configs/configs").exists():
          return
+     with tarfile.open("mmdet_configs/configs.tar") as f:
+         f.extractall("mmdet_configs")


  extract_tar()

  det_model = AppDetModel()
  pose_model = AppPoseModel()

+ with gr.Blocks(css="style.css") as demo:
      gr.Markdown(DESCRIPTION)

+     with gr.Group():
+         gr.Markdown("## Step 1")
          with gr.Row():
              with gr.Column():
                  with gr.Row():
+                     input_image = gr.Image(label="Input Image", type="numpy")
                  with gr.Row():
                      detector_name = gr.Dropdown(
+                         label="Detector", choices=list(det_model.MODEL_DICT.keys()), value=det_model.model_name
+                     )
                  with gr.Row():
+                     detect_button = gr.Button("Detect")
+                     det_preds = gr.State()
              with gr.Column():
                  with gr.Row():
+                     detection_visualization = gr.Image(label="Detection Result", type="numpy", elem_id="det-result")
                  with gr.Row():
                      vis_det_score_threshold = gr.Slider(
+                         label="Visualization Score Threshold", minimum=0, maximum=1, step=0.05, value=0.5
+                     )
                  with gr.Row():
+                     redraw_det_button = gr.Button(value="Redraw")

          with gr.Row():
+             paths = sorted(pathlib.Path("images").rglob("*.jpg"))
+             example_images = gr.Examples(examples=[[path.as_posix()] for path in paths], inputs=input_image)

+     with gr.Group():
+         gr.Markdown("## Step 2")
          with gr.Row():
              with gr.Column():
                  with gr.Row():
                      pose_model_name = gr.Dropdown(
+                         label="Pose Model", choices=list(pose_model.MODEL_DICT.keys()), value=pose_model.model_name
+                     )
+                     det_score_threshold = gr.Slider(
+                         label="Box Score Threshold", minimum=0, maximum=1, step=0.05, value=0.5
+                     )
                  with gr.Row():
+                     predict_button = gr.Button("Predict")
+                     pose_preds = gr.State()
              with gr.Column():
                  with gr.Row():
+                     pose_visualization = gr.Image(label="Result", type="numpy", elem_id="pose-result")
                  with gr.Row():
                      vis_kpt_score_threshold = gr.Slider(
+                         label="Visualization Score Threshold", minimum=0, maximum=1, step=0.05, value=0.3
+                     )
                  with gr.Row():
+                     vis_dot_radius = gr.Slider(label="Dot Radius", minimum=1, maximum=10, step=1, value=4)
                  with gr.Row():
+                     vis_line_thickness = gr.Slider(label="Line Thickness", minimum=1, maximum=10, step=1, value=2)
                  with gr.Row():
+                     redraw_pose_button = gr.Button("Redraw")
+
+     detector_name.change(fn=det_model.set_model, inputs=detector_name)
+     detect_button.click(
+         fn=det_model.run,
+         inputs=[
+             detector_name,
+             input_image,
+             vis_det_score_threshold,
+         ],
+         outputs=[
+             det_preds,
+             detection_visualization,
+         ],
+     )
+     redraw_det_button.click(
+         fn=det_model.visualize_detection_results,
+         inputs=[
+             input_image,
+             det_preds,
+             vis_det_score_threshold,
+         ],
+         outputs=detection_visualization,
+     )
+
+     pose_model_name.change(fn=pose_model.set_model, inputs=pose_model_name)
+     predict_button.click(
+         fn=pose_model.run,
+         inputs=[
+             pose_model_name,
+             input_image,
+             det_preds,
+             det_score_threshold,
+             vis_kpt_score_threshold,
+             vis_dot_radius,
+             vis_line_thickness,
+         ],
+         outputs=[
+             pose_preds,
+             pose_visualization,
+         ],
+     )
+     redraw_pose_button.click(
+         fn=pose_model.visualize_pose_results,
+         inputs=[
+             input_image,
+             pose_preds,
+             vis_kpt_score_threshold,
+             vis_dot_radius,
+             vis_line_thickness,
+         ],
+         outputs=pose_visualization,
+     )
+
+ if __name__ == "__main__":
+     demo.queue(max_size=10).launch()
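
The Gradio migration above replaces gr.Box with gr.Group and gr.Variable with gr.State. A minimal sketch of that pattern, assuming Gradio 4.x is installed; the detect function and component names here are illustrative placeholders, not taken from app.py:

import gradio as gr


def detect(image):
    # Placeholder for det_model.run: returns raw predictions plus a visualization.
    preds = {"boxes": []}
    return preds, image


with gr.Blocks() as demo:
    with gr.Group():  # gr.Group stands in for the gr.Box container removed in Gradio 4
        input_image = gr.Image(label="Input Image", type="numpy")
        detect_button = gr.Button("Detect")
        result = gr.Image(label="Detection Result", type="numpy")
    det_preds = gr.State()  # replaces gr.Variable(); carries data between steps without rendering UI
    detect_button.click(fn=detect, inputs=input_image, outputs=[det_preds, result])

if __name__ == "__main__":
    demo.queue(max_size=10).launch()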
model.py CHANGED
@@ -6,15 +6,15 @@ import shlex
  import subprocess
  import sys

- if os.getenv('SYSTEM') == 'spaces':
      import mim

-     mim.uninstall('mmcv-full', confirm_yes=True)
-     mim.install('mmcv-full==1.5.0', is_yes=True)

-     subprocess.run(shlex.split('pip uninstall -y opencv-python'))
-     subprocess.run(shlex.split('pip uninstall -y opencv-python-headless'))
-     subprocess.run(shlex.split('pip install opencv-python-headless==4.8.0.74'))

  import huggingface_hub
  import numpy as np
@@ -22,47 +22,42 @@ import torch
  import torch.nn as nn

  app_dir = pathlib.Path(__file__).parent
- submodule_dir = app_dir / 'ViTPose'
  sys.path.insert(0, submodule_dir.as_posix())

  from mmdet.apis import inference_detector, init_detector
- from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
-                          process_mmdet_results, vis_pose_result)


  class DetModel:
      MODEL_DICT = {
-         'YOLOX-tiny': {
-             'config':
-             'mmdet_configs/configs/yolox/yolox_tiny_8x8_300e_coco.py',
-             'model':
-             'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth',
          },
-         'YOLOX-s': {
-             'config':
-             'mmdet_configs/configs/yolox/yolox_s_8x8_300e_coco.py',
-             'model':
-             'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth',
          },
-         'YOLOX-l': {
-             'config':
-             'mmdet_configs/configs/yolox/yolox_l_8x8_300e_coco.py',
-             'model':
-             'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth',
          },
-         'YOLOX-x': {
-             'config':
-             'mmdet_configs/configs/yolox/yolox_x_8x8_300e_coco.py',
-             'model':
-             'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth',
          },
      }

      def __init__(self):
-         self.device = torch.device(
-             'cuda:0' if torch.cuda.is_available() else 'cpu')
          self._load_all_models_once()
-         self.model_name = 'YOLOX-l'
          self.model = self._load_model(self.model_name)

      def _load_all_models_once(self) -> None:
@@ -71,7 +66,7 @@ class DetModel:

      def _load_model(self, name: str) -> nn.Module:
          d = self.MODEL_DICT[name]
-         return init_detector(d['config'], d['model'], device=self.device)

      def set_model(self, name: str) -> None:
          if name == self.model_name:
@@ -79,9 +74,7 @@
          self.model_name = name
          self.model = self._load_model(name)

-     def detect_and_visualize(
-             self, image: np.ndarray,
-             score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
          out = self.detect(image)
          vis = self.visualize_detection_results(image, out, score_threshold)
          return out, vis
@@ -92,57 +85,46 @@ class DetModel:
          return out

      def visualize_detection_results(
-             self,
-             image: np.ndarray,
-             detection_results: list[np.ndarray],
-             score_threshold: float = 0.3) -> np.ndarray:
          person_det = [detection_results[0]] + [np.array([]).reshape(0, 5)] * 79

          image = image[:, :, ::-1]  # RGB -> BGR
-         vis = self.model.show_result(image,
-                                      person_det,
-                                      score_thr=score_threshold,
-                                      bbox_color=None,
-                                      text_color=(200, 200, 200),
-                                      mask_color=None)
          return vis[:, :, ::-1]  # BGR -> RGB


  class AppDetModel(DetModel):
-     def run(self, model_name: str, image: np.ndarray,
-             score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
          self.set_model(model_name)
          return self.detect_and_visualize(image, score_threshold)


  class PoseModel:
      MODEL_DICT = {
-         'ViTPose-B (single-task train)': {
-             'config':
-             'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
-             'model': 'models/vitpose-b.pth',
          },
-         'ViTPose-L (single-task train)': {
-             'config':
-             'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
-             'model': 'models/vitpose-l.pth',
          },
-         'ViTPose-B (multi-task train, COCO)': {
-             'config':
-             'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
-             'model': 'models/vitpose-b-multi-coco.pth',
          },
-         'ViTPose-L (multi-task train, COCO)': {
-             'config':
-             'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
-             'model': 'models/vitpose-l-multi-coco.pth',
          },
      }

      def __init__(self):
-         self.device = torch.device(
-             'cuda:0' if torch.cuda.is_available() else 'cpu')
-         self.model_name = 'ViTPose-B (multi-task train, COCO)'
          self.model = self._load_model(self.model_name)

      def _load_all_models_once(self) -> None:
@@ -151,9 +133,8 @@ class PoseModel:

      def _load_model(self, name: str) -> nn.Module:
          d = self.MODEL_DICT[name]
-         ckpt_path = huggingface_hub.hf_hub_download('public-data/ViTPose',
-                                                     d['model'])
-         model = init_pose_model(d['config'], ckpt_path, device=self.device)
          return model

      def set_model(self, name: str) -> None:
@@ -172,50 +153,51 @@
          vis_line_thickness: int,
      ) -> tuple[list[dict[str, np.ndarray]], np.ndarray]:
          out = self.predict_pose(image, det_results, box_score_threshold)
-         vis = self.visualize_pose_results(image, out, kpt_score_threshold,
-                                           vis_dot_radius, vis_line_thickness)
          return out, vis

      def predict_pose(
-             self,
-             image: np.ndarray,
-             det_results: list[np.ndarray],
-             box_score_threshold: float = 0.5) -> list[dict[str, np.ndarray]]:
          image = image[:, :, ::-1]  # RGB -> BGR
          person_results = process_mmdet_results(det_results, 1)
-         out, _ = inference_top_down_pose_model(self.model,
-                                                image,
-                                                person_results=person_results,
-                                                bbox_thr=box_score_threshold,
-                                                format='xyxy')
          return out

-     def visualize_pose_results(self,
-                                image: np.ndarray,
-                                pose_results: list[np.ndarray],
-                                kpt_score_threshold: float = 0.3,
-                                vis_dot_radius: int = 4,
-                                vis_line_thickness: int = 1) -> np.ndarray:
          image = image[:, :, ::-1]  # RGB -> BGR
-         vis = vis_pose_result(self.model,
-                               image,
-                               pose_results,
-                               kpt_score_thr=kpt_score_threshold,
-                               radius=vis_dot_radius,
-                               thickness=vis_line_thickness)
          return vis[:, :, ::-1]  # BGR -> RGB


  class AppPoseModel(PoseModel):
      def run(
-             self, model_name: str, image: np.ndarray,
-             det_results: list[np.ndarray], box_score_threshold: float,
-             kpt_score_threshold: float, vis_dot_radius: int,
-             vis_line_thickness: int
      ) -> tuple[list[dict[str, np.ndarray]], np.ndarray]:
          self.set_model(model_name)
-         return self.predict_pose_and_visualize(image, det_results,
-                                                box_score_threshold,
-                                                kpt_score_threshold,
-                                                vis_dot_radius,
-                                                vis_line_thickness)

  import subprocess
  import sys

+ if os.getenv("SYSTEM") == "spaces":
      import mim

+     mim.uninstall("mmcv-full", confirm_yes=True)
+     mim.install("mmcv-full==1.5.0", is_yes=True)

+     subprocess.run(shlex.split("pip uninstall -y opencv-python"))
+     subprocess.run(shlex.split("pip uninstall -y opencv-python-headless"))
+     subprocess.run(shlex.split("pip install opencv-python-headless==4.8.0.74"))

  import huggingface_hub
  import numpy as np

  import torch.nn as nn

  app_dir = pathlib.Path(__file__).parent
+ submodule_dir = app_dir / "ViTPose"
  sys.path.insert(0, submodule_dir.as_posix())

  from mmdet.apis import inference_detector, init_detector
+ from mmpose.apis import (
+     inference_top_down_pose_model,
+     init_pose_model,
+     process_mmdet_results,
+     vis_pose_result,
+ )


  class DetModel:
      MODEL_DICT = {
+         "YOLOX-tiny": {
+             "config": "mmdet_configs/configs/yolox/yolox_tiny_8x8_300e_coco.py",
+             "model": "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth",
          },
+         "YOLOX-s": {
+             "config": "mmdet_configs/configs/yolox/yolox_s_8x8_300e_coco.py",
+             "model": "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth",
          },
+         "YOLOX-l": {
+             "config": "mmdet_configs/configs/yolox/yolox_l_8x8_300e_coco.py",
+             "model": "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth",
          },
+         "YOLOX-x": {
+             "config": "mmdet_configs/configs/yolox/yolox_x_8x8_300e_coco.py",
+             "model": "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth",
          },
      }

      def __init__(self):
+         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
          self._load_all_models_once()
+         self.model_name = "YOLOX-l"
          self.model = self._load_model(self.model_name)

      def _load_all_models_once(self) -> None:

      def _load_model(self, name: str) -> nn.Module:
          d = self.MODEL_DICT[name]
+         return init_detector(d["config"], d["model"], device=self.device)

      def set_model(self, name: str) -> None:
          if name == self.model_name:

          self.model_name = name
          self.model = self._load_model(name)

+     def detect_and_visualize(self, image: np.ndarray, score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
          out = self.detect(image)
          vis = self.visualize_detection_results(image, out, score_threshold)
          return out, vis

          return out

      def visualize_detection_results(
+         self, image: np.ndarray, detection_results: list[np.ndarray], score_threshold: float = 0.3
+     ) -> np.ndarray:
          person_det = [detection_results[0]] + [np.array([]).reshape(0, 5)] * 79

          image = image[:, :, ::-1]  # RGB -> BGR
+         vis = self.model.show_result(
+             image, person_det, score_thr=score_threshold, bbox_color=None, text_color=(200, 200, 200), mask_color=None
+         )
          return vis[:, :, ::-1]  # BGR -> RGB


  class AppDetModel(DetModel):
+     def run(self, model_name: str, image: np.ndarray, score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
          self.set_model(model_name)
          return self.detect_and_visualize(image, score_threshold)


  class PoseModel:
      MODEL_DICT = {
+         "ViTPose-B (single-task train)": {
+             "config": "ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py",
+             "model": "models/vitpose-b.pth",
          },
+         "ViTPose-L (single-task train)": {
+             "config": "ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py",
+             "model": "models/vitpose-l.pth",
          },
+         "ViTPose-B (multi-task train, COCO)": {
+             "config": "ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py",
+             "model": "models/vitpose-b-multi-coco.pth",
          },
+         "ViTPose-L (multi-task train, COCO)": {
+             "config": "ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py",
+             "model": "models/vitpose-l-multi-coco.pth",
          },
      }

      def __init__(self):
+         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+         self.model_name = "ViTPose-B (multi-task train, COCO)"
          self.model = self._load_model(self.model_name)

      def _load_all_models_once(self) -> None:

      def _load_model(self, name: str) -> nn.Module:
          d = self.MODEL_DICT[name]
+         ckpt_path = huggingface_hub.hf_hub_download("public-data/ViTPose", d["model"])
+         model = init_pose_model(d["config"], ckpt_path, device=self.device)
          return model

      def set_model(self, name: str) -> None:

          vis_line_thickness: int,
      ) -> tuple[list[dict[str, np.ndarray]], np.ndarray]:
          out = self.predict_pose(image, det_results, box_score_threshold)
+         vis = self.visualize_pose_results(image, out, kpt_score_threshold, vis_dot_radius, vis_line_thickness)
          return out, vis

      def predict_pose(
+         self, image: np.ndarray, det_results: list[np.ndarray], box_score_threshold: float = 0.5
+     ) -> list[dict[str, np.ndarray]]:
          image = image[:, :, ::-1]  # RGB -> BGR
          person_results = process_mmdet_results(det_results, 1)
+         out, _ = inference_top_down_pose_model(
+             self.model, image, person_results=person_results, bbox_thr=box_score_threshold, format="xyxy"
+         )
          return out

+     def visualize_pose_results(
+         self,
+         image: np.ndarray,
+         pose_results: list[np.ndarray],
+         kpt_score_threshold: float = 0.3,
+         vis_dot_radius: int = 4,
+         vis_line_thickness: int = 1,
+     ) -> np.ndarray:
          image = image[:, :, ::-1]  # RGB -> BGR
+         vis = vis_pose_result(
+             self.model,
+             image,
+             pose_results,
+             kpt_score_thr=kpt_score_threshold,
+             radius=vis_dot_radius,
+             thickness=vis_line_thickness,
+         )
          return vis[:, :, ::-1]  # BGR -> RGB


  class AppPoseModel(PoseModel):
      def run(
+         self,
+         model_name: str,
+         image: np.ndarray,
+         det_results: list[np.ndarray],
+         box_score_threshold: float,
+         kpt_score_threshold: float,
+         vis_dot_radius: int,
+         vis_line_thickness: int,
      ) -> tuple[list[dict[str, np.ndarray]], np.ndarray]:
          self.set_model(model_name)
+         return self.predict_pose_and_visualize(
+             image, det_results, box_score_threshold, kpt_score_threshold, vis_dot_radius, vis_line_thickness
+         )
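
For orientation, a hypothetical usage sketch of the two classes defined above, assuming the Space environment (ViTPose submodule, mmdet/mmpose, extracted mmdet configs, and model weights) is already in place; the image path and threshold values are placeholders rather than values from the repository:

import cv2

from model import AppDetModel, AppPoseModel

det_model = AppDetModel()
pose_model = AppPoseModel()

# Both classes expect an RGB numpy array; they convert to BGR internally.
image = cv2.cvtColor(cv2.imread("person.jpg"), cv2.COLOR_BGR2RGB)  # placeholder path

# Step 1: person detection, returning raw detections and a visualization.
det_preds, det_vis = det_model.run("YOLOX-l", image, 0.5)

# Step 2: top-down pose estimation on the detected boxes.
pose_preds, pose_vis = pose_model.run("ViTPose-B (multi-task train, COCO)", image, det_preds, 0.5, 0.3, 4, 2)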
 
 
style.css CHANGED
@@ -1,5 +1,6 @@
  h1 {
    text-align: center;
  }
  div#det-result {
    max-width: 600px;
@@ -9,7 +10,3 @@ div#pose-result {
    max-width: 600px;
    max-height: 600px;
  }
- img#visitor-badge {
-   display: block;
-   margin: auto;
- }

  h1 {
    text-align: center;
+   display: block;
  }
  div#det-result {
    max-width: 600px;

    max-width: 600px;
    max-height: 600px;
  }