hysts (HF staff) committed
Commit
09e24f9
1 Parent(s): d9aa325
Files changed (5)
  1. .pre-commit-config.yaml +2 -12
  2. README.md +1 -1
  3. app.py +94 -128
  4. model.py +19 -23
  5. requirements.txt +1 -1
.pre-commit-config.yaml CHANGED
@@ -21,11 +21,11 @@ repos:
       - id: docformatter
         args: ['--in-place']
   - repo: https://github.com/pycqa/isort
-    rev: 5.10.1
+    rev: 5.12.0
     hooks:
       - id: isort
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.812
+    rev: v0.991
     hooks:
       - id: mypy
         args: ['--ignore-missing-imports']
@@ -34,13 +34,3 @@ repos:
     hooks:
       - id: yapf
         args: ['--parallel', '--in-place']
-  - repo: https://github.com/kynan/nbstripout
-    rev: 0.5.0
-    hooks:
-      - id: nbstripout
-        args: ['--extra-keys', 'metadata.interpreter metadata.kernelspec cell.metadata.pycharm']
-  - repo: https://github.com/nbQA-dev/nbQA
-    rev: 1.3.1
-    hooks:
-      - id: nbqa-isort
-      - id: nbqa-yapf
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🦀
 colorFrom: gray
 colorTo: purple
 sdk: gradio
-sdk_version: 3.0.15
+sdk_version: 3.19.1
 app_file: app.py
 pinned: false
 ---
app.py CHANGED
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import argparse
 import pathlib
 import tarfile
 
@@ -15,21 +14,7 @@ DESCRIPTION = '''# ViTPose
 This is an unofficial demo for [https://github.com/ViTAE-Transformer/ViTPose](https://github.com/ViTAE-Transformer/ViTPose).
 
 Related app: [https://huggingface.co/spaces/Gradio-Blocks/ViTPose](https://huggingface.co/spaces/Gradio-Blocks/ViTPose)
-
 '''
-FOOTER = '<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=hysts.vitpose_video" />'
-
-
-def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--device', type=str, default='cpu')
-    parser.add_argument('--theme', type=str)
-    parser.add_argument('--share', action='store_true')
-    parser.add_argument('--port', type=int)
-    parser.add_argument('--disable-queue',
-                        dest='enable_queue',
-                        action='store_false')
-    return parser.parse_args()
 
 
 def set_example_video(example: list) -> dict:
@@ -43,116 +28,97 @@ def extract_tar() -> None:
         f.extractall('mmdet_configs')
 
 
-def main():
-    args = parse_args()
-
-    extract_tar()
-
-    model = AppModel(device=args.device)
-
-    with gr.Blocks(theme=args.theme, css='style.css') as demo:
-        gr.Markdown(DESCRIPTION)
-
-        with gr.Row():
-            with gr.Column():
-                input_video = gr.Video(label='Input Video',
-                                       format='mp4',
-                                       elem_id='input_video')
-                with gr.Group():
-                    detector_name = gr.Dropdown(
-                        list(model.det_model.MODEL_DICT.keys()),
-                        value=model.det_model.model_name,
-                        label='Detector')
-                    pose_model_name = gr.Dropdown(
-                        list(model.pose_model.MODEL_DICT.keys()),
-                        value=model.pose_model.model_name,
-                        label='Pose Model')
-                    det_score_threshold = gr.Slider(
-                        0,
-                        1,
-                        step=0.05,
-                        value=0.5,
-                        label='Box Score Threshold')
-                    max_num_frames = gr.Slider(
-                        1,
-                        300,
-                        step=1,
-                        value=60,
-                        label='Maximum Number of Frames')
-                    predict_button = gr.Button(value='Predict')
-                    pose_preds = gr.Variable()
-
-                paths = sorted(pathlib.Path('videos').rglob('*.mp4'))
-                example_videos = gr.Dataset(components=[input_video],
-                                            samples=[[path.as_posix()]
-                                                     for path in paths])
-
-            with gr.Column():
-                with gr.Group():
-                    result = gr.Video(label='Result',
-                                      format='mp4',
-                                      elem_id='result')
-                    vis_kpt_score_threshold = gr.Slider(
-                        0,
-                        1,
-                        step=0.05,
-                        value=0.3,
-                        label='Visualization Score Threshold')
-                    vis_dot_radius = gr.Slider(1,
-                                               10,
-                                               step=1,
-                                               value=4,
-                                               label='Dot Radius')
-                    vis_line_thickness = gr.Slider(1,
-                                                   10,
-                                                   step=1,
-                                                   value=2,
-                                                   label='Line Thickness')
-                    redraw_button = gr.Button(value='Redraw')
-
-        gr.Markdown(FOOTER)
-
-        detector_name.change(fn=model.det_model.set_model,
-                             inputs=detector_name,
-                             outputs=None)
-        pose_model_name.change(fn=model.pose_model.set_model,
-                               inputs=pose_model_name,
-                               outputs=None)
-        predict_button.click(fn=model.run,
-                             inputs=[
-                                 input_video,
-                                 detector_name,
-                                 pose_model_name,
-                                 det_score_threshold,
-                                 max_num_frames,
-                                 vis_kpt_score_threshold,
-                                 vis_dot_radius,
-                                 vis_line_thickness,
-                             ],
-                             outputs=[
-                                 result,
-                                 pose_preds,
-                             ])
-        redraw_button.click(fn=model.visualize_pose_results,
-                            inputs=[
-                                input_video,
-                                pose_preds,
-                                vis_kpt_score_threshold,
-                                vis_dot_radius,
-                                vis_line_thickness,
-                            ],
-                            outputs=result)
-
-        example_videos.click(fn=set_example_video,
-                             inputs=example_videos,
-                             outputs=input_video)
-
-    demo.launch(
-        enable_queue=args.enable_queue,
-        server_port=args.port,
-        share=args.share,
-    )
-
-
-if __name__ == '__main__':
-    main()
+extract_tar()
+
+model = AppModel()
+
+with gr.Blocks(css='style.css') as demo:
+    gr.Markdown(DESCRIPTION)
+
+    with gr.Row():
+        with gr.Column():
+            input_video = gr.Video(label='Input Video',
+                                   format='mp4',
+                                   elem_id='input_video')
+            detector_name = gr.Dropdown(list(
+                model.det_model.MODEL_DICT.keys()),
+                                        value=model.det_model.model_name,
+                                        label='Detector')
+            pose_model_name = gr.Dropdown(list(
+                model.pose_model.MODEL_DICT.keys()),
+                                          value=model.pose_model.model_name,
+                                          label='Pose Model')
+            det_score_threshold = gr.Slider(0,
+                                            1,
+                                            step=0.05,
+                                            value=0.5,
+                                            label='Box Score Threshold')
+            max_num_frames = gr.Slider(1,
+                                       300,
+                                       step=1,
+                                       value=60,
+                                       label='Maximum Number of Frames')
+            predict_button = gr.Button(value='Predict')
+            pose_preds = gr.Variable()
+
+            paths = sorted(pathlib.Path('videos').rglob('*.mp4'))
+            example_videos = gr.Dataset(components=[input_video],
+                                        samples=[[path.as_posix()]
+                                                 for path in paths])
+
+        with gr.Column():
+            result = gr.Video(label='Result', format='mp4', elem_id='result')
+            vis_kpt_score_threshold = gr.Slider(
+                0,
+                1,
+                step=0.05,
+                value=0.3,
+                label='Visualization Score Threshold')
+            vis_dot_radius = gr.Slider(1,
+                                       10,
+                                       step=1,
+                                       value=4,
+                                       label='Dot Radius')
+            vis_line_thickness = gr.Slider(1,
+                                           10,
+                                           step=1,
+                                           value=2,
+                                           label='Line Thickness')
+            redraw_button = gr.Button(value='Redraw')
+
+    detector_name.change(fn=model.det_model.set_model,
+                         inputs=detector_name,
+                         outputs=None)
+    pose_model_name.change(fn=model.pose_model.set_model,
+                           inputs=pose_model_name,
+                           outputs=None)
+    predict_button.click(fn=model.run,
+                         inputs=[
+                             input_video,
+                             detector_name,
+                             pose_model_name,
+                             det_score_threshold,
+                             max_num_frames,
+                             vis_kpt_score_threshold,
+                             vis_dot_radius,
+                             vis_line_thickness,
+                         ],
+                         outputs=[
+                             result,
+                             pose_preds,
+                         ])
+    redraw_button.click(fn=model.visualize_pose_results,
+                        inputs=[
+                            input_video,
+                            pose_preds,
+                            vis_kpt_score_threshold,
+                            vis_dot_radius,
+                            vis_line_thickness,
+                        ],
+                        outputs=result)
+
+    example_videos.click(fn=set_example_video,
+                         inputs=example_videos,
+                         outputs=input_video)
+
+demo.queue().launch(show_api=False)
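The app.py change drops the argparse-driven main() entry point (the --device, --theme, --share, --port, and --disable-queue flags and the visitor-badge footer are removed): the Blocks UI is now built at module level and launched with demo.queue().launch(show_api=False). Below is a minimal, self-contained sketch of that launch pattern; the components and callback are placeholders, not the actual ViTPose interface.

```python
# Sketch of the module-level Blocks + queue pattern adopted in app.py.
# The textbox/button UI is a placeholder, not the real ViTPose demo.
import gradio as gr


def flip_text(text: str) -> str:
    # Placeholder callback standing in for AppModel.run.
    return text[::-1]


with gr.Blocks() as demo:
    gr.Markdown('# Demo')
    with gr.Row():
        input_text = gr.Textbox(label='Input')
        output_text = gr.Textbox(label='Output')
    run_button = gr.Button(value='Run')
    run_button.click(fn=flip_text, inputs=input_text, outputs=output_text)

# queue() replaces the removed --disable-queue flag; show_api=False hides the API page.
demo.queue().launch(show_api=False)
```

Hardware is no longer selectable from the command line; the device is chosen inside model.py instead (see below).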
model.py CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+import shlex
 import subprocess
 import sys
 import tempfile
@@ -11,9 +12,10 @@ if os.getenv('SYSTEM') == 'spaces':
     mim.uninstall('mmcv-full', confirm_yes=True)
     mim.install('mmcv-full==1.5.0', is_yes=True)
 
-    subprocess.call('pip uninstall -y opencv-python'.split())
-    subprocess.call('pip uninstall -y opencv-python-headless'.split())
-    subprocess.call('pip install opencv-python-headless==4.5.5.64'.split())
+    subprocess.call(shlex.split('pip uninstall -y opencv-python'))
+    subprocess.call(shlex.split('pip uninstall -y opencv-python-headless'))
+    subprocess.call(
+        shlex.split('pip install opencv-python-headless==4.5.5.64'))
 
 import cv2
 import huggingface_hub
@@ -27,7 +29,7 @@ from mmdet.apis import inference_detector, init_detector
 from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
                          process_mmdet_results, vis_pose_result)
 
-HF_TOKEN = os.environ['HF_TOKEN']
+HF_TOKEN = os.getenv('HF_TOKEN')
 
 
 class DetModel:
@@ -58,8 +60,9 @@ class DetModel:
         },
     }
 
-    def __init__(self, device: str | torch.device):
-        self.device = torch.device(device)
+    def __init__(self):
+        self.device = torch.device(
+            'cuda:0' if torch.cuda.is_available() else 'cpu')
         self._load_all_models_once()
         self.model_name = 'YOLOX-l'
         self.model = self._load_model(self.model_name)
@@ -131,8 +134,9 @@ class PoseModel:
         },
     }
 
-    def __init__(self, device: str | torch.device):
-        self.device = torch.device(device)
+    def __init__(self):
+        self.device = torch.device(
+            'cuda:0' if torch.cuda.is_available() else 'cpu')
         self.model_name = 'ViTPose-B (multi-task train, COCO)'
         self.model = self._load_model(self.model_name)
 
@@ -199,9 +203,9 @@ class PoseModel:
 
 
 class AppModel:
-    def __init__(self, device: str | torch.device):
-        self.det_model = DetModel(device)
-        self.pose_model = PoseModel(device)
+    def __init__(self):
+        self.det_model = DetModel()
+        self.pose_model = PoseModel()
 
     def run(
         self, video_path: str, det_model_name: str, pose_model_name: str,
@@ -222,8 +226,8 @@ class AppModel:
         preds_all = []
 
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        temp_file = tempfile.NamedTemporaryFile(suffix='.mp4')
-        writer = cv2.VideoWriter(temp_file.name, fourcc, fps, (width, height))
+        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
+        writer = cv2.VideoWriter(out_file.name, fourcc, fps, (width, height))
         for _ in range(max_num_frames):
             ok, frame = cap.read()
             if not ok:
@@ -238,10 +242,6 @@ class AppModel:
         cap.release()
         writer.release()
 
-        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
-        subprocess.run(
-            f'ffmpeg -y -loglevel quiet -stats -i {temp_file.name} -c:v libx264 {out_file.name}'
-            .split())
         return out_file.name, preds_all
 
     def visualize_pose_results(self, video_path: str,
@@ -257,8 +257,8 @@ class AppModel:
         fps = cap.get(cv2.CAP_PROP_FPS)
 
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        temp_file = tempfile.NamedTemporaryFile(suffix='.mp4')
-        writer = cv2.VideoWriter(temp_file.name, fourcc, fps, (width, height))
+        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
+        writer = cv2.VideoWriter(out_file.name, fourcc, fps, (width, height))
         for pose_preds in pose_preds_all:
             ok, frame = cap.read()
             if not ok:
@@ -271,8 +271,4 @@ class AppModel:
         cap.release()
         writer.release()
 
-        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
-        subprocess.run(
-            f'ffmpeg -y -loglevel quiet -stats -i {temp_file.name} -c:v libx264 {out_file.name}'
-            .split())
         return out_file.name
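In model.py, the constructors no longer take a device argument (the device is inferred from torch.cuda.is_available()), HF_TOKEN is read with os.getenv so it may be None outside the Space, the pip commands are split with shlex, and frames are written straight into a delete=False temporary file, dropping the extra ffmpeg re-encode to H.264 (the output now stays mp4v-encoded). A small self-contained sketch of the device-selection and video-writing pattern, with assumed placeholder values (30 fps, 640x480 black frames):

```python
# Sketch of the patterns model.py switches to: CUDA auto-detection and writing
# directly into a persistent temporary file with cv2.VideoWriter.
import tempfile

import cv2
import numpy as np
import torch

# Device is inferred at construction time instead of being passed in.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# delete=False keeps the file around so it can be served after the writer closes.
out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
writer = cv2.VideoWriter(out_file.name, fourcc, 30.0, (640, 480))
for _ in range(30):
    frame = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder black frame
    writer.write(frame)
writer.release()
print(device, out_file.name)
```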
requirements.txt CHANGED
@@ -1,7 +1,7 @@
 mmcv-full==1.5.0
 mmdet==2.24.1
 mmpose==0.25.1
-numpy==1.22.4
+numpy==1.23.5
 opencv-python-headless==4.5.5.64
 openmim==0.1.5
 timm==0.5.4