"""Gradio Space entry point for the AiOS demo.

On startup this script:
  1. Builds the bundled native extensions (mmcv, pytorch3d,
     MultiScaleDeformableAttention) from source if they are not installed.
  2. Downloads the AiOS checkpoint from the Hugging Face Hub.
  3. Launches a Gradio UI that runs inference on an uploaded video via
     ``torchrun main.py`` and returns the rendered video plus zipped meshes.
"""

import os
import shutil
import subprocess
import sys
# importlib.metadata replaces the deprecated pkg_resources API.
from importlib import metadata


def is_package_installed(package_name):
    """Return True if a distribution named *package_name* is installed."""
    try:
        metadata.distribution(package_name)
        return True
    except metadata.PackageNotFoundError:
        return False


def _build_from_source(source_dir):
    """Editable-install *source_dir* with the pip of the current interpreter."""
    # sys.executable -m pip guarantees the build lands in this environment,
    # even if a different `pip` shadows it on PATH.
    subprocess.run([sys.executable, "-m", "pip", "install", "-e", source_dir],
                   check=True)


if is_package_installed("mmcv"):
    print("MMCV is installed.")
else:
    print("MMCV is not installed. Build it from the source.")
    # Env flags consumed by mmcv's setup.py to compile the custom ops.
    os.environ["MMCV_WITH_OPS"] = "1"
    os.environ["FORCE_MLU"] = "1"
    _build_from_source("./mmcv")
    # NOTE(review): in the original collapsed source this `pip list` sat
    # directly after the mmcv install; kept in the build branch.
    subprocess.run([sys.executable, "-m", "pip", "list"], check=True)

if is_package_installed("pytorch3d"):
    print("pytorch3d is installed.")
else:
    print("pytorch3d is not installed. Build it from the source.")
    _build_from_source("./pytorch3d")

if is_package_installed("MultiScaleDeformableAttention"):
    print("MultiScaleDeformableAttention is installed.")
else:
    print("MultiScaleDeformableAttention is not installed. Build it from the source.")
    _build_from_source("./models/aios/ops")

# These imports depend on the packages built above, so they stay below the
# build step intentionally.
import math
import os.path as osp
from pathlib import Path

import cv2
import gradio as gr
import spaces
import torch
from huggingface_hub import hf_hub_download

# Fetch the released checkpoint into the Space's local model directory.
hf_hub_download(repo_id="ttxskk/AiOS",
                filename="aios_checkpoint.pth",
                local_dir="/home/user/app/pretrained_models")

OUT_FOLDER = '/home/user/app/demo_out'
os.makedirs(OUT_FOLDER, exist_ok=True)
DEMO_CONFIG = '/home/user/app/config/aios_smplx_demo.py'
MODEL_PATH = '/home/user/app/pretrained_models/aios_checkpoint.pth'


@spaces.GPU(enable_queue=True, duration=300)
def infer(video_input, batch_size, threshold=0.3, num_person=1):
    """Run AiOS inference on *video_input* via ``torchrun main.py``.

    Args:
        video_input: path of the uploaded video (from gr.Video).
        batch_size: inference batch size (string from the Textbox is fine,
            it is forwarded verbatim on the command line).
        threshold: minimum detection confidence score.
        num_person: maximum number of people to recover.

    Yields:
        (rendered video path, path of the zipped mesh folder).
    """
    # Start every run from an empty output folder.
    shutil.rmtree(OUT_FOLDER, ignore_errors=True)
    os.makedirs(OUT_FOLDER, exist_ok=True)
    # Build the command as an argument list: no shell is involved, so a
    # user-supplied upload path can never be interpreted as shell syntax.
    cmd = [
        "torchrun", "--nproc_per_node", "1",
        "main.py",
        "-c", DEMO_CONFIG,
        "--options",
        f"batch_size={batch_size}",
        "backbone=resnet50",
        f"num_person={num_person}",
        f"threshold={threshold}",
        "--resume", MODEL_PATH,
        "--eval",
        "--inference",
        "--inference_input", str(video_input),
        "--to_vid",
        "--output_dir", OUT_FOLDER,
    ]
    # check=False keeps the original os.system best-effort semantics:
    # a failed run still yields the (possibly missing) output paths.
    subprocess.run(cmd, check=False)
    video_path = os.path.join(OUT_FOLDER, 'demo_vid.mp4')
    save_path_mesh = os.path.join(OUT_FOLDER, 'mesh')
    save_mesh_file = os.path.join(OUT_FOLDER, 'mesh.zip')
    if os.path.isdir(save_path_mesh):
        # Portable stdlib replacement for the external `zip -r` call; the
        # archive contains the `mesh/` tree.
        shutil.make_archive(os.path.join(OUT_FOLDER, 'mesh'), 'zip',
                            root_dir=OUT_FOLDER, base_dir='mesh')
    yield video_path, save_mesh_file


TITLE = """

AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation

Recover multiple expressive human pose and shape from an RGB image without any additional requirements, such as an off-the-shelf detection model.

"""

VIDEO = '''

'''

DESCRIPTION = '''

Note: Score threshold defines the minimum confidence level for person detection. The default value is 0.3. If the confidence score of a detected person falls below this score threshold, the detection will be discarded.

'''

with gr.Blocks(title="AiOS",
               theme=gr.themes.Soft(primary_hue="blue",
                                    secondary_hue="gray")) as demo:
    gr.Markdown(TITLE)
    gr.HTML(VIDEO)
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column(scale=2):
            video_input = gr.Video(label="Input video", elem_classes="video")
        with gr.Column(scale=1):
            batch_size = gr.Textbox(label="Batch Size", type="text", value=16)
            num_person = gr.Textbox(label="Number of Person", type="text",
                                    value=1)
            threshold = gr.Slider(0, 1.0, value=0.3, label='Score Threshold')
            send_button = gr.Button("Infer")
    gr.HTML("""
""")
    with gr.Row():
        with gr.Column():
            video_output = gr.Video(elem_classes="video")
        with gr.Column():
            meshes_output = gr.File(label="3D meshes")
    # Input order must match infer(video_input, batch_size, threshold, num_person).
    send_button.click(fn=infer,
                      inputs=[video_input, batch_size, threshold, num_person],
                      outputs=[video_output, meshes_output])

demo.queue().launch(debug=True)