Spaces:

ttxskk
/

AiOS

Sleeping

App Files Files Community

AiOS / app.py

ttxskk

Update app.py

8fe6621 verified about 1 month ago

raw

history blame

5.72 kB

	import os
	import sys
	import subprocess
	import pkg_resources

	def is_package_installed(package_name):
	    """Return whether a distribution named *package_name* is installed.

	    The lookup uses the standard-library ``importlib.metadata`` rather than
	    ``pkg_resources``, which setuptools has deprecated.  Only the presence of
	    the distribution's own metadata is checked.

	    Args:
	        package_name: Distribution name as known to pip (e.g. ``"mmcv"``).

	    Returns:
	        ``True`` if metadata for the distribution is found, ``False`` if
	        ``importlib.metadata`` reports it as missing.
	    """
	    # Imported locally so this function does not need a new file-level import.
	    from importlib import metadata

	    try:
	        metadata.distribution(package_name)
	    except metadata.PackageNotFoundError:
	        return False
	    return True

	# Build the extensions vendored in this repository when they are not installed
	# yet.  pip is invoked as ``<python> -m pip`` so packages are installed into the
	# interpreter running this script rather than into whichever ``pip``
	# executable happens to come first on PATH.
	_PIP = [sys.executable, "-m", "pip"]

	if is_package_installed("mmcv"):
	    print("MMCV is installed.")
	else:
	    print("MMCV is not installed. Build it from the source.")
	    # Set before the build so the pip child process inherits them.
	    # NOTE(review): presumably read by mmcv's setup script — confirm.
	    os.environ["MMCV_WITH_OPS"] = "1"
	    os.environ["FORCE_MLU"] = "1"
	    subprocess.run(_PIP + ["install", "-e", "./mmcv"], check=True)
	    # NOTE(review): the original indentation was lost; this listing is assumed
	    # to belong to the "not installed" branch — confirm.
	    subprocess.run(_PIP + ["list"], check=True)

	if is_package_installed("pytorch3d"):
	    print("pytorch3d is installed.")
	else:
	    print("pytorch3d is not installed. Build it from the source.")
	    subprocess.run(_PIP + ["install", "-e", "./pytorch3d"], check=True)

	if is_package_installed("MultiScaleDeformableAttention"):
	    print("MultiScaleDeformableAttention is installed.")
	else:
	    print("MultiScaleDeformableAttention is not installed. Build it from the source.")
	    subprocess.run(_PIP + ["install", "-e", "./models/aios/ops"], check=True)

	import os.path as osp
	from pathlib import Path
	import cv2
	import gradio as gr
	import torch
	import math
	import spaces
	from huggingface_hub import hf_hub_download

	# Fixed locations used by the demo: output folder, demo config and checkpoint.
	OUT_FOLDER = '/home/user/app/demo_out'
	DEMO_CONFIG = '/home/user/app/config/aios_smplx_demo.py'
	MODEL_PATH = '/home/user/app/pretrained_models/aios_checkpoint.pth'

	# Download the checkpoint from the Hub into the pretrained-models directory.
	hf_hub_download(
	    repo_id="ttxskk/AiOS",
	    filename="aios_checkpoint.pth",
	    local_dir="/home/user/app/pretrained_models",
	)

	# Make sure the output folder exists before the first request.
	os.makedirs(OUT_FOLDER, exist_ok=True)
	@spaces.GPU(enable_queue=True, duration=300)
	def infer(video_input, batch_size, threshold=0.3, num_person=1):
	    """Run AiOS inference on a video and yield the result files.

	    ``main.py`` is launched through ``torchrun`` in a child process; afterwards
	    the ``mesh`` directory under ``OUT_FOLDER`` is zipped.

	    The external commands are started from argument lists instead of through a
	    shell, so UI-provided values (the uploaded file path and the settings) are
	    passed as literal arguments and are never interpreted as shell syntax.

	    Args:
	        video_input: Path of the input video, forwarded as ``--inference_input``.
	        batch_size: Value for the ``batch_size`` option; an int or a string
	            holding an integer.
	        threshold: Value for the ``threshold`` option.
	        num_person: Value for the ``num_person`` option; an int or a string
	            holding an integer.

	    Yields:
	        A ``(video_path, mesh_zip_path)`` tuple pointing at ``demo_vid.mp4`` and
	        ``mesh.zip`` inside ``OUT_FOLDER``.

	    Raises:
	        gr.Error: If no video was given, a setting is not a valid number, or
	            the run did not produce ``demo_vid.mp4``.
	    """
	    # Local import: shutil is only needed here and is not imported file-wide.
	    import shutil

	    if not video_input:
	        raise gr.Error("Please provide an input video.")

	    # The settings may arrive as free text from the UI; reject anything that is
	    # not a plain number before it reaches the command line.
	    try:
	        batch_size = int(batch_size)
	        num_person = int(num_person)
	        threshold = float(threshold)
	    except (TypeError, ValueError) as err:
	        raise gr.Error(
	            "Batch size and number of persons must be integers and the "
	            "score threshold must be a number."
	        ) from err
	    if batch_size < 1 or num_person < 1:
	        raise gr.Error("Batch size and number of persons must be at least 1.")

	    # Start from an empty output folder so results of an earlier run are never
	    # returned for this request.
	    shutil.rmtree(OUT_FOLDER, ignore_errors=True)
	    os.makedirs(OUT_FOLDER, exist_ok=True)

	    subprocess.run([
	        "torchrun", "--nproc_per_node", "1",
	        "main.py",
	        "-c", DEMO_CONFIG,
	        "--options",
	        f"batch_size={batch_size}",
	        # The shell used to strip the quotes around resnet50; without a shell
	        # the bare value is passed directly.
	        "backbone=resnet50",
	        f"num_person={num_person}",
	        f"threshold={threshold}",
	        "--resume", MODEL_PATH,
	        "--eval",
	        "--inference",
	        "--inference_input", str(video_input),
	        "--to_vid",
	        "--output_dir", OUT_FOLDER,
	    ])

	    video_path = os.path.join(OUT_FOLDER, 'demo_vid.mp4')
	    if not os.path.isfile(video_path):
	        # Report the failure in the UI instead of handing back a missing file.
	        raise gr.Error("Inference failed: no output video was produced.")

	    save_path_mesh = os.path.join(OUT_FOLDER, 'mesh')
	    save_mesh_file = os.path.join(OUT_FOLDER, 'mesh.zip')
	    # Same ``zip -r`` call as before, so the archive layout is unchanged.
	    subprocess.run(["zip", "-r", save_mesh_file, save_path_mesh])
	    yield video_path, save_mesh_file

	# Page header (title, project/code/paper badges, one-line summary); rendered
	# with gr.Markdown.
	TITLE = """

	<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
	<div>
	<h1 align="center">AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation</h1>
	</div>
	</div>

	<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
	<div style="display:flex; gap: 0.25rem;" align="center">
	<a href="https://ttxskk.github.io/AiOS/" target="_blank"><img src='https://img.shields.io/badge/Project-Page-Green'></a>
	<a href="https://github.com/ttxskk/AiOS" target="_blank"><img src='https://img.shields.io/badge/Github-Code-blue'></a>
	<a href="https://ttxskk.github.io/AiOS/assets/aios_cvpr24.pdf" target="_blank"><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a>
	</div>
	</div>
	<div style="font-size: 1.1rem; color: #555; max-width: 800px; margin: 1rem auto; line-height: 1.5; justify-content: center; align-items: center; text-align: center;">
	<div>
	<p>Recover multiple expressive human pose and shape from an RGB image without any additional requirements, such as an off-the-shelf detection model.</p>
	</div>
	</div>
	"""
	# Embedded YouTube player markup; rendered with gr.HTML right after the title.
	VIDEO = '''
	<center>
	<iframe width="960" height="540"
	src="https://www.youtube.com/embed/yzCL7TYpzvc?si=EoxWNE6VPBxsy7Go"
	title="AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation"
	frameborder="0"
	allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
	allowfullscreen>
	</iframe>
	</center><br/>
	'''

	# Usage note about the score threshold; rendered with gr.Markdown.
	DESCRIPTION = '''
	<p>
	Note: Score threshold defines the minimum confidence level for person detection. The default value is 0.3.
	If the confidence score of a detected person falls below this score threshold, the detection will be discarded.
	</p>
	'''
	# Page layout: header, demo video and usage note, then an input row (video plus
	# inference settings) and an output row (rendered video plus mesh archive).
	# NOTE(review): the nesting below was reconstructed from a copy of the file
	# whose indentation had been lost — confirm against the deployed layout.
	with gr.Blocks(title="AiOS", theme=gr.themes.Soft(primary_hue="blue", secondary_hue="gray")) as demo:

	    gr.Markdown(TITLE)
	    gr.HTML(VIDEO)
	    gr.Markdown(DESCRIPTION)
	    with gr.Row():
	        with gr.Column(scale=2):
	            video_input = gr.Video(label="Input video", elem_classes="video")
	        with gr.Column(scale=1):
	            # Integer fields (precision=0) instead of free-text boxes: these
	            # values end up on the inference command line, so only whole
	            # numbers should be accepted from the UI.
	            batch_size = gr.Number(label="Batch Size", value=16, precision=0)
	            num_person = gr.Number(label="Number of Person", value=1, precision=0)
	            threshold = gr.Slider(0, 1.0, value=0.3, label='Score Threshold')
	            send_button = gr.Button("Infer")
	            gr.HTML("""<br/>""")

	    with gr.Row():
	        with gr.Column():
	            # processed_frames = gr.Image(label="Last processed frame")
	            video_output = gr.Video(elem_classes="video")
	        with gr.Column():
	            meshes_output = gr.File(label="3D meshes")

	    # Input order must match infer's positional parameters:
	    # (video_input, batch_size, threshold, num_person).
	    send_button.click(fn=infer, inputs=[video_input, batch_size, threshold, num_person], outputs=[video_output, meshes_output])
	# example_videos = gr.Examples([
	# ['./assets/01.mp4'],
	# ['./assets/02.mp4'],
	# ['./assets/03.mp4'],
	# ['./assets/04.mp4'],
	# ['./assets/05.mp4'],
	# ['./assets/06.mp4'],
	# ['./assets/07.mp4'],
	# ['./assets/08.mp4'],
	# ['./assets/09.mp4'],
	# ],
	# inputs=[video_input, 0.5])

	# Turn on request queuing, then start the app with debug=True.
	demo.queue()
	demo.launch(debug=True)