# NOTE: "Spaces: Sleeping" status header from the Hugging Face Spaces page
# was captured by the scrape; it is not part of the program.
# Standard library
import json
import os
import subprocess
import urllib.request

# Third-party
import cv2
import gradio as gr
import numpy as np

# 1. Modern MediaPipe Tasks API
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
| # Auto-Download Model | |
| MODEL_PATH = "pose_landmarker_lite.task" | |
| MODEL_URL = "https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/1/pose_landmarker_lite.task" | |
| if not os.path.exists(MODEL_PATH): | |
| print("Downloading MediaPipe Pose Model...") | |
| urllib.request.urlretrieve(MODEL_URL, MODEL_PATH) | |
| POSE_CONNECTIONS = [ | |
| (0, 1), (1, 2), (2, 3), (3, 7), (0, 4), (4, 5), (5, 6), (6, 8), (9, 10), | |
| (11, 12), (11, 13), (13, 15), (15, 17), (15, 19), (15, 21), (17, 19), | |
| (12, 14), (14, 16), (16, 18), (16, 20), (16, 22), (18, 20), (11, 23), | |
| (12, 24), (23, 24), (23, 25), (24, 26), (25, 27), (26, 28), (27, 29), | |
| (28, 30), (29, 31), (30, 32), (27, 31), (28, 32) | |
| ] | |
def extract_pose_and_data(video_path):
    """Run MediaPipe pose tracking over a video and export the results.

    Produces three artifacts:
      * a video of the pose skeleton drawn on a black canvas (for EbSynth),
      * a JSON file of per-frame 3D world landmarks (for Blender IK),
      * the same landmark data as a Python list (for the Gradio JSON view).

    Parameters
    ----------
    video_path : str | None
        Path to the input clip. ``None`` short-circuits to
        ``(None, None, None)`` (Gradio passes ``None`` when no file is set).

    Returns
    -------
    tuple
        (annotated video path, JSON file path, list of per-frame dicts).
    """
    if video_path is None:
        return None, None, None

    output_video_path = "final_output.mp4"
    temp_video = "temp_silent.mp4"
    output_json_path = "pose_data.json"

    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Some containers report 0 (or NaN) for CAP_PROP_FPS; fall back so the
    # timestamp math below never divides by zero.
    if not fps or fps != fps:
        fps = 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(temp_video, fourcc, fps, (width, height))

    base_options = python.BaseOptions(model_asset_path=MODEL_PATH)
    options = vision.PoseLandmarkerOptions(
        base_options=base_options,
        running_mode=vision.RunningMode.VIDEO
    )

    # Storage for the Blender-bound per-frame data.
    all_frames_data = []
    try:
        with vision.PoseLandmarker.create_from_options(options) as landmarker:
            frame_idx = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
                # VIDEO running mode requires a monotonically increasing timestamp.
                timestamp_ms = int((frame_idx / fps) * 1000)
                result = landmarker.detect_for_video(mp_image, timestamp_ms)

                canvas = np.zeros((height, width, 3), dtype=np.uint8)
                frame_entry = {
                    "frame": frame_idx,
                    "timestamp_ms": timestamp_ms,
                    "landmarks": []
                }
                if result.pose_landmarks and result.pose_world_landmarks:
                    # 1. Extract 3D world data for the JSON export (for Blender).
                    for landmark in result.pose_world_landmarks[0]:
                        frame_entry["landmarks"].append({
                            "x": landmark.x,
                            "y": landmark.y,
                            "z": landmark.z,
                            "visibility": landmark.visibility
                        })
                    # 2. Draw the 2D landmarks as a thick stickman (for EbSynth).
                    pose = result.pose_landmarks[0]
                    for start_idx, end_idx in POSE_CONNECTIONS:
                        start_pt, end_pt = pose[start_idx], pose[end_idx]
                        start_px = (int(start_pt.x * width), int(start_pt.y * height))
                        end_px = (int(end_pt.x * width), int(end_pt.y * height))
                        cv2.line(canvas, start_px, end_px, (0, 255, 0), 10)
                    for landmark in pose:
                        px = (int(landmark.x * width), int(landmark.y * height))
                        cv2.circle(canvas, px, 15, (255, 255, 255), -1)
                # Frames with no detection still get an (empty) entry and a
                # black frame, keeping video/JSON frame counts aligned.
                all_frames_data.append(frame_entry)
                out.write(canvas)
                frame_idx += 1
    finally:
        # Release capture/writer even if MediaPipe raises mid-stream;
        # otherwise the temp video stays half-written and unflushed.
        cap.release()
        out.release()

    # Save the JSON file.
    with open(output_json_path, 'w') as f:
        json.dump(all_frames_data, f, indent=4)

    # Mux the original clip's audio onto the silent skeleton video.
    try:
        command = [
            "ffmpeg", "-y", "-i", temp_video, "-i", video_path,
            "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0?",
            "-shortest", output_video_path
        ]
        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except (subprocess.CalledProcessError, FileNotFoundError, OSError) as e:
        # Best effort: if ffmpeg is missing or fails, ship the silent video.
        print("FFmpeg error:", e)
        output_video_path = temp_video

    # Return: video file, JSON file (for download), JSON data (for UI copying).
    return output_video_path, output_json_path, all_frames_data
# Gradio UI setup. Component creation order inside the context managers
# defines the layout: left column takes the upload + button, right column
# shows the stickman video and the downloadable JSON file, and a
# full-width row below exposes the raw JSON for copy/paste.
with gr.Blocks(title="Pose & 3D Data Extractor") as interface:
    gr.Markdown("# 🕺 Pose Video & 3D JSON Extractor")
    gr.Markdown("Generates a thick stickman for EbSynth and extracts `pose_world_landmarks` (x, y, z) for Blender IK.")
    with gr.Row():
        with gr.Column():
            video_input = gr.Video(label="Upload Dancing Clip (15-30s)")
            submit_btn = gr.Button("Extract Pose & Data", variant="primary")
        with gr.Column():
            video_output = gr.Video(label="Meaty Stickman Output")
            file_output = gr.File(label="Download 3D JSON Data")
    with gr.Row():
        # The gr.JSON component automatically includes a "Copy" button in the top right
        json_output = gr.JSON(label="Raw JSON Data (Click top right to Copy)")
    # Wire the button to the extractor: one video input, three outputs
    # matching the function's (video path, json path, json data) return.
    submit_btn.click(
        fn=extract_pose_and_data,
        inputs=video_input,
        outputs=[video_output, file_output, json_output]
    )

# Launch the server only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()