Spaces:

roll-ai
/

EPiC-fps

Paused

File size: 11,458 Bytes

# import os
# import subprocess
# from datetime import datetime
# from pathlib import Path
# import gradio as gr

# # -----------------------------
# # Setup paths and env
# # -----------------------------
# HF_HOME = "/app/hf_cache"
# os.environ["HF_HOME"] = HF_HOME
# os.environ["TRANSFORMERS_CACHE"] = HF_HOME
# os.makedirs(HF_HOME, exist_ok=True)

# PRETRAINED_DIR = "/app/pretrained"
# os.makedirs(PRETRAINED_DIR, exist_ok=True)


# # -----------------------------
# # Step 1: Optional Model Download
# # -----------------------------
# def download_models():
#     expected_model = os.path.join(PRETRAINED_DIR, "RAFT/raft-things.pth")
#     if not Path(expected_model).exists():
#         print("⚙️ Downloading pretrained models...")
#         try:
#             subprocess.check_call(["bash", "download/download_models.sh"])
#             print("✅ Models downloaded.")
#         except subprocess.CalledProcessError as e:
#             print(f"❌ Model download failed: {e}")
#     else:
#         print("✅ Pretrained models already exist.")


# download_models()


# # -----------------------------
# # Step 2: Inference Logic
# # -----------------------------

# def run_epic_inference(video_path, caption, motion_type):
#     temp_input_path = "/app/temp_input.mp4"
#     output_dir = f"/app/output_anchor"
#     video_output_path = f"{output_dir}/masked_videos/output.mp4"
#     traj_name = motion_type
#     traj_txt = f"/app/inference/v2v_data/test/trajs/{traj_name}.txt"
#     # Save uploaded video
#     if video_path:
#         os.system(f"cp '{video_path}' {temp_input_path}")

#     command = [
#     "python", "/app/inference/v2v_data/inference.py",
#     "--video_path", temp_input_path,
#     "--stride", "1",
#     "--out_dir", output_dir,
#     "--radius_scale", "1",
#     "--camera", "target",
#     "--mask", 
#     "--target_pose", "0", "30", "-0.6", "0", "0",
#     "--traj_txt", traj_txt,
#     "--save_name", "output",
#     "--mode", "gradual",
#     ]

#     # Run inference command
#     try:
#         result = subprocess.run(command, capture_output=True, text=True, check=True)
#         print("Getting Anchor Videos run successfully.")
#         logs = result.stdout
#     except subprocess.CalledProcessError as e:
#         logs = f"❌ Inference failed:\n{e.stderr}"
#         return logs, None

#     # Locate the output video
#     if video_output_path:
#         return logs, str(video_output_path)
#     else:
#         return f"Inference succeeded but no output video found in {output_dir}", None
# def print_output_directory(out_dir): 
#     result = ""
#     for root, dirs, files in os.walk(out_dir):
#         level = root.replace(out_dir, '').count(os.sep)
#         indent = ' ' * 4 * level
#         result += f"{indent}{os.path.basename(root)}/"
#         sub_indent = ' ' * 4 * (level + 1)
#         for f in files:
#             result += f"{sub_indent}{f}\n"
#     return result

# def inference(video_path, caption, motion_type):
#     logs, video_masked = run_epic_inference(video_path, caption, motion_type)

#     MODEL_PATH="/app/pretrained/CogVideoX-5b-I2V"

#     ckpt_steps=500
#     ckpt_dir="/app/out/EPiC_pretrained"
#     ckpt_file=f"checkpoint-{ckpt_steps}.pt"
#     ckpt_path=f"{ckpt_dir}/{ckpt_file}"

#     video_root_dir= f"/app/output_anchor"
#     out_dir=f"/app/output"

#     command = [
#         "python", "/app/inference/cli_demo_camera_i2v_pcd.py",
#         "--video_root_dir", video_root_dir,
#         "--base_model_path", MODEL_PATH,
#         "--controlnet_model_path", ckpt_path,
#         "--output_path", out_dir,
#         "--start_camera_idx", "0",
#         "--end_camera_idx", "8",
#         "--controlnet_weights", "1.0",
#         "--controlnet_guidance_start", "0.0",
#         "--controlnet_guidance_end", "0.4",
#         "--controlnet_input_channels", "3",
#         "--controlnet_transformer_num_attn_heads", "4",
#         "--controlnet_transformer_attention_head_dim", "64",
#         "--controlnet_transformer_out_proj_dim_factor", "64",
#         "--controlnet_transformer_out_proj_dim_zero_init",
#         "--vae_channels", "16",
#         "--num_frames", "49",
#         "--controlnet_transformer_num_layers", "8",
#         "--infer_with_mask",
#         "--pool_style", "max",
#         "--seed", "43"
#     ]

#     # Run the command
#     result = subprocess.run(command, capture_output=True, text=True)
#     if result.returncode == 0:
#         print("Inference completed successfully.")
#     else:
#         print(f"Error occurred during inference: {result.stderr}")

#     # Print output directory contents
#     logs = result.stdout
#     result = print_output_directory(out_dir)

#     return logs+result, str(f"{out_dir}/00000_43_out.mp4")

# # output 43
# # output/    00000_43_out.mp4
# #     00000_43_reference.mp4
# #     00000_43_out_reference.mp4

# # -----------------------------
# # Step 3: Create Gradio UI
# # -----------------------------
# demo = gr.Interface(
#     fn=inference,
#     inputs=[
#         gr.Video(label="Upload Video (MP4)"),
#         gr.Textbox(label="Caption", placeholder="e.g., Amalfi coast with boats"),
#         gr.Dropdown(
#             choices=["zoom_in", "rotate", "orbit", "pan", "loop1"],
#             label="Camera Motion Type",
#             value="zoom_in",
#         ),
#     ],
#     outputs=[gr.Textbox(label="Inference Logs"), gr.Video(label="Generated Video")],
#     title="🎬 EPiC: Efficient Video Camera Control",
#     description="Upload a video, describe the scene, and apply cinematic camera motion using pretrained EPiC models.",
# )

# # -----------------------------
# # Step 4: Launch App
# # -----------------------------
# if __name__ == "__main__":
#     demo.launch(server_name="0.0.0.0", server_port=7860)


import os
import subprocess
from datetime import datetime
from pathlib import Path
import gradio as gr

# -----------------------------
# Setup paths and env
# -----------------------------
HF_HOME = "/app/hf_cache"
os.environ["HF_HOME"] = HF_HOME
os.environ["TRANSFORMERS_CACHE"] = HF_HOME
os.makedirs(HF_HOME, exist_ok=True)

PRETRAINED_DIR = "/app/pretrained"
os.makedirs(PRETRAINED_DIR, exist_ok=True)

# -----------------------------
# Step 1: Optional Model Download
# -----------------------------
def download_models():
    expected_model = os.path.join(PRETRAINED_DIR, "RAFT/raft-things.pth")
    if not Path(expected_model).exists():
        print("⚙️ Downloading pretrained models...")
        try:
            subprocess.check_call(["bash", "download/download_models.sh"])
            print("✅ Models downloaded.")
        except subprocess.CalledProcessError as e:
            print(f"❌ Model download failed: {e}")
    else:
        print("✅ Pretrained models already exist.")

download_models()

# -----------------------------
# Step 2: Inference Logic
# -----------------------------
def run_epic_inference(video_path, num_frames, target_pose, mode):
    temp_input_path = "/app/temp_input.mp4"
    output_dir = "/app/output_anchor"
    video_output_path = f"{output_dir}/masked_videos/output.mp4"

    # Save uploaded video
    if video_path:
        os.system(f"cp '{video_path}' {temp_input_path}")

    try:
        theta, phi, r, x, y = target_pose.strip().split()
    except ValueError:
        return f"❌ Invalid target pose format. Use: θ φ r x y", None
    logs =  f"Running inference with target pose: θ={theta}, φ={phi}, r={r}, x={x}, y={y}\n"
    command = [
        "python", "/app/inference/v2v_data/inference.py",
        "--video_path", temp_input_path,
        "--stride", "1",
        "--out_dir", output_dir,
        "--radius_scale", "1",
        "--camera", "target",
        "--mask",
        "--target_pose", theta, phi, r, x, y,
        "--video_length", str(num_frames),
        "--save_name", "output",
        "--mode", mode,
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        logs += result.stdout
    except subprocess.CalledProcessError as e:
        logs += f"❌ Inference failed:\n{e.stderr}"
        return logs, None

    return logs, str(video_output_path) if os.path.exists(video_output_path) else (logs, None)

def print_output_directory(out_dir): 
    result = ""
    for root, dirs, files in os.walk(out_dir):
        level = root.replace(out_dir, '').count(os.sep)
        indent = ' ' * 4 * level
        result += f"{indent}{os.path.basename(root)}/\n"
        sub_indent = ' ' * 4 * (level + 1)
        for f in files:
            result += f"{sub_indent}{f}\n"
    return result

def inference(video_path, num_frames, fps, target_pose, mode):
    logs, video_masked = run_epic_inference(video_path, num_frames, target_pose, mode)

    result_dir = print_output_directory("/app/output_anchor")


    MODEL_PATH = "/app/pretrained/CogVideoX-5b-I2V"
    ckpt_steps = 500
    ckpt_dir = "/app/out/EPiC_pretrained"
    ckpt_file = f"checkpoint-{ckpt_steps}.pt"
    ckpt_path = f"{ckpt_dir}/{ckpt_file}"

    video_root_dir = "/app/output_anchor"
    out_dir = "/app/output"


    command = [
        "python", "/app/inference/cli_demo_camera_i2v_pcd.py",
        "--video_root_dir", video_root_dir,
        "--base_model_path", MODEL_PATH,
        "--controlnet_model_path", ckpt_path,
        "--output_path", out_dir,
        "--start_camera_idx", "0",
        "--end_camera_idx", "8",
        "--controlnet_weights", "1.0",
        "--controlnet_guidance_start", "0.0",
        "--controlnet_guidance_end", "0.4",
        "--controlnet_input_channels", "3",
        "--controlnet_transformer_num_attn_heads", "4",
        "--controlnet_transformer_attention_head_dim", "64",
        "--controlnet_transformer_out_proj_dim_factor", "64",
        "--controlnet_transformer_out_proj_dim_zero_init",
        "--vae_channels", "16",
        "--num_frames", str(num_frames),
        "--controlnet_transformer_num_layers", "8",
        "--infer_with_mask",
        "--pool_style", "max",
        "--seed", "43"
    ]

    result = subprocess.run(command, capture_output=True, text=True)
    logs += "\n" + result.stdout
    result_dir = print_output_directory(out_dir)
    if result.returncode == 0:
        logs += "Inference completed successfully."
    else:
        logs += f"Error occurred during inference: {result.stderr}"

    return logs + result_dir + "Hello! it is successful", str(f"{out_dir}/00000_43_out.mp4")

# -----------------------------
# Step 3: Create Gradio UI
# -----------------------------
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Video(label="Upload Video (MP4)"),
        gr.Slider(minimum=1, maximum=120, value=50, step=1, label="Number of Frames"),
        gr.Slider(minimum=1, maximum=90, value=10, step=1, label="FPS"),
        gr.Textbox(label="Target Pose (θ φ r x y)", placeholder="e.g., 0 30 -0.6 0 0"),
        gr.Dropdown(choices=["gradual", "direct", "bullet"], value="gradual", label="Camera Mode"),
    ],
    outputs=[
        gr.Textbox(label="Inference Logs"),
        gr.Video(label="Generated Video")
    ],
    title="🎬 EPiC: Efficient Video Camera Control",
    description="Upload a video, describe the scene, and apply cinematic camera motion using pretrained EPiC models.",
)

# -----------------------------
# Step 4: Launch App
# -----------------------------
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)