import base64
import os
import shutil
import subprocess
import sys
import tempfile
import uuid
from typing import Dict


class Handler:
    """Run Wan2.2 S2V generation for one request by shelling out to ``generate.py``.

    The handler expects ``model_dir`` to contain a ``Wan2.2`` directory (where
    ``generate.py`` lives and where the subprocess is run) and a
    ``Wan2.2-S2V-14B`` directory that is passed as ``--ckpt_dir``.
    """

    def __init__(self, model_dir: str):
        """Record the code and checkpoint locations derived from ``model_dir``.

        Args:
            model_dir: Directory holding the ``Wan2.2`` and ``Wan2.2-S2V-14B``
                sub-directories.
        """
        self.model_dir = model_dir
        self.code_dir = os.path.join(model_dir, "Wan2.2")
        self.ckpt_dir = os.path.join(model_dir, "Wan2.2-S2V-14B")

    def __call__(self, inputs: Dict) -> Dict[str, str]:
        """Generate a video from a base64 image and base64 audio clip.

        Args:
            inputs: Request payload. Reads ``"image_b64"`` and ``"audio_b64"``
                (both required, base64-encoded file contents) and the optional
                ``"prompt"`` (defaults to ``"a person is talking"``).

        Returns:
            ``{"video_b64": <base64 text of the generated file>}``.

        Raises:
            ValueError: If ``image_b64`` or ``audio_b64`` is missing/empty, or
                is not decodable base64 (``binascii.Error`` is a subclass).
            subprocess.CalledProcessError: If ``generate.py`` exits non-zero.
            FileNotFoundError: If ``generate.py`` succeeds but the requested
                output file was not written.
        """
        prompt = inputs.get("prompt")
        if prompt is None:
            # An explicit ``"prompt": None`` must not end up in the argv list.
            prompt = "a person is talking"

        image_b64 = inputs.get("image_b64")
        audio_b64 = inputs.get("audio_b64")

        # Fail fast: without both inputs the subprocess would be started with
        # paths to files that were never written.
        missing = [
            key
            for key, value in (("image_b64", image_b64), ("audio_b64", audio_b64))
            if not value
        ]
        if missing:
            raise ValueError(f"missing required input(s): {', '.join(missing)}")

        # Decode before creating any temp files so bad payloads cost nothing.
        image_bytes = base64.b64decode(image_b64)
        audio_bytes = base64.b64decode(audio_b64)

        tmpd = tempfile.mkdtemp()
        try:
            image_path = os.path.join(tmpd, "input.jpg")
            audio_path = os.path.join(tmpd, "input.wav")
            out_path = os.path.join(tmpd, f"out_{uuid.uuid4().hex}.mp4")

            with open(image_path, "wb") as f:
                f.write(image_bytes)
            with open(audio_path, "wb") as f:
                f.write(audio_bytes)

            cmd = self._build_command(prompt, image_path, audio_path, out_path)
            subprocess.check_call(cmd, cwd=self.code_dir)

            # Only the file requested for this call is read back. Picking
            # "the last file in ./outputs" could return a stale video from an
            # unrelated request and depended on the process working directory.
            if not os.path.isfile(out_path):
                raise FileNotFoundError(
                    f"generate.py finished without writing {out_path}"
                )
            with open(out_path, "rb") as f:
                video_bytes = f.read()
            return {"video_b64": base64.b64encode(video_bytes).decode("utf-8")}
        finally:
            # Best-effort cleanup; never mask the real error with a cleanup one.
            shutil.rmtree(tmpd, ignore_errors=True)

    def _build_command(self, prompt, image_path, audio_path, out_path):
        """Return the argv list used to invoke ``generate.py`` for one request."""
        return [
            # Use the interpreter running this handler so the subprocess sees
            # the same environment; fall back to PATH lookup if unavailable.
            sys.executable or "python", "generate.py",
            "--task", "s2v-14B",
            "--size", "1024*704",
            # The subprocess runs with cwd=code_dir, so a relative checkpoint
            # path would resolve against the wrong directory.
            "--ckpt_dir", os.path.abspath(self.ckpt_dir),
            "--offload_model", "True",
            "--convert_model_dtype",
            "--prompt", prompt,
            "--image", image_path,
            "--audio", audio_path,
            "--num_clip", "1",
            # Tell generate.py where to write the result so it can be read
            # back deterministically.
            # NOTE(review): relies on the bundled generate.py accepting
            # --save_file (the upstream Wan2.2 CLI does) - confirm.
            "--save_file", out_path,
        ]