| import re |
| from typing import Optional |
|
|
| import torch |
| from pydantic import BaseModel, Field |
| from typing_extensions import override |
|
|
| from comfy_api.latest import IO, ComfyExtension, Input |
| from comfy_api_nodes.util import ( |
| ApiEndpoint, |
| audio_to_base64_string, |
| download_url_to_image_tensor, |
| download_url_to_video_output, |
| get_number_of_images, |
| poll_op, |
| sync_op, |
| tensor_to_base64_string, |
| validate_audio_duration, |
| ) |
|
|
|
|
class Text2ImageInputField(BaseModel):
    """`input` section of a Wan text2image task-creation request."""

    prompt: str = Field(...)  # positive prompt describing the desired image
    negative_prompt: Optional[str] = Field(None)  # what to avoid; None when not provided
|
|
|
|
class Image2ImageInputField(BaseModel):
    """`input` section of a Wan image2image task-creation request."""

    prompt: str = Field(...)  # positive prompt describing the desired edit/fusion
    negative_prompt: Optional[str] = Field(None)  # what to avoid; None when not provided
    images: list[str] = Field(..., min_length=1, max_length=2)  # base64 data URLs, 1 or 2 images
|
|
|
|
class Text2VideoInputField(BaseModel):
    """`input` section of a Wan text2video task-creation request."""

    prompt: str = Field(...)  # positive prompt describing the desired video
    negative_prompt: Optional[str] = Field(None)  # what to avoid; None when not provided
    audio_url: Optional[str] = Field(None)  # optional voice track as a base64 audio data URL
|
|
|
|
class Image2VideoInputField(BaseModel):
    """`input` section of a Wan image2video task-creation request."""

    prompt: str = Field(...)  # positive prompt describing the desired video
    negative_prompt: Optional[str] = Field(None)  # what to avoid; None when not provided
    img_url: str = Field(...)  # first frame as a base64 image data URL
    audio_url: Optional[str] = Field(None)  # optional voice track as a base64 audio data URL
|
|
|
|
class Txt2ImageParametersField(BaseModel):
    """`parameters` section of a Wan text2image task-creation request."""

    size: str = Field(...)  # "<width>*<height>", e.g. "1024*1024"
    n: int = Field(1, description="Number of images to generate.")
    seed: int = Field(..., ge=0, le=2147483647)  # deterministic seed (31-bit range)
    prompt_extend: bool = Field(True)  # let the service enhance the prompt
    watermark: bool = Field(True)  # add an "AI generated" watermark
|
|
|
|
class Image2ImageParametersField(BaseModel):
    """`parameters` section of a Wan image2image task-creation request."""

    size: Optional[str] = Field(None)  # "<width>*<height>"; None lets the service pick (output fixed at 1.6 MP)
    n: int = Field(1, description="Number of images to generate.")
    seed: int = Field(..., ge=0, le=2147483647)  # deterministic seed (31-bit range)
    watermark: bool = Field(True)  # add an "AI generated" watermark
|
|
|
|
class Text2VideoParametersField(BaseModel):
    """`parameters` section of a Wan text2video task-creation request."""

    size: str = Field(...)  # "<width>*<height>", e.g. "624*624"
    seed: int = Field(..., ge=0, le=2147483647)  # deterministic seed (31-bit range)
    duration: int = Field(5, ge=5, le=10)  # clip length in seconds (5 or 10)
    prompt_extend: bool = Field(True)  # let the service enhance the prompt
    watermark: bool = Field(True)  # add an "AI generated" watermark
    audio: bool = Field(False, description="Should be audio generated automatically")
|
|
|
|
class Image2VideoParametersField(BaseModel):
    """`parameters` section of a Wan image2video task-creation request."""

    resolution: str = Field(...)  # "480P", "720P" or "1080P"
    seed: int = Field(..., ge=0, le=2147483647)  # deterministic seed (31-bit range)
    duration: int = Field(5, ge=5, le=10)  # clip length in seconds (5 or 10)
    prompt_extend: bool = Field(True)  # let the service enhance the prompt
    watermark: bool = Field(True)  # add an "AI generated" watermark
    audio: bool = Field(False, description="Should be audio generated automatically")
|
|
|
|
class Text2ImageTaskCreationRequest(BaseModel):
    """Top-level POST body for creating a text2image task."""

    model: str = Field(...)  # e.g. "wan2.5-t2i-preview"
    input: Text2ImageInputField = Field(...)
    parameters: Txt2ImageParametersField = Field(...)
|
|
|
|
class Image2ImageTaskCreationRequest(BaseModel):
    """Top-level POST body for creating an image2image task."""

    model: str = Field(...)  # e.g. "wan2.5-i2i-preview"
    input: Image2ImageInputField = Field(...)
    parameters: Image2ImageParametersField = Field(...)
|
|
|
|
class Text2VideoTaskCreationRequest(BaseModel):
    """Top-level POST body for creating a text2video task."""

    model: str = Field(...)  # e.g. "wan2.5-t2v-preview"
    input: Text2VideoInputField = Field(...)
    parameters: Text2VideoParametersField = Field(...)
|
|
|
|
class Image2VideoTaskCreationRequest(BaseModel):
    """Top-level POST body for creating an image2video task."""

    model: str = Field(...)  # e.g. "wan2.5-i2v-preview"
    input: Image2VideoInputField = Field(...)
    parameters: Image2VideoParametersField = Field(...)
|
|
|
|
class TaskCreationOutputField(BaseModel):
    """`output` section of a successful task-creation response; base for status outputs."""

    task_id: str = Field(...)  # identifier used to poll /tasks/{task_id}
    task_status: str = Field(...)  # service-side task state string
|
|
|
|
class TaskCreationResponse(BaseModel):
    """Response to a task-creation POST; `output` is None when the request failed."""

    output: Optional[TaskCreationOutputField] = Field(None)
    request_id: str = Field(...)
    code: Optional[str] = Field(None, description="The error code of the failed request.")
    message: Optional[str] = Field(None, description="Details of the failed request.")
|
|
|
|
class TaskResult(BaseModel):
    """Single result entry in an image task status; either `url` or an error code/message."""

    url: Optional[str] = Field(None)  # download URL of the generated image when successful
    code: Optional[str] = Field(None)  # error code when this result failed
    message: Optional[str] = Field(None)  # error details when this result failed
|
|
|
|
class ImageTaskStatusOutputField(TaskCreationOutputField):
    """`output` section of an image task status response.

    `task_id` and `task_status` are inherited from TaskCreationOutputField;
    redeclaring them identically here was redundant and has been removed.
    """

    results: Optional[list[TaskResult]] = Field(None)  # one entry per generated image; None until finished
|
|
|
|
class VideoTaskStatusOutputField(TaskCreationOutputField):
    """`output` section of a video task status response.

    `task_id` and `task_status` are inherited from TaskCreationOutputField;
    redeclaring them identically here was redundant and has been removed.
    """

    video_url: Optional[str] = Field(None)  # download URL of the generated video when successful
    code: Optional[str] = Field(None)  # error code when the task failed
    message: Optional[str] = Field(None)  # error details when the task failed
|
|
|
|
class ImageTaskStatusResponse(BaseModel):
    """Response body returned when polling an image task."""

    output: Optional[ImageTaskStatusOutputField] = Field(None)
    request_id: str = Field(...)
|
|
|
|
class VideoTaskStatusResponse(BaseModel):
    """Response body returned when polling a video task."""

    output: Optional[VideoTaskStatusOutputField] = Field(None)
    request_id: str = Field(...)
|
|
|
|
# Extracts "(<width>x<height>)" — ASCII "x" or Unicode "×", optional spaces —
# from a size option such as "480p: 1:1 (624x624)"; groups are (width, height).
RES_IN_PARENS = re.compile(r"\((\d+)\s*[x×]\s*(\d+)\)")
|
|
|
|
class WanTextToImageApi(IO.ComfyNode):
    """API node that generates an image from a text prompt via the Wan text2image service."""

    @classmethod
    def define_schema(cls):
        """Declare the node's UI inputs/outputs and API-node metadata."""
        return IO.Schema(
            node_id="WanTextToImageApi",
            display_name="Wan Text to Image",
            category="api node/image/Wan",
            description="Generates image based on text prompt.",
            inputs=[
                IO.Combo.Input(
                    "model",
                    options=["wan2.5-t2i-preview"],
                    default="wan2.5-t2i-preview",
                    tooltip="Model to use.",
                ),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid.",
                    optional=True,
                ),
                IO.Int.Input(
                    "width",
                    default=1024,
                    min=768,
                    max=1440,
                    step=32,
                    optional=True,
                ),
                IO.Int.Input(
                    "height",
                    default=1024,
                    min=768,
                    max=1440,
                    step=32,
                    optional=True,
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed to use for generation.",
                    optional=True,
                ),
                IO.Boolean.Input(
                    "prompt_extend",
                    default=True,
                    tooltip="Whether to enhance the prompt with AI assistance.",
                    optional=True,
                ),
                IO.Boolean.Input(
                    "watermark",
                    default=True,
                    tooltip='Whether to add an "AI generated" watermark to the result.',
                    optional=True,
                ),
            ],
            outputs=[
                IO.Image.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        prompt: str,
        negative_prompt: str = "",
        width: int = 1024,
        height: int = 1024,
        seed: int = 0,
        prompt_extend: bool = True,
        watermark: bool = True,
    ) -> IO.NodeOutput:
        """Create a text2image task, poll it to completion and return the image.

        Raises:
            Exception: if task creation fails or the finished task carries no result URL.
        """
        initial_response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/wan/api/v1/services/aigc/text2image/image-synthesis", method="POST"),
            response_model=TaskCreationResponse,
            data=Text2ImageTaskCreationRequest(
                model=model,
                input=Text2ImageInputField(prompt=prompt, negative_prompt=negative_prompt),
                parameters=Txt2ImageParametersField(
                    size=f"{width}*{height}",  # service expects "<width>*<height>"
                    seed=seed,
                    prompt_extend=prompt_extend,
                    watermark=watermark,
                ),
            ),
        )
        if not initial_response.output:
            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
        response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
            response_model=ImageTaskStatusResponse,
            status_extractor=lambda x: x.output.task_status,
            estimated_duration=9,
            poll_interval=3,
        )
        # A finished task can carry a per-result error code/message instead of a URL;
        # fail with the service's details rather than crashing on a None index/URL.
        results = response.output.results if response.output else None
        if not results or not results[0].url:
            failed = results[0] if results else None
            raise Exception(
                f"Task failed: {failed.code if failed else None} - {failed.message if failed else None}"
            )
        return IO.NodeOutput(await download_url_to_image_tensor(str(results[0].url)))
|
|
|
|
class WanImageToImageApi(IO.ComfyNode):
    """API node that edits one image or fuses two images guided by a text prompt."""

    @classmethod
    def define_schema(cls):
        """Declare the node's UI inputs/outputs and API-node metadata."""
        return IO.Schema(
            node_id="WanImageToImageApi",
            display_name="Wan Image to Image",
            category="api node/image/Wan",
            description="Generates an image from one or two input images and a text prompt. "
            "The output image is currently fixed at 1.6 MP; its aspect ratio matches the input image(s).",
            inputs=[
                IO.Combo.Input(
                    "model",
                    options=["wan2.5-i2i-preview"],
                    default="wan2.5-i2i-preview",
                    tooltip="Model to use.",
                ),
                IO.Image.Input(
                    "image",
                    tooltip="Single-image editing or multi-image fusion, maximum 2 images.",
                ),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid.",
                    optional=True,
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed to use for generation.",
                    optional=True,
                ),
                IO.Boolean.Input(
                    "watermark",
                    default=True,
                    tooltip='Whether to add an "AI generated" watermark to the result.',
                    optional=True,
                ),
            ],
            outputs=[
                IO.Image.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        image: torch.Tensor,
        prompt: str,
        negative_prompt: str = "",
        seed: int = 0,
        watermark: bool = True,
    ) -> IO.NodeOutput:
        """Create an image2image task, poll it to completion and return the image.

        Raises:
            ValueError: if the image batch does not contain exactly 1 or 2 images.
            Exception: if task creation fails or the finished task carries no result URL.
        """
        n_images = get_number_of_images(image)
        if n_images not in (1, 2):
            raise ValueError(f"Expected 1 or 2 input images, got {n_images}.")
        # Serialize each batch entry as a base64 PNG data URL for the request payload.
        images = [
            "data:image/png;base64," + tensor_to_base64_string(i, total_pixels=4096 * 4096) for i in image
        ]
        initial_response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/wan/api/v1/services/aigc/image2image/image-synthesis", method="POST"),
            response_model=TaskCreationResponse,
            data=Image2ImageTaskCreationRequest(
                model=model,
                input=Image2ImageInputField(prompt=prompt, negative_prompt=negative_prompt, images=images),
                parameters=Image2ImageParametersField(
                    # `size` is intentionally omitted: output is fixed at 1.6 MP by the service.
                    seed=seed,
                    watermark=watermark,
                ),
            ),
        )
        if not initial_response.output:
            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
        response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
            response_model=ImageTaskStatusResponse,
            status_extractor=lambda x: x.output.task_status,
            estimated_duration=42,
            poll_interval=4,
        )
        # A finished task can carry a per-result error code/message instead of a URL;
        # fail with the service's details rather than crashing on a None index/URL.
        results = response.output.results if response.output else None
        if not results or not results[0].url:
            failed = results[0] if results else None
            raise Exception(
                f"Task failed: {failed.code if failed else None} - {failed.message if failed else None}"
            )
        return IO.NodeOutput(await download_url_to_image_tensor(str(results[0].url)))
|
|
|
|
class WanTextToVideoApi(IO.ComfyNode):
    """API node that generates a video from a text prompt, optionally driven by a voice track."""

    @classmethod
    def define_schema(cls):
        """Declare the node's UI inputs/outputs and API-node metadata."""
        return IO.Schema(
            node_id="WanTextToVideoApi",
            display_name="Wan Text to Video",
            category="api node/video/Wan",
            description="Generates video based on text prompt.",
            inputs=[
                IO.Combo.Input(
                    "model",
                    options=["wan2.5-t2v-preview"],
                    default="wan2.5-t2v-preview",
                    tooltip="Model to use.",
                ),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid.",
                    optional=True,
                ),
                IO.Combo.Input(
                    "size",
                    options=[
                        "480p: 1:1 (624x624)",
                        "480p: 16:9 (832x480)",
                        "480p: 9:16 (480x832)",
                        "720p: 1:1 (960x960)",
                        "720p: 16:9 (1280x720)",
                        "720p: 9:16 (720x1280)",
                        "720p: 4:3 (1088x832)",
                        "720p: 3:4 (832x1088)",
                        "1080p: 1:1 (1440x1440)",
                        "1080p: 16:9 (1920x1080)",
                        "1080p: 9:16 (1080x1920)",
                        "1080p: 4:3 (1632x1248)",
                        "1080p: 3:4 (1248x1632)",
                    ],
                    default="480p: 1:1 (624x624)",
                    optional=True,
                ),
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=10,
                    step=5,
                    display_mode=IO.NumberDisplay.number,
                    tooltip="Available durations: 5 and 10 seconds",
                    optional=True,
                ),
                IO.Audio.Input(
                    "audio",
                    optional=True,
                    tooltip="Audio must contain a clear, loud voice, without extraneous noise, background music.",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed to use for generation.",
                    optional=True,
                ),
                IO.Boolean.Input(
                    "generate_audio",
                    default=False,
                    optional=True,
                    tooltip="If there is no audio input, generate audio automatically.",
                ),
                IO.Boolean.Input(
                    "prompt_extend",
                    default=True,
                    tooltip="Whether to enhance the prompt with AI assistance.",
                    optional=True,
                ),
                IO.Boolean.Input(
                    "watermark",
                    default=True,
                    tooltip='Whether to add an "AI generated" watermark to the result.',
                    optional=True,
                ),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        prompt: str,
        negative_prompt: str = "",
        size: str = "480p: 1:1 (624x624)",
        duration: int = 5,
        audio: Optional[Input.Audio] = None,
        seed: int = 0,
        generate_audio: bool = False,
        prompt_extend: bool = True,
        watermark: bool = True,
    ) -> IO.NodeOutput:
        """Create a text2video task, poll it to completion and return the video.

        Raises:
            ValueError: if `size` does not contain a parsable "(WxH)" resolution.
            Exception: if task creation fails or the finished task carries no video URL.
        """
        # Pull "<width>x<height>" out of the combo label, e.g. "480p: 1:1 (624x624)".
        match = RES_IN_PARENS.search(size)
        if match is None:
            raise ValueError(f"Cannot parse resolution from size option: {size!r}")
        width, height = match.groups()
        audio_url = None
        if audio is not None:
            validate_audio_duration(audio, 3.0, 29.0)
            audio_url = "data:audio/mp3;base64," + audio_to_base64_string(audio, "mp3", "libmp3lame")
        initial_response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", method="POST"),
            response_model=TaskCreationResponse,
            data=Text2VideoTaskCreationRequest(
                model=model,
                input=Text2VideoInputField(prompt=prompt, negative_prompt=negative_prompt, audio_url=audio_url),
                parameters=Text2VideoParametersField(
                    size=f"{width}*{height}",  # service expects "<width>*<height>"
                    duration=duration,
                    seed=seed,
                    audio=generate_audio,
                    prompt_extend=prompt_extend,
                    watermark=watermark,
                ),
            ),
        )
        if not initial_response.output:
            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
        response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
            response_model=VideoTaskStatusResponse,
            status_extractor=lambda x: x.output.task_status,
            estimated_duration=120 * int(duration / 5),  # scale the estimate with clip length
            poll_interval=6,
        )
        # A finished task can carry an error code/message instead of a URL; fail with
        # the service's details rather than passing None to the downloader.
        output = response.output
        if not output or not output.video_url:
            raise Exception(
                f"Task failed: {output.code if output else None} - {output.message if output else None}"
            )
        return IO.NodeOutput(await download_url_to_video_output(output.video_url))
|
|
|
|
class WanImageToVideoApi(IO.ComfyNode):
    """API node that generates a video from a first-frame image and a text prompt."""

    @classmethod
    def define_schema(cls):
        """Declare the node's UI inputs/outputs and API-node metadata."""
        return IO.Schema(
            node_id="WanImageToVideoApi",
            display_name="Wan Image to Video",
            category="api node/video/Wan",
            description="Generates video based on the first frame and text prompt.",
            inputs=[
                IO.Combo.Input(
                    "model",
                    options=["wan2.5-i2v-preview"],
                    default="wan2.5-i2v-preview",
                    tooltip="Model to use.",
                ),
                IO.Image.Input(
                    "image",
                ),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid.",
                    optional=True,
                ),
                IO.Combo.Input(
                    "resolution",
                    options=[
                        "480P",
                        "720P",
                        "1080P",
                    ],
                    default="480P",
                    optional=True,
                ),
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=10,
                    step=5,
                    display_mode=IO.NumberDisplay.number,
                    tooltip="Available durations: 5 and 10 seconds",
                    optional=True,
                ),
                IO.Audio.Input(
                    "audio",
                    optional=True,
                    tooltip="Audio must contain a clear, loud voice, without extraneous noise, background music.",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed to use for generation.",
                    optional=True,
                ),
                IO.Boolean.Input(
                    "generate_audio",
                    default=False,
                    optional=True,
                    tooltip="If there is no audio input, generate audio automatically.",
                ),
                IO.Boolean.Input(
                    "prompt_extend",
                    default=True,
                    tooltip="Whether to enhance the prompt with AI assistance.",
                    optional=True,
                ),
                IO.Boolean.Input(
                    "watermark",
                    default=True,
                    tooltip='Whether to add an "AI generated" watermark to the result.',
                    optional=True,
                ),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        image: torch.Tensor,
        prompt: str,
        negative_prompt: str = "",
        resolution: str = "480P",
        duration: int = 5,
        audio: Optional[Input.Audio] = None,
        seed: int = 0,
        generate_audio: bool = False,
        prompt_extend: bool = True,
        watermark: bool = True,
    ) -> IO.NodeOutput:
        """Create an image2video task, poll it to completion and return the video.

        Raises:
            ValueError: if the image batch does not contain exactly one image.
            Exception: if task creation fails or the finished task carries no video URL.
        """
        if get_number_of_images(image) != 1:
            raise ValueError("Exactly one input image is required.")
        # Serialize the first frame as a base64 PNG data URL for the request payload.
        image_url = "data:image/png;base64," + tensor_to_base64_string(image, total_pixels=2000 * 2000)
        audio_url = None
        if audio is not None:
            validate_audio_duration(audio, 3.0, 29.0)
            audio_url = "data:audio/mp3;base64," + audio_to_base64_string(audio, "mp3", "libmp3lame")
        initial_response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", method="POST"),
            response_model=TaskCreationResponse,
            data=Image2VideoTaskCreationRequest(
                model=model,
                input=Image2VideoInputField(
                    prompt=prompt, negative_prompt=negative_prompt, img_url=image_url, audio_url=audio_url
                ),
                parameters=Image2VideoParametersField(
                    resolution=resolution,
                    duration=duration,
                    seed=seed,
                    audio=generate_audio,
                    prompt_extend=prompt_extend,
                    watermark=watermark,
                ),
            ),
        )
        if not initial_response.output:
            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
        response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
            response_model=VideoTaskStatusResponse,
            status_extractor=lambda x: x.output.task_status,
            estimated_duration=120 * int(duration / 5),  # scale the estimate with clip length
            poll_interval=6,
        )
        # A finished task can carry an error code/message instead of a URL; fail with
        # the service's details rather than passing None to the downloader.
        output = response.output
        if not output or not output.video_url:
            raise Exception(
                f"Task failed: {output.code if output else None} - {output.message if output else None}"
            )
        return IO.NodeOutput(await download_url_to_video_output(output.video_url))
|
|
|
|
class WanApiExtension(ComfyExtension):
    """Extension object that registers the Wan API nodes with ComfyUI."""

    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        """Return every node class contributed by this extension."""
        image_nodes = [WanTextToImageApi, WanImageToImageApi]
        video_nodes = [WanTextToVideoApi, WanImageToVideoApi]
        return image_nodes + video_nodes
|
|
|
|
async def comfy_entrypoint() -> WanApiExtension:
    """Entry point called by ComfyUI to instantiate this extension."""
    return WanApiExtension()
|
|