# Hugging Face Space: huanngzh/MV-Adapter-T2MV-Anime (Running on Zero)
# File size: 5,040 Bytes

import random

import gradio as gr
import numpy as np

import spaces
import torch

from inference_t2mv_sdxl import prepare_pipeline, run_pipeline

import transformers

# NOTE(review): presumably migrates an old-layout transformers cache to the
# current layout before any model is loaded — confirm against transformers docs.
transformers.utils.move_cache()

# Identifier of the base model handed to prepare_pipeline
base_model = "cagliostrolab/animagine-xl-3.1"

# Compute placement: CUDA when torch reports it, CPU otherwise
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
dtype = torch.bfloat16

# Generation settings shared by the pipeline and the UI
NUM_VIEWS = 6
HEIGHT = WIDTH = 768
MAX_SEED = np.iinfo(np.int32).max  # largest int32; upper bound for seeds

# Build the pipeline once at import time; infer() passes this object to
# run_pipeline on every request.
pipe = prepare_pipeline(
    # Model sources
    base_model=base_model,
    vae_model="madebyollin/sdxl-vae-fp16-fix",
    adapter_path="huanngzh/mv-adapter",
    # Optional components left unset
    # NOTE(review): None presumably selects prepare_pipeline's defaults — confirm.
    unet_model=None,
    lora_model=None,
    scheduler=None,
    # Runtime configuration
    num_views=NUM_VIEWS,
    device=device,
    dtype=dtype,
)


@spaces.GPU()
def infer(
    prompt,
    seed=42,
    randomize_seed=False,
    guidance_scale=7.0,
    num_inference_steps=30,
    negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
    progress=gr.Progress(track_tqdm=True),
):
    """Run the multi-view pipeline for a text prompt.

    Args:
        prompt: Text prompt forwarded to ``run_pipeline`` as ``text``.
        seed: Requested seed. May arrive as an int, a float (e.g. from a
            slider or an API client), a numeric string, or ``None``; it is
            normalised to ``int`` and falls back to 42 when it cannot be
            converted.
        randomize_seed: When true, ``seed`` is ignored and a random integer
            in ``[0, MAX_SEED]`` is drawn instead.
        guidance_scale: Forwarded unchanged to ``run_pipeline``.
        num_inference_steps: Forwarded unchanged to ``run_pipeline``.
        negative_prompt: Forwarded unchanged to ``run_pipeline``.
        progress: Gradio progress tracker (``track_tqdm=True``); not
            referenced in the body.

    Returns:
        A tuple ``(images, seed)``: whatever ``run_pipeline`` returned and
        the integer seed that was actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    else:
        # Normalise every non-random seed to int, not only strings: floats and
        # None would otherwise be forwarded to run_pipeline as-is.
        try:
            seed = int(seed.strip()) if isinstance(seed, str) else int(seed)
        except (TypeError, ValueError, OverflowError):
            # Unparseable text, None, NaN or infinity: use the default seed.
            seed = 42

    images = run_pipeline(
        pipe,
        num_views=NUM_VIEWS,
        text=prompt,
        height=HEIGHT,
        width=WIDTH,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        seed=seed,
        negative_prompt=negative_prompt,
        device=device,
    )
    return images, seed


# Example rows of [prompt, seed], keyed by base model identifier.
examples = {
    "stabilityai/stable-diffusion-xl-base-1.0": [
        ["An astronaut riding a horse", 0],
        ["A DSLR photo of a frog wearing a sweater", 21],
    ],
    "cagliostrolab/animagine-xl-3.1": [
        [
            (
                "1girl, izayoi sakuya, touhou, solo, maid headdress, maid, apron, "
                "short sleeves, dress, closed mouth, white apron, serious face, "
                "upper body, masterpiece, best quality, very aesthetic, absurdres"
            ),
            0,
        ],
        [
            (
                "1boy, male focus, ikari shinji, neon genesis evangelion, solo, "
                "serious face,(masterpiece), (best quality), (ultra-detailed), "
                "very aesthetic, illustration, disheveled hair, moist skin, "
                "intricate details"
            ),
            0,
        ],
        ["1girl, pink hair, pink shirts, smile, shy, masterpiece, anime", 0],
    ],
}

# Stylesheet passed to gr.Blocks: centres the #col-container column and caps
# its width.
css = (
    "\n"
    "#col-container {\n"
    "    margin: 0 auto;\n"
    "    max-width: 600px;\n"
    "}\n"
)

# Gradio UI: a single centred column holding the prompt row, the result
# gallery, the advanced settings and the example prompts.
with gr.Blocks(css=css) as demo:

    with gr.Column(elem_id="col-container"):
        # Header text. The resolution is interpolated from WIDTH/HEIGHT so it
        # cannot drift from the values infer() passes to the pipeline.
        gr.Markdown(
            f"""# MV-Adapter [Text-to-Multi-View]
Generate {WIDTH}x{HEIGHT} multi-view images using {base_model} <br>
[[page](https://huanngzh.github.io/MV-Adapter-Page/)] [[repo](https://github.com/huanngzh/MV-Adapter)] [Tips: please follow the prompt template in [usage-guidelines](https://huggingface.co/cagliostrolab/animagine-xl-3.1#usage-guidelines)]
        """
        )

        # Prompt box and run button share one row.
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )

            run_button = gr.Button("Run", scale=0)

        # Gallery that receives the images returned by infer().
        result = gr.Gallery(
            label="Result",
            show_label=False,
            columns=[3],
            rows=[2],
            object_fit="contain",
            height="auto",
        )

        with gr.Accordion("Advanced Settings", open=False):
            # The seed slider is also an output of infer(), so it shows the
            # seed that was actually used.
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=30,
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="CFG scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=7.0,
                )

            with gr.Row():
                negative_prompt = gr.Textbox(
                    label="Negative prompt",
                    placeholder="Enter your negative prompt",
                    value="watermark, ugly, deformed, noisy, blurry, low contrast",
                )

        # Example [prompt, seed] rows for the configured base model.
        gr.Examples(
            examples=examples[base_model],
            fn=infer,
            inputs=[prompt, seed],
            outputs=[result, seed],
            # cache_examples=True,
        )

    # Clicking "Run" and submitting the prompt box both call infer() with the
    # full set of controls.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            seed,
            randomize_seed,
            guidance_scale,
            num_inference_steps,
            negative_prompt,
        ],
        outputs=[result, seed],
    )

demo.launch()