Spaces:
Runtime error
Create app.py with Ukiyo postal generator service!
app.py
ADDED
@@ -0,0 +1,193 @@
import open_clip
import gradio as gr
import numpy as np
import torch
import torchvision

from tqdm.auto import tqdm
from PIL import Image, ImageColor
from torchvision import transforms
from diffusers import DDIMScheduler, DDPMPipeline


device = (
    "mps"
    if torch.backends.mps.is_available()
    else "cuda"
    if torch.cuda.is_available()
    else "cpu"
)

# Load the pretrained pipeline
pipeline_name = "alkzar90/sd-class-ukiyo-e-256"
image_pipe = DDPMPipeline.from_pretrained(pipeline_name).to(device)

# Sample some images with a DDIM Scheduler over 40 steps
scheduler = DDIMScheduler.from_pretrained(pipeline_name)
scheduler.set_timesteps(num_inference_steps=40)


# Color guidance
#-------------------------------------------------------------------------------
# Color guidance function
def color_loss(images, target_color=(0.1, 0.9, 0.5)):
    """Given a target color (R, G, B) return a loss for how far away on average
    the images' pixels are from that color. Defaults to a light teal: (0.1, 0.9, 0.5)"""
    target = (
        torch.tensor(target_color).to(images.device) * 2 - 1
    )  # Map target color to (-1, 1)
    target = target[
        None, :, None, None
    ]  # Get shape right to work with the images (b, c, h, w)
    error = torch.abs(
        images - target
    ).mean()  # Mean absolute difference between the image pixels and the target color
    return error
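
# Quick sanity check on the scale of this loss: for a batch of pure-white images
# (every pixel +1 in the (-1, 1) range), the default teal target maps to
# (-0.8, 0.8, 0.0), so the per-channel errors are (1.8, 0.2, 1.0) and the mean
# absolute error is about 1.0. Values therefore sit roughly in [0, 2].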


# CLIP guidance
#-------------------------------------------------------------------------------
clip_model, _, preprocess = open_clip.create_model_and_transforms(
    "ViT-B-32", pretrained="openai"
)
clip_model.to(device)

# Transforms to resize and augment an image + normalize to match CLIP's training data
tfms = transforms.Compose(
    [
        transforms.RandomResizedCrop(224),  # Random CROP each time
        transforms.RandomAffine(
            5
        ),  # One possible random augmentation: skews the image
        transforms.RandomHorizontalFlip(),  # You can add additional augmentations if you like
        transforms.Normalize(
            mean=(0.48145466, 0.4578275, 0.40821073),
            std=(0.26862954, 0.26130258, 0.27577711),
        ),
    ]
)


# CLIP guidance function
def clip_loss(image, text_features):
    image_features = clip_model.encode_image(
        tfms(image)
    )  # Note: applies the above transforms
    input_normed = torch.nn.functional.normalize(image_features.unsqueeze(1), dim=2)
    embed_normed = torch.nn.functional.normalize(text_features.unsqueeze(0), dim=2)
    dists = (
        input_normed.sub(embed_normed).norm(dim=2).div(2).arcsin().pow(2).mul(2)
    )  # Squared Great Circle Distance
    return dists.mean()
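
# Since both embeddings are unit-normalized, ||a - b|| is the chord between two
# points on the unit sphere and 2 * arcsin(||a - b|| / 2) is the angle between
# them, so this loss is proportional to the squared angular (great-circle)
# distance between the image and text features.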


# Sample generator loop
#-------------------------------------------------------------------------------
def generate(
    color,
    color_loss_scale,
    num_examples=4,
    seed=None,
    prompt=None,
    prompt_loss_scale=None,
    prompt_n_cuts=None,
    inference_steps=50,
):
    scheduler.set_timesteps(num_inference_steps=inference_steps)

    if seed:
        torch.manual_seed(seed)

    if prompt:
        text = open_clip.tokenize([prompt]).to(device)
        with torch.no_grad(), torch.cuda.amp.autocast():
            text_features = clip_model.encode_text(text)

    target_color = ImageColor.getcolor(color, "RGB")  # Target color as RGB
    target_color = [a / 255 for a in target_color]  # Rescale from (0, 255) to (0, 1)

    x = torch.randn(num_examples, 3, 256, 256).to(device)

    for i, t in tqdm(enumerate(scheduler.timesteps)):
        model_input = scheduler.scale_model_input(x, t)
        with torch.no_grad():
            noise_pred = image_pipe.unet(model_input, t)["sample"]
        x = x.detach().requires_grad_()
        x0 = scheduler.step(noise_pred, t, x).pred_original_sample

        # Color loss
        loss = color_loss(x0, target_color) * color_loss_scale
        cond_color_grad = -torch.autograd.grad(loss, x)[0]
        # Modify x based solely on the color gradient -> x_cond
        x_cond = x.detach() + cond_color_grad

        # Prompt loss (modify x_cond with cond_prompt_grad) based on
        # the original x (not modified previously with cond_color_grad)
        if prompt:
            cond_prompt_grad = 0
            for cut in range(prompt_n_cuts):
                # Set requires_grad on x
                x = x.detach().requires_grad_()
                # Get the predicted x0:
                x0 = scheduler.step(noise_pred, t, x).pred_original_sample
                # Calculate loss
                prompt_loss = clip_loss(x0, text_features) * prompt_loss_scale
                # Get gradient (scale by n_cuts since we want the average)
                cond_prompt_grad -= (
                    torch.autograd.grad(prompt_loss, x, retain_graph=True)[0]
                    / prompt_n_cuts
                )
            # Modify x based on this gradient
            alpha_bar = scheduler.alphas_cumprod[i]
            x_cond = (
                x_cond + cond_prompt_grad * alpha_bar.sqrt()
            )  # Note the additional scaling factor here!

        x = scheduler.step(noise_pred, t, x_cond).prev_sample

    grid = torchvision.utils.make_grid(x, nrow=4)
    im = grid.permute(1, 2, 0).cpu().clip(-1, 1) * 0.5 + 0.5
    im = Image.fromarray(np.array(im * 255).astype(np.uint8))
    im.save("test.jpeg")
    return im


# GRADIO Interface
#-------------------------------------------------------------------------------
TITLE = "Ukiyo-e postal generator service 🎴!"
DESCRIPTION = (
    "This model is a diffusion model for unconditional image generation of Ukiyo-e images ✍ 🎨.\n"
    "The model was trained by fine-tuning the google/ddpm-celebahq-256 pretrained model on the dataset: "
    "https://huggingface.co/datasets/huggan/ukiyoe2photo"
)
CSS = ".output-image, .input-image, .image-preview {height: 250px !important}"

# See the gradio docs for the types of inputs and outputs available
inputs = [
    gr.ColorPicker(label="color (click on the square to pick the color)", value="#DF5C16"),  # Add any inputs you need here
    gr.Slider(label="color_guidance_scale (how strongly to blend the color)", minimum=0, maximum=30, value=6.7),
    gr.Slider(label="num_examples (# images generated)", minimum=4, maximum=12, value=8, step=4),
    gr.Number(label="seed (reproducibility and experimentation)", value=666),
    gr.Text(label="Text prompt (optional)", value=None),
    gr.Slider(label="prompt_guidance_scale (...)", minimum=0, maximum=1000, value=10),
    gr.Slider(label="prompt_n_cuts", minimum=4, maximum=12, step=4),
    gr.Slider(label="Number of inference steps (+ steps -> + guidance effect)", minimum=40, maximum=60, value=40, step=1),
]

outputs = gr.Image(label="result")

# And the minimal interface
demo = gr.Interface(
    fn=generate,
    inputs=inputs,
    outputs=outputs,
    css=CSS,
    examples=[
        ["#DF5C16", 6.7, 12, 666, None, None, None, 40],
        ["#C01660", 13.5, 12, 1990, None, None, None, 40],
        ["#44CCAA", 8.9, 12, 1512, None, None, None, 40],
        ["#39A291", 5.0, 12, 666, "A sakura tree", 60, 8, 52],
        ["#0E0907", 0.0, 12, 666, "A big whale in the ocean", 60, 8, 52],
        ["#19A617", 4.6, 12, 666, "An island with sunset at background", 140, 8, 47],
    ],
    title=TITLE,
    description=DESCRIPTION,
)

if __name__ == "__main__":
    demo.launch(enable_queue=True)
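
The Gradio interface is the only entry point the Space itself exposes, but for quick local experiments `generate` can also be called directly. Below is a minimal sketch, assuming the file above is saved as `app.py` and imported from the same directory; the argument values and the output filename are illustrative, not part of the Space.

# Minimal sketch: call the generator without going through the Gradio UI.
# Importing app loads the pipeline and CLIP model at module level, which can
# take a while on first run.
from app import generate

im = generate(
    color="#44CCAA",        # hex color, as produced by the ColorPicker input
    color_loss_scale=8.9,   # how strongly to pull pixels toward that color
    num_examples=4,         # number of images arranged in the output grid
    seed=1512,              # fixed seed for reproducibility
    inference_steps=40,     # DDIM sampling steps
)
im.save("ukiyo_sample.jpeg")  # generate() also writes test.jpeg as a side effect

The returned object is the PIL image of the sample grid; passing a `prompt` together with `prompt_loss_scale` and `prompt_n_cuts` additionally enables the CLIP guidance branch, as in the last three example rows of the interface.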