import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch.nn.functional as F
import torchvision.transforms as T
from diffusers import LMSDiscreteScheduler, DiffusionPipeline
# configuration
torch_device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
height, width = 512, 512
guidance_scale = 8
blue_loss_scale = 200  # weight applied to whichever guidance loss is selected (not only the blue loss)
num_inference_steps = 50
elastic_transformer = T.ElasticTransform(alpha=550.0, sigma=5.0)
pretrained_model_name_or_path = "segmind/tiny-sd"
pipe = DiffusionPipeline.from_pretrained(
    pretrained_model_name_or_path,
    low_cpu_mem_usage=True
).to(torch_device)

# load textual-inversion concept embeddings from the SD concepts library
pipe.load_textual_inversion("sd-concepts-library/dreams")
pipe.load_textual_inversion("sd-concepts-library/midjourney-style")
pipe.load_textual_inversion("sd-concepts-library/moebius")
pipe.load_textual_inversion("sd-concepts-library/style-of-marc-allante")
pipe.load_textual_inversion("sd-concepts-library/wlop-style")

# map UI concept names to their textual-inversion placeholder tokens
concepts_mapping = {
    "Dream": '<meeg>', "Midjourney": '<midjourney-style>',
    "Marc Allante": '<Marc_Allante>', "Moebius": '<moebius>',
    "Wlop": '<wlop-style>'
}
def image_loss(images, method='elastic'):
    # elastic loss: difference between the image and an elastically deformed copy
    if method == 'elastic':
        transformed_imgs = elastic_transformer(images)
        error = torch.abs(transformed_imgs - images).mean()
    # symmetry loss: flip the image along the width and compare
    elif method == "symmetry":
        flipped_image = torch.flip(images, [3])
        error = F.mse_loss(images, flipped_image)
    # saturation loss: difference between the image and a heavily saturated copy
    elif method == 'saturation':
        transformed_imgs = T.functional.adjust_saturation(images, saturation_factor=10)
        error = torch.abs(transformed_imgs - images).mean()
    # blue loss: push the blue channel of every image in the batch towards 0.9
    elif method == 'blue':
        error = torch.abs(images[:, 2] - 0.9).mean()  # [:, 2] -> all images in batch, blue channel only
    else:
        raise ValueError(f"Unknown loss method: {method}")
    return error
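# Illustrative sanity check (not executed by the app): image_loss expects a batch
# of images shaped (N, C, H, W) with values in [0, 1]. The dummy batch below is an
# assumption for demonstration only.
# _dummy_batch = torch.rand(2, 3, 128, 128)
# for _m in ('elastic', 'symmetry', 'saturation', 'blue'):
#     print(_m, image_loss(_dummy_batch, method=_m).item())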
HTML_TEMPLATE = """
<style>
    body {
        background: linear-gradient(135deg, #f5f7fa, #c3cfe2);
    }
    #app-header {
        text-align: center;
        background: rgba(255, 255, 255, 0.8); /* Semi-transparent white */
        padding: 20px;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        position: relative; /* To position the artifacts */
    }
    #app-header h1 {
        color: #4CAF50;
        font-size: 2em;
        margin-bottom: 10px;
    }
    .concept {
        position: relative;
        transition: transform 0.3s;
    }
    .concept:hover {
        transform: scale(1.1);
    }
    .concept img {
        width: 100px;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
    .concept-description {
        position: absolute;
        bottom: -30px;
        left: 50%;
        transform: translateX(-50%);
        background-color: #4CAF50;
        color: white;
        padding: 5px 10px;
        border-radius: 5px;
        opacity: 0;
        transition: opacity 0.3s;
    }
    .concept:hover .concept-description {
        opacity: 1;
    }
    /* Artifacts */
    .artifact {
        position: absolute;
        background: rgba(76, 175, 80, 0.1); /* Semi-transparent green */
        border-radius: 50%; /* Make it circular */
    }
    .artifact.large {
        width: 300px;
        height: 300px;
        top: -50px;
        left: -150px;
    }
    .artifact.medium {
        width: 200px;
        height: 200px;
        bottom: -50px;
        right: -100px;
    }
    .artifact.small {
        width: 100px;
        height: 100px;
        top: 50%;
        left: 50%;
        transform: translate(-50%, -50%);
    }
</style>
<div id="app-header">
    <!-- Artifacts -->
    <div class="artifact large"></div>
    <div class="artifact medium"></div>
    <div class="artifact small"></div>
    <!-- Content -->
    <h1>Art Generator</h1>
    <p>Generate new art in five different styles by providing a prompt.</p>
    <div style="display: flex; justify-content: center; gap: 20px; margin-top: 20px;">
        <div class="concept">
            <img src="https://github.com/Delve-ERAV1/S20/assets/11761529/30ac92f8-fc62-4aab-9221-043865c6fe7c" alt="Midjourney">
            <div class="concept-description">Midjourney Style</div>
        </div>
        <div class="concept">
            <img src="https://github.com/Delve-ERAV1/S20/assets/11761529/54c9a61e-df9f-4054-835b-ec2c6ba5916c" alt="Dreams">
            <div class="concept-description">Dreams Style</div>
        </div>
        <div class="concept">
            <img src="https://github.com/Delve-ERAV1/S20/assets/11761529/2f37e402-15d1-4a74-ba85-bb1566da930e" alt="Moebius">
            <div class="concept-description">Moebius Style</div>
        </div>
        <div class="concept">
            <img src="https://github.com/Delve-ERAV1/S20/assets/11761529/f838e767-ac20-4996-b5be-65c61b365ce0" alt="Allante">
            <div class="concept-description">Hong Kong born artist inspired by western and eastern influences</div>
        </div>
        <div class="concept">
            <img src="https://github.com/Delve-ERAV1/S20/assets/11761529/9958140a-1b62-4972-83ca-85b023e3863f" alt="Wlop">
            <div class="concept-description">WLOP (Born 1987) is known for Digital Art (NFTs)</div>
        </div>
    </div>
</div>
"""
def get_examples():
    examples = [
        ['A powerful man in dreadlocks', 'Dream', 'Symmetry', 45],
        ['World Peace', 'Marc Allante', 'Saturation', 147],
        ['Storm trooper in the desert, dramatic lighting, high-detail', 'Moebius', 'Elastic', 28],
        ['Delicious Italian pizza on a table, a window in the background overlooking a city skyline', 'Wlop', 'Blue', 50],
    ]
    return examples
def latents_to_pil(latents):
    # batch of latents -> PIL image (first image of the decoded batch)
    latents = (1 / 0.18215) * latents
    with torch.no_grad():
        image = pipe.vae.decode(latents).sample
    image = (image / 2 + 0.5).clamp(0, 1)  # scale from [-1, 1] to [0, 1]
    image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
    image = (image * 255).round().astype("uint8")
    return Image.fromarray(image[0])
def generate_art(prompt, concept, method, seed):
    prompt = f"{prompt} in the style of {concepts_mapping[concept]}"
    img_no_loss = latents_to_pil(generate_image(prompt, method, seed))
    img_loss = latents_to_pil(generate_image(prompt, method, seed, loss_apply=True))
    return [img_no_loss, img_loss]
def generate_image(prompt, method, seed, loss_apply=False):
    generator = torch.manual_seed(seed)
    batch_size = 1
    method = method.lower()

    # scheduler
    scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=1000)
    scheduler.set_timesteps(num_inference_steps)
    scheduler.timesteps = scheduler.timesteps.to(torch.float32)  # cast to float32 to avoid dtype issues (e.g. on MPS)

    # text embeddings of the prompt
    text_input = pipe.tokenizer([prompt], padding='max_length', max_length=pipe.tokenizer.model_max_length, truncation=True, return_tensors="pt")
    with torch.no_grad():
        text_embeddings = pipe.text_encoder(text_input.input_ids.to(torch_device))[0]

    # unconditional (empty prompt) embeddings for classifier-free guidance
    max_length = text_input.input_ids.shape[-1]
    uncond_input = pipe.tokenizer(
        [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
    )
    with torch.no_grad():
        uncond_embeddings = pipe.text_encoder(uncond_input.input_ids.to(torch_device))[0]
    text_embeddings = torch.cat([uncond_embeddings, text_embeddings])

    # random starting latents
    latents = torch.randn(
        (batch_size, pipe.unet.config.in_channels, height // 8, width // 8),
        generator=generator,
    )
    latents = latents.to(torch_device)
    latents = latents * scheduler.init_noise_sigma

    for i, t in tqdm(enumerate(scheduler.timesteps), total=len(scheduler.timesteps)):
        # duplicate the latents: one copy for the unconditional pass, one for the text-conditioned pass
        latent_model_input = torch.cat([latents] * 2)
        sigma = scheduler.sigmas[i]
        latent_model_input = scheduler.scale_model_input(latent_model_input, t)

        with torch.no_grad():
            noise_pred = pipe.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]

        # classifier-free guidance
        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
        noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

        # every 5th step, nudge the latents in the direction that lowers the selected image loss
        if loss_apply and i % 5 == 0:
            latents = latents.detach().requires_grad_()
            # predict the denoised image (x0) from the current latents
            latents_x0 = latents - sigma * noise_pred
            # decode with the VAE so the loss can be computed in image space
            denoised_images = pipe.vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5  # range (0, 1)
            loss = image_loss(denoised_images, method) * blue_loss_scale
            cond_grad = torch.autograd.grad(loss, latents)[0]
            latents = latents.detach() - cond_grad * sigma**2

        latents = scheduler.step(noise_pred, t, latents).prev_sample

    return latents