import os import json import copy import time import random import logging import numpy as np from typing import Any, Dict, List, Optional, Union import torch from PIL import Image import gradio as gr from diffusers import ( DiffusionPipeline, AutoencoderTiny, AutoencoderKL, AutoPipelineForImage2Image, FluxPipeline, FlowMatchEulerDiscreteScheduler) from huggingface_hub import ( hf_hub_download, HfFileSystem, ModelCard, snapshot_download) import spaces def calculate_shift( image_seq_len, base_seq_len: int = 256, max_seq_len: int = 4096, base_shift: float = 0.5, max_shift: float = 1.16, ): m = (max_shift - base_shift) / (max_seq_len - base_seq_len) b = base_shift - m * base_seq_len mu = image_seq_len * m + b return mu def retrieve_timesteps( scheduler, num_inference_steps: Optional[int] = None, device: Optional[Union[str, torch.device]] = None, timesteps: Optional[List[int]] = None, sigmas: Optional[List[float]] = None, **kwargs, ): if timesteps is not None and sigmas is not None: raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values") if timesteps is not None: scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs) timesteps = scheduler.timesteps num_inference_steps = len(timesteps) elif sigmas is not None: scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs) timesteps = scheduler.timesteps num_inference_steps = len(timesteps) else: scheduler.set_timesteps(num_inference_steps, device=device, **kwargs) timesteps = scheduler.timesteps return timesteps, num_inference_steps # FLUX pipeline @torch.inference_mode() def flux_pipe_call_that_returns_an_iterable_of_images( self, prompt: Union[str, List[str]] = None, prompt_2: Optional[Union[str, List[str]]] = None, height: Optional[int] = None, width: Optional[int] = None, num_inference_steps: int = 28, timesteps: List[int] = None, guidance_scale: float = 3.5, num_images_per_prompt: Optional[int] = 1, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, latents: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.FloatTensor] = None, pooled_prompt_embeds: Optional[torch.FloatTensor] = None, output_type: Optional[str] = "pil", return_dict: bool = True, joint_attention_kwargs: Optional[Dict[str, Any]] = None, max_sequence_length: int = 512, good_vae: Optional[Any] = None, ): height = height or self.default_sample_size * self.vae_scale_factor width = width or self.default_sample_size * self.vae_scale_factor self.check_inputs( prompt, prompt_2, height, width, prompt_embeds=prompt_embeds, pooled_prompt_embeds=pooled_prompt_embeds, max_sequence_length=max_sequence_length, ) self._guidance_scale = guidance_scale self._joint_attention_kwargs = joint_attention_kwargs self._interrupt = False batch_size = 1 if isinstance(prompt, str) else len(prompt) device = self._execution_device lora_scale = joint_attention_kwargs.get("scale", None) if joint_attention_kwargs is not None else None prompt_embeds, pooled_prompt_embeds, text_ids = self.encode_prompt( prompt=prompt, prompt_2=prompt_2, prompt_embeds=prompt_embeds, pooled_prompt_embeds=pooled_prompt_embeds, device=device, num_images_per_prompt=num_images_per_prompt, max_sequence_length=max_sequence_length, lora_scale=lora_scale, ) num_channels_latents = self.transformer.config.in_channels // 4 latents, latent_image_ids = self.prepare_latents( batch_size * num_images_per_prompt, num_channels_latents, height, width, prompt_embeds.dtype, device, generator, latents, ) sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps) image_seq_len = latents.shape[1] mu = calculate_shift( image_seq_len, self.scheduler.config.base_image_seq_len, self.scheduler.config.max_image_seq_len, self.scheduler.config.base_shift, self.scheduler.config.max_shift, ) timesteps, num_inference_steps = retrieve_timesteps( self.scheduler, num_inference_steps, device, timesteps, sigmas, mu=mu, ) self._num_timesteps = len(timesteps) guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32).expand(latents.shape[0]) if self.transformer.config.guidance_embeds else None for i, t in enumerate(timesteps): if self.interrupt: continue timestep = t.expand(latents.shape[0]).to(latents.dtype) noise_pred = self.transformer( hidden_states=latents, timestep=timestep / 1000, guidance=guidance, pooled_projections=pooled_prompt_embeds, encoder_hidden_states=prompt_embeds, txt_ids=text_ids, img_ids=latent_image_ids, joint_attention_kwargs=self.joint_attention_kwargs, return_dict=False, )[0] latents_for_image = self._unpack_latents(latents, height, width, self.vae_scale_factor) latents_for_image = (latents_for_image / self.vae.config.scaling_factor) + self.vae.config.shift_factor image = self.vae.decode(latents_for_image, return_dict=False)[0] yield self.image_processor.postprocess(image, output_type=output_type)[0] latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0] torch.cuda.empty_cache() latents = self._unpack_latents(latents, height, width, self.vae_scale_factor) latents = (latents / good_vae.config.scaling_factor) + good_vae.config.shift_factor image = good_vae.decode(latents, return_dict=False)[0] self.maybe_free_model_hooks() torch.cuda.empty_cache() yield self.image_processor.postprocess(image, output_type=output_type)[0] #-----------------------------------------------------------------------------------LoRA's--------------------------------------------------------------------------# loras = [ #1 { "image": "https://huggingface.co/prithivMLmods/Canopus-LoRA-Flux-FaceRealism/resolve/main/images/11.png", "title": "Flux Face Realism", "repo": "prithivMLmods/Canopus-LoRA-Flux-FaceRealism", "trigger_word": "Realism" }, #2 { "image": "https://huggingface.co/alvdansen/softserve_anime/resolve/main/images/ComfyUI_00134_.png", "title": "Softserve Anime", "repo": "alvdansen/softserve_anime", "trigger_word": "sftsrv style illustration" }, #3 { "image": "https://huggingface.co/prithivMLmods/Canopus-LoRA-Flux-Anime/resolve/main/assets/4.png", "title": "Flux Anime", "repo": "prithivMLmods/Canopus-LoRA-Flux-Anime", "trigger_word": "Anime" }, #4 { "image": "https://huggingface.co/Shakker-Labs/FLUX.1-dev-LoRA-One-Click-Creative-Template/resolve/main/images/f2cc649985648e57b9b9b14ca7a8744ac8e50d75b3a334ed4df0f368.jpg", "title": "Creative Template", "repo": "Shakker-Labs/FLUX.1-dev-LoRA-One-Click-Creative-Template", "trigger_word": "The background is 4 real photos, and in the middle is a cartoon picture summarizing the real photos." }, #5 { "image": "https://huggingface.co/prithivMLmods/Canopus-LoRA-Flux-UltraRealism-2.0/resolve/main/images/3.png", "title": "Ultra Realism", "repo": "prithivMLmods/Canopus-LoRA-Flux-UltraRealism-2.0", "trigger_word": "Ultra realistic" }, #6 { "image": "https://huggingface.co/gokaygokay/Flux-Game-Assets-LoRA-v2/resolve/main/images/example_y2bqpuphc.png", "title": "Game Assets", "repo": "gokaygokay/Flux-Game-Assets-LoRA-v2", "trigger_word": "wbgmsst, white background" }, #7 { "image": "https://huggingface.co/alvdansen/softpasty-flux-dev/resolve/main/images/ComfyUI_00814_%20(2).png", "title": "Softpasty", "repo": "alvdansen/softpasty-flux-dev", "trigger_word": "araminta_illus illustration style" }, #8 { "image": "https://huggingface.co/Shakker-Labs/FLUX.1-dev-LoRA-add-details/resolve/main/images/0.png", "title": "Details Add", "repo": "Shakker-Labs/FLUX.1-dev-LoRA-add-details", "trigger_word": "" }, #9 { "image": "https://huggingface.co/alvdansen/frosting_lane_flux/resolve/main/images/content%20-%202024-08-11T010011.238.jpeg", "title": "Frosting Lane", "repo": "alvdansen/frosting_lane_flux", "trigger_word": "frstingln illustration" }, #10 { "image": "https://huggingface.co/aleksa-codes/flux-ghibsky-illustration/resolve/main/images/example5.jpg", "title": "Ghibsky Illustration", "repo": "aleksa-codes/flux-ghibsky-illustration", "trigger_word": "GHIBSKY style painting" }, #11 { "image": "https://huggingface.co/Shakker-Labs/FLUX.1-dev-LoRA-Dark-Fantasy/resolve/main/images/c2215bd73da9f14fcd63cc93350e66e2901bdafa6fb8abaaa2c32a1b.jpg", "title": "Dark Fantasy", "repo": "Shakker-Labs/FLUX.1-dev-LoRA-Dark-Fantasy", "trigger_word": "" }, #12 { "image": "https://huggingface.co/Norod78/Flux_1_Dev_LoRA_Paper-Cutout-Style/resolve/main/d13591878d5043f3989dd6eb1c25b710_233c18effb4b491cb467ca31c97e90b5.png", "title": "Paper Cutout", "repo": "Norod78/Flux_1_Dev_LoRA_Paper-Cutout-Style", "trigger_word": "Paper Cutout Style" }, #13 { "image": "https://huggingface.co/alvdansen/mooniverse/resolve/main/images/out-0%20(17).webp", "title": "Mooniverse", "repo": "alvdansen/mooniverse", "trigger_word": "surreal style" }, #14 { "image": "https://huggingface.co/alvdansen/pola-photo-flux/resolve/main/images/out-0%20-%202024-09-22T130819.351.webp", "title": "Pola Photo", "repo": "alvdansen/pola-photo-flux", "trigger_word": "polaroid style" }, #15 { "image": "https://huggingface.co/multimodalart/flux-tarot-v1/resolve/main/images/7e180627edd846e899b6cd307339140d_5b2a09f0842c476b83b6bd2cb9143a52.png", "title": "Flux Tarot", "repo": "multimodalart/flux-tarot-v1", "trigger_word": "in the style of TOK a trtcrd tarot style" }, #16 { "image": "https://huggingface.co/prithivMLmods/Flux-Dev-Real-Anime-LoRA/resolve/main/images/111.png", "title": "Real Anime", "repo": "prithivMLmods/Flux-Dev-Real-Anime-LoRA", "trigger_word": "Real Anime" }, #17 { "image": "https://huggingface.co/diabolic6045/Flux_Sticker_Lora/resolve/main/images/example_s3pxsewcb.png", "title": "Stickers", "repo": "diabolic6045/Flux_Sticker_Lora", "trigger_word": "5t1cker 5ty1e" }, #18 { "image": "https://huggingface.co/VideoAditor/Flux-Lora-Realism/resolve/main/images/feel-the-difference-between-using-flux-with-lora-from-xlab-v0-j0ehybmvxehd1.png", "title": "Realism", "repo": "XLabs-AI/flux-RealismLora", "trigger_word": "" }, #19 { "image": "https://huggingface.co/alvdansen/flux-koda/resolve/main/images/ComfyUI_00583_%20(1).png", "title": "Koda", "repo": "alvdansen/flux-koda", "trigger_word": "flmft style" }, #20 { "image": "https://huggingface.co/mgwr/Cine-Aesthetic/resolve/main/images/00019-1333633802.png", "title": "Cine Aesthetic", "repo": "mgwr/Cine-Aesthetic", "trigger_word": "mgwr/cine" }, #21 { "image": "https://huggingface.co/SebastianBodza/flux_cute3D/resolve/main/images/astronaut.webp", "title": "Cute 3D", "repo": "SebastianBodza/flux_cute3D", "trigger_word": "NEOCUTE3D" }, #22 { "image": "https://huggingface.co/bingbangboom/flux_dreamscape/resolve/main/images/3.jpg", "title": "Dreamscape", "repo": "bingbangboom/flux_dreamscape", "trigger_word": "in the style of BSstyle004" }, #23 { "image": "https://huggingface.co/prithivMLmods/Canopus-Cute-Kawaii-Flux-LoRA/resolve/main/images/11.png", "title": "Cute Kawaii", "repo": "prithivMLmods/Canopus-Cute-Kawaii-Flux-LoRA", "trigger_word": "cute-kawaii" }, #24 { "image": "https://cdn-uploads.huggingface.co/production/uploads/64b24543eec33e27dc9a6eca/_jyra-jKP_prXhzxYkg1O.png", "title": "Pastel Anime", "repo": "Raelina/Flux-Pastel-Anime", "trigger_word": "Anime" }, #25 { "image": "https://huggingface.co/Shakker-Labs/FLUX.1-dev-LoRA-Vector-Journey/resolve/main/images/f7a66b51c89896854f31bef743dc30f33c6ea3c0ed8f9ff04d24b702.jpg", "title": "Vector", "repo": "Shakker-Labs/FLUX.1-dev-LoRA-Vector-Journey", "trigger_word": "artistic style blends reality and illustration elements" }, #26 { "image": "https://huggingface.co/bingbangboom/flux-miniature-worlds/resolve/main/images/2.jpg", "title": "Miniature", "repo": "bingbangboom/flux-miniature-worlds", "trigger_word": "Image in the style of MNTRWRLDS" }, #27 { "image": "https://huggingface.co/glif-loradex-trainer/bingbangboom_flux_surf/resolve/main/samples/1729012111574__000002000_0.jpg", "title": "Surf Bingbangboom", "repo": "glif-loradex-trainer/bingbangboom_flux_surf", "trigger_word": "SRFNGV01" }, #28 { "image": "https://huggingface.co/prithivMLmods/Canopus-Snoopy-Charlie-Brown-Flux-LoRA/resolve/main/000.png", "title": "Snoopy Charlie", "repo": "prithivMLmods/Canopus-Snoopy-Charlie-Brown-Flux-LoRA", "trigger_word": "Snoopy Charlie Brown" }, #29 { "image": "https://huggingface.co/alvdansen/sonny-anime-fixed/resolve/main/images/uqAuIMqA6Z7mvPkHg4qJE_f4c3cbe64e0349e7b946d02adeacdca3.png", "title": "Fixed Sonny", "repo": "alvdansen/sonny-anime-fixed", "trigger_word": "nm22 style" }, #30 { "image": "https://huggingface.co/davisbro/flux-multi-angle/resolve/main/multi-angle-examples/3.png", "title": "Multi Angle", "repo": "davisbro/flux-multi-angle", "trigger_word": "A TOK composite photo of a person posing at different angles" } #add--new LoRA Below ↓ - Before that Use(,) ] #--------------------------------------------------Model Initialization-----------------------------------------------------------------------------------------# dtype = torch.bfloat16 device = "cuda" if torch.cuda.is_available() else "cpu" base_model = "black-forest-labs/FLUX.1-dev" #TAEF1 is very tiny autoencoder which uses the same "latent API" as FLUX.1's VAE. FLUX.1 is useful for real-time previewing of the FLUX.1 generation process.# taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device) good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype).to(device) pipe = DiffusionPipeline.from_pretrained(base_model, torch_dtype=dtype, vae=taef1).to(device) pipe_i2i = AutoPipelineForImage2Image.from_pretrained(base_model, vae=good_vae, transformer=pipe.transformer, text_encoder=pipe.text_encoder, tokenizer=pipe.tokenizer, text_encoder_2=pipe.text_encoder_2, tokenizer_2=pipe.tokenizer_2, torch_dtype=dtype ) MAX_SEED = 2**32-1 pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(pipe) class calculateDuration: def __init__(self, activity_name=""): self.activity_name = activity_name def __enter__(self): self.start_time = time.time() return self def __exit__(self, exc_type, exc_value, traceback): self.end_time = time.time() self.elapsed_time = self.end_time - self.start_time if self.activity_name: print(f"Elapsed time for {self.activity_name}: {self.elapsed_time:.6f} seconds") else: print(f"Elapsed time: {self.elapsed_time:.6f} seconds") def update_selection(evt: gr.SelectData, width, height): selected_lora = loras[evt.index] new_placeholder = f"Type a prompt for {selected_lora['title']}" lora_repo = selected_lora["repo"] updated_text = f"### Selected: [{lora_repo}](https://huggingface.co/{lora_repo}) ✅" if "aspect" in selected_lora: if selected_lora["aspect"] == "portrait": width = 768 height = 1024 elif selected_lora["aspect"] == "landscape": width = 1024 height = 768 else: width = 1024 height = 1024 return ( gr.update(placeholder=new_placeholder), updated_text, evt.index, width, height, ) @spaces.GPU(duration=70) def generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, progress): pipe.to("cuda") generator = torch.Generator(device="cuda").manual_seed(seed) with calculateDuration("Generating image"): # Generate image for img in pipe.flux_pipe_call_that_returns_an_iterable_of_images( prompt=prompt_mash, num_inference_steps=steps, guidance_scale=cfg_scale, width=width, height=height, generator=generator, joint_attention_kwargs={"scale": lora_scale}, output_type="pil", good_vae=good_vae, ): yield img def generate_image_to_image(prompt_mash, image_input_path, image_strength, steps, cfg_scale, width, height, lora_scale, seed): generator = torch.Generator(device="cuda").manual_seed(seed) pipe_i2i.to("cuda") image_input = load_image(image_input_path) final_image = pipe_i2i( prompt=prompt_mash, image=image_input, strength=image_strength, num_inference_steps=steps, guidance_scale=cfg_scale, width=width, height=height, generator=generator, joint_attention_kwargs={"scale": lora_scale}, output_type="pil", ).images[0] return final_image @spaces.GPU(duration=70) def run_lora(prompt, image_input, image_strength, cfg_scale, steps, selected_index, randomize_seed, seed, width, height, lora_scale, progress=gr.Progress(track_tqdm=True)): if selected_index is None: raise gr.Error("You must select a LoRA before proceeding.") selected_lora = loras[selected_index] lora_path = selected_lora["repo"] trigger_word = selected_lora["trigger_word"] if(trigger_word): if "trigger_position" in selected_lora: if selected_lora["trigger_position"] == "prepend": prompt_mash = f"{trigger_word} {prompt}" else: prompt_mash = f"{prompt} {trigger_word}" else: prompt_mash = f"{trigger_word} {prompt}" else: prompt_mash = prompt with calculateDuration("Unloading LoRA"): pipe.unload_lora_weights() pipe_i2i.unload_lora_weights() #LoRA weights flow with calculateDuration(f"Loading LoRA weights for {selected_lora['title']}"): pipe_to_use = pipe_i2i if image_input is not None else pipe weight_name = selected_lora.get("weights", None) pipe_to_use.load_lora_weights( lora_path, weight_name=weight_name, low_cpu_mem_usage=True ) with calculateDuration("Randomizing seed"): if randomize_seed: seed = random.randint(0, MAX_SEED) if(image_input is not None): final_image = generate_image_to_image(prompt_mash, image_input, image_strength, steps, cfg_scale, width, height, lora_scale, seed) yield final_image, seed, gr.update(visible=False) else: image_generator = generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, progress) final_image = None step_counter = 0 for image in image_generator: step_counter+=1 final_image = image progress_bar = f'
"+trigger_word+"
as the trigger word" if trigger_word else "No trigger word found. If there's a trigger word, include it in your prompt"}