RNS-NoobAI-Hybrid

Running on Zero

App Files Files Community

RNS-NoobAI-Hybrid / app.py

silveroxides

Update app.py

90963cb verified about 14 hours ago

raw

history blame contribute delete

18.3 kB

	import spaces
	import os
	import torch
	import random
	from huggingface_hub import snapshot_download
	from diffusers import StableDiffusionXLPipeline, AutoencoderKL
	from diffusers import (
	EulerAncestralDiscreteScheduler,
	DPMSolverMultistepScheduler,
	DPMSolverSDEScheduler,
	HeunDiscreteScheduler,
	DDIMScheduler,
	LMSDiscreteScheduler,
	PNDMScheduler,
	UniPCMultistepScheduler,
	)
	from diffusers.models.attention_processor import AttnProcessor2_0
	import gradio as gr
	from PIL import Image
	import numpy as np
	from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
	from safetensors.torch import load_file
	import requests
	from RealESRGAN import RealESRGAN

	import os

	from unittest.mock import patch
	from typing import Union
	from transformers.dynamic_module_utils import get_imports


	def fixed_get_imports(filename):
	    """Work around for https://huggingface.co/microsoft/phi-1_5/discussions/72.

	    Return the import names ``get_imports`` reports for *filename*. When the
	    file is Florence-2's ``modeling_florence2.py`` the ``flash_attn`` entry is
	    dropped from that list; every other file is passed through unchanged.

	    The removal is guarded because ``list.remove`` raises ``ValueError`` when
	    the entry is missing, which would turn this workaround into a crash for a
	    modeling file that does not list ``flash_attn``.
	    """
	    imports = get_imports(filename)
	    if str(filename).endswith("/modeling_florence2.py") and "flash_attn" in imports:
	        imports.remove("flash_attn")
	    return imports


	import subprocess
	#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

	def download_file(url, folder_path, filename):
	    """Download *url* to ``folder_path/filename`` unless that file already exists.

	    Args:
	        url: HTTP(S) address of the file to fetch.
	        folder_path: Directory to store the file in; created when missing.
	        filename: Name of the file inside *folder_path*.

	    Returns:
	        None. The outcome is reported on stdout; a non-200 response is
	        reported and otherwise ignored, as before.

	    The payload is streamed into ``<file>.part`` and only renamed to its final
	    name once the transfer completed, so an interrupted download can never be
	    mistaken for a finished file by the "already exists" check on a later run.
	    """
	    os.makedirs(folder_path, exist_ok=True)
	    file_path = os.path.join(folder_path, filename)

	    if os.path.isfile(file_path):
	        print(f"File already exists: {file_path}")
	        return

	    partial_path = file_path + ".part"
	    # The timeout keeps a stalled server from hanging start-up forever, and
	    # the context manager releases the connection on every exit path.
	    with requests.get(url, stream=True, timeout=60) as response:
	        if response.status_code != 200:
	            print(f"Error downloading the file. Status code: {response.status_code}")
	            return
	        try:
	            with open(partial_path, 'wb') as file:
	                for chunk in response.iter_content(chunk_size=1024 * 1024):
	                    file.write(chunk)
	            os.replace(partial_path, file_path)
	        finally:
	            # Still present only when the transfer or the rename failed.
	            if os.path.exists(partial_path):
	                os.remove(partial_path)
	    print(f"File successfully downloaded and saved: {file_path}")

	# Fetch the Real-ESRGAN checkpoints and the low-step LoRA into ./models
	# (download_file skips anything that is already on disk).
	for _asset_url, _asset_dir, _asset_name in (
	    (
	        "https://huggingface.co/ai-forever/Real-ESRGAN/resolve/main/RealESRGAN_x2.pth?download=true",
	        "models/upscalers/",
	        "RealESRGAN_x2.pth",
	    ),
	    (
	        "https://huggingface.co/ai-forever/Real-ESRGAN/resolve/main/RealESRGAN_x4.pth?download=true",
	        "models/upscalers/",
	        "RealESRGAN_x4.pth",
	    ),
	    (
	        "https://huggingface.co/silveroxides/sdxl_lowstep_beta_v1/resolve/main/pytorch_lora_weights.safetensors?download=true",
	        "models/lora/",
	        "LowStepBeta_v001.safetensors",
	    ),
	):
	    download_file(_asset_url, _asset_dir, _asset_name)

	# Resolve a local snapshot directory for each of the three checkpoints
	ckpt_dir_realpony, ckpt_dir_noobai, ckpt_dir_hybridpony = (
	    snapshot_download(repo_id=_repo_id)
	    for _repo_id in (
	        "silveroxides/RNS_RealPonyV20",
	        "silveroxides/NoobAI-XL-EPS-1.0-Vwe",
	        "silveroxides/RealHybridPony",
	    )
	)

	# Load the models
	def _load_sdxl_pipeline(ckpt_dir):
	    """Build one fp16 SDXL pipeline (plus its VAE) from the snapshot in *ckpt_dir*.

	    The VAE is read from the ``vae`` sub-folder, the pipeline is created with
	    the perturbed-attention-guidance custom pipeline, moved to CUDA and its
	    UNet switched to ``AttnProcessor2_0``. Returns ``(vae, pipe)``.
	    """
	    vae = AutoencoderKL.from_pretrained(
	        os.path.join(ckpt_dir, "vae"),
	        torch_dtype=torch.float16,
	    )
	    pipe = StableDiffusionXLPipeline.from_pretrained(
	        ckpt_dir,
	        vae=vae,
	        custom_pipeline="multimodalart/sdxl_perturbed_attention_guidance",
	        torch_dtype=torch.float16,
	        use_safetensors=True,
	    )
	    pipe = pipe.to("cuda")
	    pipe.unet.set_attn_processor(AttnProcessor2_0())
	    return vae, pipe


	vae_realpony, pipe_realpony = _load_sdxl_pipeline(ckpt_dir_realpony)
	vae_noobai, pipe_noobai = _load_sdxl_pipeline(ckpt_dir_noobai)
	vae_hybridpony, pipe_hybridpony = _load_sdxl_pipeline(ckpt_dir_hybridpony)


	# Define samplers: every scheduler is derived from the Real Pony pipeline's
	# scheduler config; the dict order is the order shown in the UI dropdown.
	_scheduler_config = pipe_realpony.scheduler.config
	_sampler_specs = (
	    ("Euler a", EulerAncestralDiscreteScheduler, {}),
	    ("DPM++ SDE Karras", DPMSolverSDEScheduler, {"use_karras_sigmas": True}),
	    ("Heun", HeunDiscreteScheduler, {}),
	    # New samplers
	    (
	        "DPM++ 2M SDE Karras",
	        DPMSolverMultistepScheduler,
	        {"use_karras_sigmas": True, "algorithm_type": "sde-dpmsolver++"},
	    ),
	    ("DPM++ 2M", DPMSolverMultistepScheduler, {}),
	    ("DDIM", DDIMScheduler, {}),
	    ("LMS", LMSDiscreteScheduler, {}),
	    ("PNDM", PNDMScheduler, {}),
	    ("UniPC", UniPCMultistepScheduler, {}),
	)
	samplers = {
	    _label: _scheduler_cls.from_config(_scheduler_config, **_overrides)
	    for _label, _scheduler_cls, _overrides in _sampler_specs
	}

	# Default prompt fragments. generate_image() puts them before/after the
	# user's positive and negative prompts when the matching "Use ... Prefix" /
	# "Use ... Suffix" checkbox is ticked, and the UI shows each one as that
	# checkbox's info text.
	DEFAULT_POSITIVE_PREFIX = "score_8_up, score_7_up, accurate, genuine"
	DEFAULT_POSITIVE_SUFFIX = "perfect composition, detailed, photorealism, real life, raw, reality, cinematic"
	DEFAULT_NEGATIVE_PREFIX = "score_1, score_2, score_3, text, artist name, signature, watermark, logo, url, web address"
	DEFAULT_NEGATIVE_SUFFIX = "low quality, low resolution, simple background, bad composition, deformed, disfigured, sketch, unfinished"

	# Initialize Florence model
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# The low-step LoRA is not pre-loaded here; generate_image() loads it on demand.
	# NOTE: the get_imports patch (fixed_get_imports) is currently not applied
	# around these loads, and trust_remote_code=True runs code shipped with the
	# model repository.
	_FLORENCE_REPO = 'microsoft/Florence-2-base'
	florence_model = AutoModelForCausalLM.from_pretrained(_FLORENCE_REPO, trust_remote_code=True)
	florence_model = florence_model.to(device).eval()
	florence_processor = AutoProcessor.from_pretrained(_FLORENCE_REPO, trust_remote_code=True)

	# Prompt Enhancer: two summarization pipelines, selected by enhance_prompt()
	enhancer_medium, enhancer_long = (
	    pipeline("summarization", model=_enhancer_repo, device=device)
	    for _enhancer_repo in (
	        "gokaygokay/Lamini-Prompt-Enchance",
	        "gokaygokay/Lamini-Prompt-Enchance-Long",
	    )
	)

	class LazyRealESRGAN:
	    """Real-ESRGAN upscaler that builds its network on the first prediction.

	    Construction only records the target device and the scale factor; the
	    RealESRGAN model is created, and its weights read from
	    ``models/upscalers/RealESRGAN_x<scale>.pth``, the first time it is needed.
	    """

	    def __init__(self, device, scale):
	        self.model = None  # created by load_model()
	        self.scale = scale
	        self.device = device

	    def load_model(self):
	        """Create the model and load its local weights unless already done."""
	        if self.model is not None:
	            return
	        self.model = RealESRGAN(self.device, scale=self.scale)
	        weights_path = f'models/upscalers/RealESRGAN_x{self.scale}.pth'
	        self.model.load_weights(weights_path, download=False)

	    def predict(self, img):
	        """Return the model's prediction for *img*, loading the model if needed."""
	        self.load_model()
	        upscaler = self.model
	        return upscaler.predict(img)

	# From here on `device` is a torch.device (it was a plain string above).
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

	# One lazily-initialised upscaler per supported factor
	lazy_realesrgan_x2 = LazyRealESRGAN(device, scale=2)
	lazy_realesrgan_x4 = LazyRealESRGAN(device, scale=4)

	# Florence caption function
	def florence_caption(image):
	    """Return Florence-2's ``<DETAILED_CAPTION>`` result for *image*.

	    *image* may be a PIL image; anything else is handed to
	    ``Image.fromarray`` first. Generation uses beam search with 3 beams, no
	    sampling, and at most 1024 new tokens.
	    """
	    task_token = "<DETAILED_CAPTION>"
	    pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)

	    batch = florence_processor(text=task_token, images=pil_image, return_tensors="pt")
	    batch = batch.to(device)
	    token_ids = florence_model.generate(
	        input_ids=batch["input_ids"],
	        pixel_values=batch["pixel_values"],
	        max_new_tokens=1024,
	        early_stopping=False,
	        do_sample=False,
	        num_beams=3,
	    )
	    # Special tokens are kept so that post-processing can parse the task output.
	    decoded = florence_processor.batch_decode(token_ids, skip_special_tokens=False)
	    result = florence_processor.post_process_generation(
	        decoded[0],
	        task=task_token,
	        image_size=(pil_image.width, pil_image.height),
	    )
	    return result[task_token]

	# Prompt Enhancer function
	def enhance_prompt(input_prompt, model_choice):
	    """Return the enhancer's rewrite of *input_prompt*.

	    ``model_choice == "Medium"`` selects the medium enhancer; any other value
	    selects the long one.
	    """
	    enhancer = enhancer_medium if model_choice == "Medium" else enhancer_long
	    outputs = enhancer("Enhance the description: " + input_prompt)
	    return outputs[0]['summary_text']

	def upscale_image(image, scale):
	    """Upscale *image* by 2 or 4 with Real-ESRGAN.

	    Accepts a PIL image or a numpy array (converted to PIL first) and raises
	    ``ValueError`` for any other input type. A *scale* other than 2 or 4
	    returns the (PIL) image without upscaling.
	    """
	    if not isinstance(image, (Image.Image, np.ndarray)):
	        raise ValueError("Input must be a PIL Image or a numpy array")
	    if not isinstance(image, Image.Image):
	        image = Image.fromarray(image)

	    if scale == 2:
	        upscaler = lazy_realesrgan_x2
	    elif scale == 4:
	        upscaler = lazy_realesrgan_x4
	    else:
	        return image
	    return upscaler.predict(image)

	def _join_prompt_parts(*parts):
	    """Join the non-empty prompt fragments with ", " (empty/None ones are dropped)."""
	    return ", ".join(part for part in parts if part)


	def _as_tag_text(text):
	    """Lower-case *text* and replace periods with commas so it reads as a tag list."""
	    return text.lower().replace('.', ',')


	@spaces.GPU(duration=120)
	def generate_image(model_choice, additional_positive_prompt, additional_negative_prompt, height, width, num_inference_steps,
	                   guidance_scale, pag_scale, pag_layers, num_images_per_prompt, use_lowstep_lora, use_random_seed, seed, sampler, clip_skip,
	                   use_florence2, use_medium_enhancer, use_long_enhancer,
	                   use_positive_prefix, use_positive_suffix, use_negative_prefix, use_negative_suffix,
	                   use_upscaler, upscale_factor,
	                   input_image=None, progress=gr.Progress(track_tqdm=True)):
	    """Generate images with the selected SDXL pipeline and optionally upscale them.

	    Args:
	        model_choice: "Real Pony RNS" or "NoobAI-XL EPS 1.0"; any other value
	            selects the Hybrid Pony pipeline.
	        additional_positive_prompt: User text for the positive prompt (may be empty).
	        additional_negative_prompt: User text for the negative prompt (may be empty).
	        height, width, num_inference_steps, guidance_scale, pag_scale,
	        pag_layers, num_images_per_prompt: Forwarded to the pipeline call.
	        use_lowstep_lora: Load the low-step LoRA for this generation only.
	        use_random_seed: Draw a fresh seed instead of using *seed*.
	        seed: Seed used when *use_random_seed* is false.
	        sampler: Key into the module-level ``samplers`` dict.
	        clip_skip: The text encoder config's ``num_hidden_layers`` is lowered
	            by ``clip_skip - 1`` for the duration of this call.
	        use_florence2: Caption *input_image* and prepend the caption to the
	            positive prompt.
	        use_medium_enhancer, use_long_enhancer: Append enhancer output to the
	            positive prompt.
	        use_positive_prefix, use_positive_suffix, use_negative_prefix,
	        use_negative_suffix: Add the matching ``DEFAULT_*`` fragment.
	        use_upscaler: Upscale every generated image by *upscale_factor*.
	        upscale_factor: 2 or 4.
	        input_image: Optional image for the Florence-2 captioner.
	        progress: Gradio progress tracker (created with ``track_tqdm=True``).

	    Returns:
	        ``(images, seed, full_positive_prompt, full_negative_prompt)``;
	        *images* is ``None`` when generation or upscaling raised.
	    """
	    # Select the appropriate pipe based on the model choice
	    if model_choice == "Real Pony RNS":
	        pipe = pipe_realpony
	    elif model_choice == "NoobAI-XL EPS 1.0":
	        pipe = pipe_noobai
	    else:  # "Hybrid Pony SDXL"
	        pipe = pipe_hybridpony

	    if use_random_seed:
	        seed = random.randint(0, 2**32 - 1)
	    else:
	        seed = int(seed)  # Ensure seed is an integer

	    # Add Florence-2 caption if enabled and image is provided
	    if use_florence2 and input_image is not None:
	        caption = _as_tag_text(florence_caption(input_image))
	        additional_positive_prompt = _join_prompt_parts(caption, additional_positive_prompt)

	    # Enhance only the additional positive prompt if enhancers are enabled
	    enhanced_prompt = additional_positive_prompt
	    if enhanced_prompt:
	        if use_medium_enhancer:
	            medium_enhanced = _as_tag_text(enhance_prompt(enhanced_prompt, "Medium"))
	            enhanced_prompt = _join_prompt_parts(enhanced_prompt, medium_enhanced)
	        if use_long_enhancer:
	            long_enhanced = _as_tag_text(enhance_prompt(enhanced_prompt, "Long"))
	            enhanced_prompt = _join_prompt_parts(enhanced_prompt, long_enhanced)

	    # Joining only the non-empty fragments avoids stray separators such as
	    # "prefix, , suffix" or a leading ", " when the user prompt is empty.
	    full_positive_prompt = _join_prompt_parts(
	        DEFAULT_POSITIVE_PREFIX if use_positive_prefix else "",
	        enhanced_prompt,
	        DEFAULT_POSITIVE_SUFFIX if use_positive_suffix else "",
	    )
	    full_negative_prompt = _join_prompt_parts(
	        DEFAULT_NEGATIVE_PREFIX if use_negative_prefix else "",
	        additional_negative_prompt,
	        DEFAULT_NEGATIVE_SUFFIX if use_negative_suffix else "",
	    )

	    # Set the scheduler based on the selected sampler
	    pipe.scheduler = samplers[sampler]

	    # The pipelines are module-level objects shared by every request, so the
	    # clip-skip tweak and the LoRA must be undone afterwards. Otherwise the
	    # layer count keeps shrinking call after call and the LoRA stays active
	    # for requests that did not ask for it.
	    text_encoder_config = pipe.text_encoder.config
	    base_hidden_layers = text_encoder_config.num_hidden_layers
	    lora_loaded = False
	    try:
	        if use_lowstep_lora:
	            pipe.load_lora_weights("./models/lora", weight_name="LowStepBeta_v001.safetensors")
	            lora_loaded = True

	        # Set clip skip
	        text_encoder_config.num_hidden_layers = base_hidden_layers - (int(clip_skip) - 1)

	        try:
	            images = pipe(
	                prompt=full_positive_prompt,
	                negative_prompt=full_negative_prompt,
	                # Sliders may deliver floats; these values must be integers.
	                height=int(height),
	                width=int(width),
	                num_inference_steps=int(num_inference_steps),
	                guidance_scale=guidance_scale,
	                pag_scale=pag_scale,
	                pag_layers=pag_layers,
	                num_images_per_prompt=int(num_images_per_prompt),
	                generator=torch.Generator(pipe.device).manual_seed(seed)
	            ).images

	            if use_upscaler:
	                print("Upscaling images")
	                upscaled_images = []
	                for index, img in enumerate(images, start=1):
	                    print(f"Upscaling image {index}")
	                    if not isinstance(img, Image.Image):
	                        print(f"Converting image {index} to PIL Image")
	                        img = Image.fromarray(np.uint8(img))
	                    upscaled_images.append(upscale_image(img, upscale_factor))
	                images = upscaled_images

	            print("Returning results")
	            return images, seed, full_positive_prompt, full_negative_prompt
	        except Exception as e:
	            # Best effort: log the failure and still report seed and prompts.
	            print(f"Error during image generation: {str(e)}")
	            import traceback
	            traceback.print_exc()
	            return None, seed, full_positive_prompt, full_negative_prompt
	    finally:
	        text_encoder_config.num_hidden_layers = base_hidden_layers
	        if lora_loaded:
	            pipe.unload_lora_weights()

	# Gradio interface
	# NOTE(review): the container nesting below was reconstructed from context
	# (the source had lost its indentation) — confirm against the running Space.
	with gr.Blocks(theme='bethecloud/storj_theme') as demo:
	    gr.HTML("""
	<h1 align="center">Real Pony RNS / NoobAI-XL EPS 1.0 / Hybrid Pony SDXL</h1>
	<p align="center">
	<a href="https://huggingface.co/silveroxides/RNS_RealPonyV20" target="_blank">[RNS RealPony]</a>
	<a href="https://huggingface.co/silveroxides/NoobAI-XL-EPS-1.0-Vwe" target="_blank">[NoobAI-XL EPS 1.0]</a>
	<a href="https://huggingface.co/silveroxides/RealHybridPony" target="_blank">[Hybrid Pony SDXL]</a><br>
	<a href="https://huggingface.co/microsoft/Florence-2-base" target="_blank">[Florence-2 Model]</a>
	<a href="https://huggingface.co/gokaygokay/Lamini-Prompt-Enchance-Long" target="_blank">[Prompt Enhancer Long]</a>
	<a href="https://huggingface.co/gokaygokay/Lamini-Prompt-Enchance" target="_blank">[Prompt Enhancer Medium]</a>
	</p>
	""")

	    with gr.Row():
	        # Left column: every generation control
	        with gr.Column(scale=1):
	            model_choice = gr.Dropdown(
	                choices=["Real Pony RNS", "NoobAI-XL EPS 1.0", "Hybrid Pony SDXL"],
	                value="Real Pony RNS",
	                label="Model Choice",
	            )
	            positive_prompt = gr.Textbox(
	                label="Positive Prompt",
	                placeholder="Add your positive prompt here",
	            )
	            negative_prompt = gr.Textbox(
	                label="Negative Prompt",
	                placeholder="Add your negative prompt here",
	            )

	            with gr.Accordion(label="Advanced settings", open=False):
	                height = gr.Slider(minimum=512, maximum=2048, value=1024, step=64, label="Height")
	                width = gr.Slider(minimum=512, maximum=2048, value=1024, step=64, label="Width")
	                num_inference_steps = gr.Slider(
	                    minimum=20, maximum=100, value=30, step=1,
	                    label="Number of Inference Steps",
	                )
	                guidance_scale = gr.Slider(minimum=1, maximum=20, value=6, step=0.1, label="Guidance Scale")
	                pag_scale = gr.Number(value=3.0, label="Pag Scale")
	                pag_layers = gr.Dropdown(
	                    choices=["up", "mid", "down"],
	                    value="mid",
	                    multiselect=True,
	                    label="Model layers to apply Pag to",
	                    info="mid is the one used on the paper, up and down blocks seem unstable",
	                )
	                num_images_per_prompt = gr.Slider(
	                    minimum=1, maximum=4, value=1, step=1,
	                    label="Number of images per prompt",
	                )
	                use_lowstep_lora = gr.Checkbox(value=False, label="Use Lowstep LoRA")
	                use_random_seed = gr.Checkbox(value=True, label="Use Random Seed")
	                seed = gr.Number(value=0, precision=0, label="Seed")
	                sampler = gr.Dropdown(choices=list(samplers.keys()), value="Euler a", label="Sampler")
	                clip_skip = gr.Slider(minimum=1, maximum=4, value=2, step=1, label="Clip skip")

	            with gr.Accordion(label="Captioner and Enhancers", open=False):
	                input_image = gr.Image(label="Input Image for Florence-2 Captioner")
	                use_florence2 = gr.Checkbox(value=False, label="Use Florence-2 Captioner")
	                use_medium_enhancer = gr.Checkbox(value=False, label="Use Medium Prompt Enhancer")
	                use_long_enhancer = gr.Checkbox(value=False, label="Use Long Prompt Enhancer")

	            with gr.Accordion(label="Upscaler Settings", open=False):
	                use_upscaler = gr.Checkbox(value=False, label="Use Upscaler")
	                upscale_factor = gr.Radio(choices=[2, 4], value=2, label="Upscale Factor")

	            generate_btn = gr.Button(value="Generate Image")

	            with gr.Accordion(label="Prefix and Suffix Settings", open=True):
	                use_positive_prefix = gr.Checkbox(
	                    value=True,
	                    label="Use Positive Prefix",
	                    info=f"Prefix: {DEFAULT_POSITIVE_PREFIX}",
	                )
	                use_positive_suffix = gr.Checkbox(
	                    value=True,
	                    label="Use Positive Suffix",
	                    info=f"Suffix: {DEFAULT_POSITIVE_SUFFIX}",
	                )
	                use_negative_prefix = gr.Checkbox(
	                    value=True,
	                    label="Use Negative Prefix",
	                    info=f"Prefix: {DEFAULT_NEGATIVE_PREFIX}",
	                )
	                use_negative_suffix = gr.Checkbox(
	                    value=True,
	                    label="Use Negative Suffix",
	                    info=f"Suffix: {DEFAULT_NEGATIVE_SUFFIX}",
	                )

	        # Right column: results of the last generation
	        with gr.Column(scale=1):
	            output_gallery = gr.Gallery(label="Result", elem_id="gallery", show_label=False)
	            seed_used = gr.Number(label="Seed Used")
	            full_positive_prompt_used = gr.Textbox(label="Full Positive Prompt Used")
	            full_negative_prompt_used = gr.Textbox(label="Full Negative Prompt Used")

	    # The order of the inputs must follow generate_image's positional parameters.
	    generation_inputs = [
	        model_choice,
	        positive_prompt,
	        negative_prompt,
	        height,
	        width,
	        num_inference_steps,
	        guidance_scale,
	        pag_scale,
	        pag_layers,
	        num_images_per_prompt,
	        use_lowstep_lora,
	        use_random_seed,
	        seed,
	        sampler,
	        clip_skip,
	        use_florence2,
	        use_medium_enhancer,
	        use_long_enhancer,
	        use_positive_prefix,
	        use_positive_suffix,
	        use_negative_prefix,
	        use_negative_suffix,
	        use_upscaler,
	        upscale_factor,
	        input_image,
	    ]
	    generation_outputs = [
	        output_gallery,
	        seed_used,
	        full_positive_prompt_used,
	        full_negative_prompt_used,
	    ]
	    generate_btn.click(fn=generate_image, inputs=generation_inputs, outputs=generation_outputs)

	demo.launch(debug=True)