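# Gradio demo for Latent Consistency Models (LCM_Dreamshaper_v7):
# txt2img, img2img and vid2vid tabs built around the same pipeline components.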
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Optional
import uuid

from lcm.lcm_scheduler import LCMScheduler
from lcm.lcm_pipeline import LatentConsistencyModelPipeline
from lcm.lcm_i2i_pipeline import LatentConsistencyModelImg2ImgPipeline, LCMSchedulerWithTimestamp
from diffusers.image_processor import PipelineImageInput

# import modules.scripts as scripts
# import modules.shared
# from modules import script_callbacks
import os
import random
import time

import cv2
import numpy as np
import gradio as gr
from PIL import Image, PngImagePlugin
import torch

# Load the scheduler and pipeline once at import time; the generation
# functions below reuse these components for every request.
scheduler = LCMScheduler.from_pretrained(
    "SimianLuo/LCM_Dreamshaper_v7", subfolder="scheduler")
pipe = LatentConsistencyModelPipeline.from_pretrained(
    "SimianLuo/LCM_Dreamshaper_v7", scheduler=scheduler, safety_checker=None)
DESCRIPTION = '''# Latent Consistency Model
Running [LCM_Dreamshaper_v7](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7) | [Project Page](https://latent-consistency-models.github.io) | [Extension Page](https://github.com/0xbitches/sd-webui-lcm)
'''
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "768"))
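
# Return a random seed when randomization is enabled, otherwise keep the given seed.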
def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    return seed
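
# Save one image to ./outputs/LCM-txt2img/ with the generation settings embedded as PNG text metadata.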
def save_image(img, metadata: dict):
    save_dir = './outputs/LCM-txt2img/'
    Path(save_dir).mkdir(exist_ok=True, parents=True)
    seed = metadata["seed"]
    unique_id = uuid.uuid4()
    filename = save_dir + f"{unique_id}-{seed}" + ".png"
    meta_tuples = [(k, str(v)) for k, v in metadata.items()]
    png_info = PngImagePlugin.PngInfo()
    for k, v in meta_tuples:
        png_info.add_text(k, v)
    img.save(filename, pnginfo=png_info)
    return filename
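
# Save a batch of images on a thread pool; every image shares the same metadata.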
def save_images(image_array, metadata: dict):
    paths = []
    with ThreadPoolExecutor() as executor:
        paths = list(executor.map(save_image, image_array,
                                  [metadata] * len(image_array)))
    return paths
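
# txt2img generation, wired to the "LCM txt2img" tab.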
def generate(
    prompt: str,
    seed: int = 0,
    width: int = 512,
    height: int = 512,
    guidance_scale: float = 8.0,
    num_inference_steps: int = 4,
    num_images: int = 4,
    randomize_seed: bool = False,
    use_fp16: bool = True,
    use_torch_compile: bool = False,
    use_cpu: bool = False,
    progress=gr.Progress(track_tqdm=True)
):
    seed = randomize_seed_fn(seed, randomize_seed)
    torch.manual_seed(seed)

    selected_device = 'cuda'
    if use_cpu:
        selected_device = "cpu"
        if use_fp16:
            use_fp16 = False
            print("LCM warning: running on CPU, overrode FP16 with FP32")

    # Rebuild the txt2img pipeline from the already-loaded components so the
    # global model weights are reused instead of downloaded again.
    global pipe, scheduler
    pipe = LatentConsistencyModelPipeline(
        vae=pipe.vae,
        text_encoder=pipe.text_encoder,
        tokenizer=pipe.tokenizer,
        unet=pipe.unet,
        scheduler=scheduler,
        safety_checker=pipe.safety_checker,
        feature_extractor=pipe.feature_extractor,
    )
    # pipe = LatentConsistencyModelPipeline.from_pretrained(
    #     "SimianLuo/LCM_Dreamshaper_v7", scheduler=scheduler, safety_checker=None)

    if use_fp16:
        pipe.to(torch_device=selected_device, torch_dtype=torch.float16)
    else:
        pipe.to(torch_device=selected_device, torch_dtype=torch.float32)

    # Windows does not support torch.compile for now
    if os.name != 'nt' and use_torch_compile:
        pipe.unet = torch.compile(pipe.unet, mode='max-autotune')

    start_time = time.time()
    result = pipe(
        prompt=prompt,
        width=width,
        height=height,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        num_images_per_prompt=num_images,
        original_inference_steps=50,
        output_type="pil",
        device=selected_device,
    ).images
    paths = save_images(result, metadata={"prompt": prompt, "seed": seed, "width": width,
                                          "height": height, "guidance_scale": guidance_scale,
                                          "num_inference_steps": num_inference_steps})
    elapsed_time = time.time() - start_time
    print("LCM inference time:", elapsed_time, "seconds")
    return paths, seed
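
# img2img generation, wired to the "LCM img2img" tab.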
def generate_i2i(
    prompt: str,
    image: PipelineImageInput = None,
    strength: float = 0.8,
    seed: int = 0,
    guidance_scale: float = 8.0,
    num_inference_steps: int = 4,
    num_images: int = 4,
    randomize_seed: bool = False,
    use_fp16: bool = True,
    use_torch_compile: bool = False,
    use_cpu: bool = False,
    progress=gr.Progress(track_tqdm=True),
    width: Optional[int] = 512,
    height: Optional[int] = 512,
):
    seed = randomize_seed_fn(seed, randomize_seed)
    torch.manual_seed(seed)

    selected_device = 'cuda'
    if use_cpu:
        selected_device = "cpu"
        if use_fp16:
            use_fp16 = False
            print("LCM warning: running on CPU, overrode FP16 with FP32")

    # Reuse the loaded components inside the img2img pipeline; no scheduler is
    # passed here (the img2img pipeline relies on LCMSchedulerWithTimestamp).
    global pipe, scheduler
    pipe = LatentConsistencyModelImg2ImgPipeline(
        vae=pipe.vae,
        text_encoder=pipe.text_encoder,
        tokenizer=pipe.tokenizer,
        unet=pipe.unet,
        scheduler=None,  # scheduler
        safety_checker=pipe.safety_checker,
        feature_extractor=pipe.feature_extractor,
        requires_safety_checker=False,
    )
    # pipe = LatentConsistencyModelImg2ImgPipeline.from_pretrained(
    #     "SimianLuo/LCM_Dreamshaper_v7", safety_checker=None)

    if use_fp16:
        pipe.to(torch_device=selected_device, torch_dtype=torch.float16)
    else:
        pipe.to(torch_device=selected_device, torch_dtype=torch.float32)

    # Windows does not support torch.compile for now
    if os.name != 'nt' and use_torch_compile:
        pipe.unet = torch.compile(pipe.unet, mode='max-autotune')

    # Generate at the resolution of the uploaded image.
    width, height = image.size

    start_time = time.time()
    result = pipe(
        prompt=prompt,
        image=image,
        strength=strength,
        width=width,
        height=height,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        num_images_per_prompt=num_images,
        original_inference_steps=50,
        output_type="pil",
        device=selected_device,
    ).images
    paths = save_images(result, metadata={"prompt": prompt, "seed": seed, "width": width,
                                          "height": height, "guidance_scale": guidance_scale,
                                          "num_inference_steps": num_inference_steps})
    elapsed_time = time.time() - start_time
    print("LCM inference time:", elapsed_time, "seconds")
    return paths, seed
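
# OpenCV helpers for the vid2vid tab: decode a video into PIL frames and re-encode processed frames.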
def video_to_frames(video_path):
    # Open the video file
    cap = cv2.VideoCapture(video_path)

    # Check if the video opened successfully
    if not cap.isOpened():
        print("Error: LCM could not open video.")
        return

    # Read frames from the video
    pil_images = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Convert BGR to RGB (OpenCV uses BGR by default)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Convert the numpy array to a PIL Image
        pil_image = Image.fromarray(rgb_frame)
        # Append the PIL Image to the list
        pil_images.append(pil_image)

    # Release the video capture object
    cap.release()
    return pil_images
def frames_to_video(pil_images, output_path, fps):
    if not pil_images:
        print("Error: No images to convert.")
        return

    img_array = []
    for pil_image in pil_images:
        img_array.append(np.array(pil_image))

    height, width, layers = img_array[0].shape
    size = (width, height)
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
    for i in range(len(img_array)):
        out.write(cv2.cvtColor(img_array[i], cv2.COLOR_RGB2BGR))
    out.release()
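
# vid2vid generation: run img2img on every frame of the source video, then rebuild a video at the requested FPS.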
def generate_v2v(
    prompt: str,
    video: str = None,
    strength: float = 0.8,
    seed: int = 0,
    guidance_scale: float = 8.0,
    num_inference_steps: int = 4,
    randomize_seed: bool = False,
    use_fp16: bool = True,
    use_torch_compile: bool = False,
    use_cpu: bool = False,
    fps: int = 10,
    save_frames: bool = False,
    # progress=gr.Progress(track_tqdm=True),
    width: Optional[int] = 512,
    height: Optional[int] = 512,
    num_images: Optional[int] = 1,
):
    seed = randomize_seed_fn(seed, randomize_seed)
    torch.manual_seed(seed)

    selected_device = 'cuda'
    if use_cpu:
        selected_device = "cpu"
        if use_fp16:
            use_fp16 = False
            print("LCM warning: running on CPU, overrode FP16 with FP32")

    global pipe, scheduler
    pipe = LatentConsistencyModelImg2ImgPipeline(
        vae=pipe.vae,
        text_encoder=pipe.text_encoder,
        tokenizer=pipe.tokenizer,
        unet=pipe.unet,
        scheduler=None,
        safety_checker=pipe.safety_checker,
        feature_extractor=pipe.feature_extractor,
        requires_safety_checker=False,
    )
    # pipe = LatentConsistencyModelImg2ImgPipeline.from_pretrained(
    #     "SimianLuo/LCM_Dreamshaper_v7", safety_checker=None)

    if use_fp16:
        pipe.to(torch_device=selected_device, torch_dtype=torch.float16)
    else:
        pipe.to(torch_device=selected_device, torch_dtype=torch.float32)

    # Windows does not support torch.compile for now
    if os.name != 'nt' and use_torch_compile:
        pipe.unet = torch.compile(pipe.unet, mode='max-autotune')

    frames = video_to_frames(video)
    if frames is None:
        print("Error: LCM could not convert video.")
        return
    # Generate at the resolution of the source video.
    width, height = frames[0].size

    start_time = time.time()
    results = []
    for frame in frames:
        result = pipe(
            prompt=prompt,
            image=frame,
            strength=strength,
            width=width,
            height=height,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            num_images_per_prompt=1,
            original_inference_steps=50,
            output_type="pil",
            device=selected_device,
        ).images
        if save_frames:
            save_images(result, metadata={"prompt": prompt, "seed": seed, "width": width,
                                          "height": height, "guidance_scale": guidance_scale,
                                          "num_inference_steps": num_inference_steps})
        results.extend(result)

    elapsed_time = time.time() - start_time
    print("LCM vid2vid inference complete! Processing", len(frames), "frames took", elapsed_time, "seconds")

    # Write the processed frames to a video file alongside the other outputs.
    save_dir = './outputs/LCM-vid2vid/'
    Path(save_dir).mkdir(exist_ok=True, parents=True)
    unique_id = uuid.uuid4()
    _, input_ext = os.path.splitext(video)
    output_path = save_dir + f"{unique_id}-{seed}" + f"{input_ext}"
    frames_to_video(results, output_path, fps)
    return output_path
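
# Example prompts shown under the txt2img tab.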
examples = [
    "portrait photo of a girl, photograph, highly detailed face, depth of field, moody light, golden hour, style by Dan Winters, Russell James, Steve McCurry, centered, extremely detailed, Nikon D850, award winning photography",
    "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
    "A photo of beautiful mountain with realistic sunset and blue lake, highly detailed, masterpiece",
]
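
# Gradio UI: three tabs (txt2img, img2img, vid2vid) with similar advanced options.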
with gr.Blocks() as lcm:
    with gr.Tab("LCM txt2img"):
        gr.Markdown("Latent Consistency Models: Synthesizing High-Resolution Images with Few-step Inference")
        gr.Markdown("Try the guide on Colab's free tier [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/R3gm/InsightSolver-Colab/blob/main/Latent_Consistency_Models.ipynb)")
        with gr.Row():
            prompt = gr.Textbox(label="Prompt",
                                show_label=False,
                                lines=3,
                                placeholder="Prompt",
                                elem_classes=["prompt"])
            run_button = gr.Button("Run", scale=0)
        with gr.Row():
            result = gr.Gallery(
                label="Generated images", show_label=False, elem_id="gallery", grid=[2], preview=True
            )
        with gr.Accordion("Advanced options", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
                randomize=True
            )
            randomize_seed = gr.Checkbox(
                label="Randomize seed across runs", value=True)
            use_fp16 = gr.Checkbox(
                label="Run LCM in fp16 (for lower VRAM)", value=False)
            use_torch_compile = gr.Checkbox(
                label="Run LCM with torch.compile (currently not supported on Windows)", value=False)
            use_cpu = gr.Checkbox(label="Run LCM on CPU", value=True)
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,
                )
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale for base",
                    minimum=2,
                    maximum=14,
                    step=0.1,
                    value=8.0,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps for base",
                    minimum=1,
                    maximum=8,
                    step=1,
                    value=4,
                )
            with gr.Row():
                num_images = gr.Slider(
                    label="Number of images (batch count)",
                    minimum=1,
                    # Assumes a fallback of 4 when MAX_NUM_IMAGES is not set in the environment.
                    maximum=int(os.getenv("MAX_NUM_IMAGES", "4")),
                    step=1,
                    value=1,
                )
        gr.Examples(
            examples=examples,
            inputs=prompt,
            outputs=result,
            fn=generate
        )

        run_button.click(
            fn=generate,
            inputs=[
                prompt,
                seed,
                width,
                height,
                guidance_scale,
                num_inference_steps,
                num_images,
                randomize_seed,
                use_fp16,
                use_torch_compile,
                use_cpu
            ],
            outputs=[result, seed],
        )
with gr.Tab("LCM img2img"):
with gr.Row():
prompt = gr.Textbox(label="Prompt",
show_label=False,
lines=3,
placeholder="Prompt",
elem_classes=["prompt"])
run_i2i_button = gr.Button("Run", scale=0)
with gr.Row():
image_input = gr.Image(label="Upload your Image", type="pil")
result = gr.Gallery(
label="Generated images",
show_label=False,
elem_id="gallery",
preview=True
)
with gr.Accordion("Advanced options", open=False):
seed = gr.Slider(
label="Seed",
minimum=0,
maximum=MAX_SEED,
step=1,
value=0,
randomize=True
)
randomize_seed = gr.Checkbox(
label="Randomize seed across runs", value=True)
use_fp16 = gr.Checkbox(
label="Run LCM in fp16 (for lower VRAM)", value=False)
use_torch_compile = gr.Checkbox(
label="Run LCM with torch.compile (currently not supported on Windows)", value=False)
use_cpu = gr.Checkbox(label="Run LCM on CPU", value=True)
with gr.Row():
guidance_scale = gr.Slider(
label="Guidance scale for base",
minimum=2,
maximum=14,
step=0.1,
value=8.0,
)
num_inference_steps = gr.Slider(
label="Number of inference steps for base",
minimum=1,
maximum=8,
step=1,
value=4,
)
with gr.Row():
num_images = gr.Slider(
label="Number of images (batch count)",
minimum=1,
maximum=int(os.getenv("MAX_NUM_IMAGES")),
step=1,
value=1,
)
strength = gr.Slider(
label="Prompt Strength",
minimum=0.1,
maximum=1.0,
step=0.1,
value=0.5,
)
run_i2i_button.click(
fn=generate_i2i,
inputs=[
prompt,
image_input,
strength,
seed,
guidance_scale,
num_inference_steps,
num_images,
randomize_seed,
use_fp16,
use_torch_compile,
use_cpu
],
outputs=[result, seed],
)
with gr.Tab("LCM vid2vid"):
show_v2v = False if os.getenv("SHOW_VID2VID") == "NO" else True
gr.Markdown("Not recommended for use with CPU. Duplicate the space and modify SHOW_VID2VID to enable it. 🚫💻")
with gr.Tabs(visible=show_v2v) as tabs:
#with gr.Tab("", visible=show_v2v):
with gr.Row():
prompt = gr.Textbox(label="Prompt",
show_label=False,
lines=3,
placeholder="Prompt",
elem_classes=["prompt"])
run_v2v_button = gr.Button("Run", scale=0)
with gr.Row():
video_input = gr.Video(label="Source Video")
video_output = gr.Video(label="Generated Video")
with gr.Accordion("Advanced options", open=False):
seed = gr.Slider(
label="Seed",
minimum=0,
maximum=MAX_SEED,
step=1,
value=0,
randomize=True
)
randomize_seed = gr.Checkbox(
label="Randomize seed across runs", value=True)
use_fp16 = gr.Checkbox(
label="Run LCM in fp16 (for lower VRAM)", value=False)
use_torch_compile = gr.Checkbox(
label="Run LCM with torch.compile (currently not supported on Windows)", value=False)
use_cpu = gr.Checkbox(label="Run LCM on CPU", value=True)
save_frames = gr.Checkbox(label="Save intermediate frames", value=False)
with gr.Row():
guidance_scale = gr.Slider(
label="Guidance scale for base",
minimum=2,
maximum=14,
step=0.1,
value=8.0,
)
num_inference_steps = gr.Slider(
label="Number of inference steps for base",
minimum=1,
maximum=8,
step=1,
value=4,
)
with gr.Row():
fps = gr.Slider(
label="Output FPS",
minimum=1,
maximum=200,
step=1,
value=10,
)
strength = gr.Slider(
label="Prompt Strength",
minimum=0.1,
maximum=1.0,
step=0.05,
value=0.5,
)
run_v2v_button.click(
fn=generate_v2v,
inputs=[
prompt,
video_input,
strength,
seed,
guidance_scale,
num_inference_steps,
randomize_seed,
use_fp16,
use_torch_compile,
use_cpu,
fps,
save_frames
],
outputs=video_output,
)
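
# Enable request queueing and launch the app when run as a script.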
if __name__ == "__main__":
    lcm.queue().launch()