Spaces:

AIML-TUDA
/

semantic-diffusion

Sleeping

App Files Files Community

semantic-diffusion / app.py

mbrack

Initial commit

2bb6dee over 1 year ago

raw history blame

No virus

11.9 kB

	from contextlib import nullcontext
	import gradio as gr
	import torch
	from torch import autocast
	from diffusers import SemanticStableDiffusionPipeline

	# Run on the GPU when one is available, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# NOTE(review): the leading/trailing slashes make this look like an absolute
	# local path rather than the Hub repo id "runwayml/stable-diffusion-v1-5" --
	# confirm which one is intended for the deployment environment.
	pipe = SemanticStableDiffusionPipeline.from_pretrained("/runwayml/stable-diffusion-v1-5/")
	pipe = pipe.to(device)
	# Shared generator; infer() re-seeds it before every pipeline call.
	gen = torch.Generator(device=device)

	# The NSFW checker can be confused by some generated images; you can disable
	# it at your own risk here.
	disable_safety = False

	if disable_safety:
	    def null_safety(images, **kwargs):
	        """Pass-through replacement for the safety checker: flags nothing."""
	        return images, False

	    pipe.safety_checker = null_safety


	def infer(
	    prompt,
	    steps,
	    scale,
	    seed,
	    editing_prompt_1=None,
	    reverse_editing_direction_1=False,
	    edit_warmup_steps_1=10,
	    edit_guidance_scale_1=5,
	    edit_threshold_1=0.95,
	    editing_prompt_2=None,
	    reverse_editing_direction_2=False,
	    edit_warmup_steps_2=10,
	    edit_guidance_scale_2=5,
	    edit_threshold_2=0.95,
	    edit_momentum_scale=0.5,
	    edit_mom_beta=0.6,
	):
	    """Generate images for ``prompt`` twice: once plain, once with edits.

	    Both pipeline calls use the same seed, guidance scale and step count, so
	    the second result differs from the first only through the edit settings.

	    Args:
	        prompt: Text prompt passed to the pipeline.
	        steps: Value forwarded as ``num_inference_steps``.
	        scale: Value forwarded as ``guidance_scale``.
	        seed: Seed applied to the shared generator before each run.
	        editing_prompt_1, editing_prompt_2: Optional edit prompts. A prompt
	            that is ``None`` or at most one character long is dropped
	            together with its per-edit settings.
	        reverse_editing_direction_N, edit_warmup_steps_N,
	        edit_guidance_scale_N, edit_threshold_N: Per-edit settings, forwarded
	            to the pipeline as lists aligned with the kept edit prompts.
	        edit_momentum_scale: Forwarded to the pipeline unchanged.
	        edit_mom_beta: Forwarded to the pipeline unchanged.

	    Returns:
	        The images of the plain run followed by the images of the edited run.
	    """
	    # UI sliders may deliver floats; the generator seed must be an int.
	    seed = int(seed)
	    steps = int(steps)

	    gen.manual_seed(seed)
	    images = pipe(prompt, guidance_scale=scale, num_inference_steps=steps, generator=gen).images

	    # Keep every edit's settings bundled so filtering cannot misalign them.
	    candidate_edits = [
	        (editing_prompt_1, reverse_editing_direction_1, edit_warmup_steps_1,
	         edit_guidance_scale_1, edit_threshold_1),
	        (editing_prompt_2, reverse_editing_direction_2, edit_warmup_steps_2,
	         edit_guidance_scale_2, edit_threshold_2),
	    ]
	    active_edits = [
	        edit for edit in candidate_edits
	        if edit[0] is not None and len(edit[0]) > 1
	    ]

	    editing_prompt = [edit[0] for edit in active_edits]
	    reverse_editing_direction = [edit[1] for edit in active_edits]
	    edit_warmup_steps = [edit[2] for edit in active_edits]
	    edit_guidance_scale = [edit[3] for edit in active_edits]
	    edit_threshold = [edit[4] for edit in active_edits]

	    # Re-seed so the edited run starts from the same generator state.
	    gen.manual_seed(seed)
	    edited = pipe(
	        prompt,
	        guidance_scale=scale,
	        num_inference_steps=steps,
	        generator=gen,
	        editing_prompt=editing_prompt,
	        reverse_editing_direction=reverse_editing_direction,
	        edit_warmup_steps=edit_warmup_steps,
	        edit_guidance_scale=edit_guidance_scale,
	        # Previously collected but never forwarded, so the Threshold
	        # sliders had no effect on the result.
	        edit_threshold=edit_threshold,
	        edit_momentum_scale=edit_momentum_scale,
	        edit_mom_beta=edit_mom_beta,
	    )
	    images.extend(edited.images)

	    return images

	# Custom stylesheet handed to gr.Blocks(css=...) for the demo page.
	css = """
	a {
	color: inherit;
	text-decoration: underline;
	}
	.gradio-container {
	font-family: 'IBM Plex Sans', sans-serif;
	}
	.gr-button {
	color: white;
	border-color: #9d66e5;
	background: #9d66e5;
	}
	input[type='range'] {
	accent-color: #9d66e5;
	}
	.dark input[type='range'] {
	accent-color: #dfdfdf;
	}
	.container {
	max-width: 730px;
	margin: auto;
	padding-top: 1.5rem;
	}
	#gallery {
	min-height: 22rem;
	margin-bottom: 15px;
	margin-left: auto;
	margin-right: auto;
	border-bottom-right-radius: .5rem !important;
	border-bottom-left-radius: .5rem !important;
	}
	#gallery>div>.h-full {
	min-height: 20rem;
	}
	.details:hover {
	text-decoration: underline;
	}
	.gr-button {
	white-space: nowrap;
	}
	.gr-button:focus {
	border-color: rgb(147 197 253 / var(--tw-border-opacity));
	outline: none;
	box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
	--tw-border-opacity: 1;
	--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
	--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);
	--tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
	--tw-ring-opacity: .5;
	}
	#advanced-options {
	margin-bottom: 20px;
	}
	.footer {
	margin-bottom: 45px;
	margin-top: 35px;
	text-align: center;
	border-bottom: 1px solid #e5e5e5;
	}
	.footer>p {
	font-size: .8rem;
	display: inline-block;
	padding: 0 10px;
	transform: translateY(10px);
	background: white;
	}

	.dark .footer {
	border-color: #303030;
	}
	.dark .footer>p {
	background: #0b0f19;
	}
	.acknowledgments h4{
	margin: 1.25em 0 .25em 0;
	font-weight: bold;
	font-size: 115%;
	}
	"""

	# Root Blocks container for the demo page, styled with the custom `css` string.
	block = gr.Blocks(css=css)

	# Preset rows for gr.Examples. Column order per row:
	#   prompt, steps, scale, seed,
	#   edit prompt 1, reverse 1, warmup 1, edit scale 1, threshold 1,
	#   edit prompt 2, reverse 2, warmup 2, edit scale 2, threshold 2
	# An empty edit prompt ('') leaves that edit slot unused.
	examples = [
	    [
	        'a photo of a cat', 50, 7, 3,
	        'sunglasses', False, 10, 6, 0.95,
	        '', False, 10, 5, 0.95,
	    ],
	    [
	        'an image of a crowded boulevard, realistic, 4k', 50, 7, 9,
	        'crowd, crowded, people', True, 10, 8.3, 0.9,
	        '', False, 10, 5, 0.95,
	    ],
	    [
	        'a castle next to a river', 50, 7, 48,
	        'boat on a river', False, 15, 6, 0.9,
	        'monet, impression, sunrise', False, 18, 6, 0.8,
	    ],
	    [
	        'a portrait of a king, full body shot, 8k', 50, 7, 33,
	        'male', True, 5, 5, 0.9,
	        'female', False, 5, 5, 0.9,
	    ],
	    [
	        'a photo of a flowerpot', 50, 7, 2,
	        'glasses', False, 12, 5, 0.975,
	        '', False, 10, 5, 0.95,
	    ],
	    [
	        'a photo of the face of a woman', 50, 7, 21,
	        'smiling, smile', False, 15, 3, 0.99,
	        'curls, wavy hair, curly hair', False, 13, 3, 0.925,
	    ],
	]

	# Build the demo page: header, prompt row, two edit-prompt rows with their
	# controls, the result gallery, generation settings, examples and footer.
	# NOTE(review): the container nesting below was reconstructed from a copy
	# whose indentation was lost -- confirm it against the deployed layout.
	# NOTE(review): `.style(...)` and `gr.Box` are Gradio 3.x-era APIs; keep the
	# gradio version pinned accordingly.
	with block:
	    gr.HTML(
	        """
	<div style="text-align: center; max-width: 750px; margin: 0 auto;">
	<div>
	<img class="logo" src="https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1666181274838-62fa1d95e8c9c532aa75331c.png" alt="AIML Logo"
	style="margin: auto; max-width: 7rem;">
	<h1 style="font-weight: 900; font-size: 3rem;">
	Semantic Guidance for Diffusion
	</h1>
	</div>
	<p style="margin-bottom: 10px; font-size: 94%">
	Interact with semantic concepts during the diffusion process. Details can be found in the paper <a href="https://arxiv.org/abs/2301.12247" style="text-decoration: underline;" target="_blank">SEGA: Instructing Diffusion using Semantic Dimensions</a>. <br/> Simply use the edit prompts to make arbitrary changes to the generation.
	</p>
	</div>
	"""
	    )
	    with gr.Group():
	        # Main prompt and the generate button share one row.
	        with gr.Box():
	            with gr.Row().style(mobile_collapse=False, equal_height=True):
	                text = gr.Textbox(
	                    label="Enter your prompt",
	                    show_label=False,
	                    max_lines=1,
	                    placeholder="Enter your prompt",
	                ).style(
	                    border=(True, False, True, True),
	                    rounded=(True, False, False, True),
	                    container=False,
	                )
	                btn = gr.Button("Generate image").style(
	                    margin=False,
	                    rounded=(False, True, True, False),
	                )
	        # Two edit prompts, each followed by its own row of controls.
	        with gr.Box():
	            with gr.Row().style(mobile_collapse=False, equal_height=True):
	                edit_1 = gr.Textbox(
	                    label="Edit Prompt 1",
	                    show_label=False,
	                    max_lines=1,
	                    placeholder="Enter your 1st edit prompt",
	                ).style(
	                    border=(True, False, True, True),
	                    rounded=(True, False, False, True),
	                    container=False,
	                )
	            with gr.Group():
	                with gr.Row().style(mobile_collapse=False, equal_height=True):
	                    rev_1 = gr.Checkbox(label='Reverse')
	                    warmup_1 = gr.Slider(label='Warmup', minimum=0, maximum=50, value=10, step=1, interactive=True)
	                    scale_1 = gr.Slider(label='Scale', minimum=1, maximum=10, value=5, step=0.25, interactive=True)
	                    # `step` (not `steps`) is the Slider keyword for the increment.
	                    threshold_1 = gr.Slider(label='Threshold', minimum=0.5, maximum=0.99, value=0.95, step=0.01, interactive=True)
	            with gr.Row().style(mobile_collapse=False, equal_height=True):
	                edit_2 = gr.Textbox(
	                    label="Edit Prompt 2",
	                    show_label=False,
	                    max_lines=1,
	                    placeholder="Enter your 2nd edit prompt",
	                ).style(
	                    border=(True, False, True, True),
	                    rounded=(True, False, False, True),
	                    container=False,
	                )
	            with gr.Group():
	                with gr.Row().style(mobile_collapse=False, equal_height=True):
	                    rev_2 = gr.Checkbox(label='Reverse')
	                    warmup_2 = gr.Slider(label='Warmup', minimum=0, maximum=50, value=10, step=1, interactive=True)
	                    scale_2 = gr.Slider(label='Scale', minimum=1, maximum=10, value=5, step=0.25, interactive=True)
	                    threshold_2 = gr.Slider(label='Threshold', minimum=0.5, maximum=0.99, value=0.95, step=0.01, interactive=True)

	        gallery = gr.Gallery(
	            label="Generated images", show_label=False, elem_id="gallery"
	        ).style(grid=[2], height="auto")

	        with gr.Row(elem_id="advanced-options"):
	            scale = gr.Slider(label="Scale", minimum=3, maximum=15, value=7, step=1)
	            # Shown but not editable (interactive=False).
	            steps = gr.Slider(label="Steps", minimum=5, maximum=50, value=50, step=5, interactive=False)
	            seed = gr.Slider(
	                label="Seed",
	                minimum=0,
	                maximum=2147483647,
	                step=1,
	                # randomize=True,
	            )

	        # One shared input list; its order matches infer()'s positional
	        # parameters and the columns of every row in `examples`.
	        infer_inputs = [
	            text, steps, scale, seed,
	            edit_1, rev_1, warmup_1, scale_1, threshold_1,
	            edit_2, rev_2, warmup_2, scale_2, threshold_2,
	        ]

	        ex = gr.Examples(examples=examples, fn=infer, inputs=infer_inputs, outputs=gallery, cache_examples=False)
	        ex.dataset.headers = [""]

	    # Pressing Enter in the prompt box and clicking the button both run infer().
	    text.submit(infer, inputs=infer_inputs, outputs=gallery)
	    btn.click(infer, inputs=infer_inputs, outputs=gallery)
	    # NOTE(review): the second author link below uses href="justinpinkney.com",
	    # a scheme-less (relative) URL that names a different person -- confirm the
	    # intended profile URL before changing it.
	    gr.HTML(
	        """
	<div class="footer">
	<p> Gradio Demo by AIML@TU Darmstadt and 🤗 Hugging Face
	</p>
	</div>
	<div class="acknowledgments">
	<p>Created by <a href="https://www.aiml.informatik.tu-darmstadt.de/people/mbrack/">Manuel Brack</a> and <a href="justinpinkney.com">Patrick Schramowski</a> at <a href="https://www.aiml.informatik.tu-darmstadt.de">AIML Lab</a>.</p>
	</div>
	"""
	    )

	block.launch()