Spaces:

alfredplpl
/

sd3-with-LLM

Sleeping

App Files Files Community

sd3-with-LLM / app.py

alfredplpl

Update app.py

94d1cb1 verified about 1 month ago

raw

history blame

No virus

6.82 kB

	# Thanks: https://huggingface.co/spaces/stabilityai/stable-diffusion-3-medium
	import os
	import gradio as gr
	import numpy as np
	import random
	import torch
	from diffusers import StableDiffusion3Pipeline, SD3Transformer2DModel, FlowMatchEulerDiscreteScheduler
	import spaces
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

	# Target device and the half-precision dtype used for the image pipeline.
	device = "cuda"
	dtype = torch.float16

	# Stable Diffusion 3 Medium text-to-image pipeline, pinned to a specific
	# revision. The Hub token is read from the TOKEN environment variable;
	# os.environ[...] raises KeyError when it is not set.
	repo = "stabilityai/stable-diffusion-3-medium"
	t2i = StableDiffusion3Pipeline.from_pretrained(
	    repo,
	    torch_dtype=dtype,
	    revision="refs/pr/26",
	    token=os.environ["TOKEN"],
	)
	t2i = t2i.to(device)

	# Instruction-tuned LLM (loaded in bfloat16 directly onto `device`) and its
	# tokenizer; together they turn a short user prompt into a detailed one.
	model_id = "microsoft/Phi-3-medium-128k-instruct"
	upsampler = AutoModelForCausalLM.from_pretrained(
	    model_id,
	    torch_dtype=torch.bfloat16,
	    device_map=device,
	    trust_remote_code=True,
	)
	tokenizer = AutoTokenizer.from_pretrained(model_id)

	# Upper bounds used by the seed and width/height sliders in the UI.
	MAX_IMAGE_SIZE = 1344
	MAX_SEED = np.iinfo(np.int32).max

	def _upsample_prompt(prompt):
	    """Expand a short (possibly Japanese) prompt into a detailed English one.

	    The module-level `upsampler` LLM is shown two few-shot exchanges
	    (Japanese request -> detailed English image description) followed by the
	    user's prompt, and its sampled reply is returned as plain text.

	    Args:
	        prompt: Raw text entered by the user.

	    Returns:
	        The generated description, or `prompt` unchanged if the model
	        produced no text.
	    """
	    messages = [
	        {"role": "user", "content": "クールなアニメ風の少女"},
	        {"role": "assistant", "content": "An anime style illustration of a cool-looking teenage girl with an edgy, confident expression. She has piercing eyes, a slight smirk, and colorful hair that flows in the wind. She wears a trendy punk-inspired outfit with a leather jacket, ripped jeans, and combat boots. The background has an urban nighttime feel with city lights and graffiti to match her rebellious vibe. The colors are vibrant with high contrast to give an impactful look. The overall style captures her undeniable coolness and fearless attitude."},
	        {"role": "user", "content": "美味しそうな肉"},
	        {"role": "assistant", "content": "A gourmet scene in a high-end restaurant kitchen where a chef is presenting a plate of cooked beef testicles, garnished elegantly with herbs and spices. The chef, a middle-aged Caucasian man wearing a white chef's hat and coat, is inspecting the dish with a satisfied expression. The kitchen background is bustling with other chefs and kitchen staff, and the atmosphere is warm and inviting with hanging pots and pans, and a glowing, busy stove in the background. The focus is on the chef's proud presentation of this unusual but delicately prepared dish."},
	        {"role": "user", "content": prompt},
	    ]
	    # NOTE: the method is `apply_chat_template`; the earlier draft spelled it
	    # `apply_chat_templete`, which raises AttributeError.
	    tokenized_input = tokenizer.apply_chat_template(
	        messages,
	        add_generation_prompt=True,
	        return_tensors="pt",
	    )
	    with torch.inference_mode():
	        output = upsampler.generate(
	            tokenized_input.to(upsampler.device),
	            max_new_tokens=512,
	            do_sample=True,
	            top_p=0.95,
	            temperature=0.7,
	            repetition_penalty=1.05,
	        )[0]

	    # The returned sequence starts with the input tokens; drop them so the
	    # few-shot conversation is not fed to the image model as part of the prompt.
	    new_tokens = output[tokenized_input.shape[-1]:]
	    upsampled = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
	    return upsampled or prompt


	@spaces.GPU
	def infer(
	    prompt,
	    negative_prompt,
	    seed,
	    randomize_seed,
	    width,
	    height,
	    guidance_scale,
	    num_inference_steps,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Generate one image for `prompt` after LLM prompt upsampling.

	    Args:
	        prompt: User text; rewritten by `_upsample_prompt` before generation.
	        negative_prompt: Text passed to the pipeline as the negative prompt.
	        seed: Seed for the random generator (ignored when `randomize_seed`).
	        randomize_seed: When true, draw a fresh seed in [0, MAX_SEED].
	        width: Output width passed to the pipeline.
	        height: Output height passed to the pipeline.
	        guidance_scale: Guidance scale passed to the pipeline.
	        num_inference_steps: Number of denoising steps passed to the pipeline.
	        progress: Gradio progress tracker (tqdm tracking enabled).

	    Returns:
	        A `(image, seed)` tuple: the first generated image and the seed that
	        was actually used, so the UI can display it.
	    """
	    print("start inference...")
	    if randomize_seed:
	        seed = random.randint(0, MAX_SEED)
	    # manual_seed() needs an int; a UI slider may deliver the value as a float.
	    seed = int(seed)

	    # Previously `generator` was only assigned inside a dead `if(0):` block,
	    # so the pipeline call below failed with NameError.
	    generator = torch.Generator().manual_seed(seed)

	    upsampled_prompt = _upsample_prompt(prompt)
	    print(upsampled_prompt)

	    image = t2i(
	        prompt=upsampled_prompt,
	        negative_prompt=negative_prompt,
	        guidance_scale=guidance_scale,
	        num_inference_steps=num_inference_steps,
	        width=width,
	        height=height,
	        generator=generator,
	    ).images[0]

	    return image, seed

	# Sample prompts handed to gr.Examples, which fills the prompt textbox with
	# the selected entry.
	examples = [
	"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
	"An astronaut riding a green horse",
	"A delicious ceviche cheesecake slice",
	]

	# Stylesheet passed to gr.Blocks(css=...): horizontally centres the element
	# with id "col-container" and limits its width to 580px.
	css="""
	#col-container {
	margin: 0 auto;
	max-width: 580px;
	}
	"""

	# Gradio UI: a single centred column with the prompt row, the result image,
	# an "Advanced Settings" accordion and the example prompts. The nesting of
	# the `with` blocks determines the layout, so indentation here is significant.
	with gr.Blocks(css=css) as demo:

	    with gr.Column(elem_id="col-container"):
	        gr.Markdown("""
	        # 日本語が入力できる [SD3 Medium](https://huggingface.co/stabilityai/stable-diffusion-3-medium)
	        """)

	        # Prompt textbox and Run button share one row.
	        with gr.Row():

	            prompt = gr.Text(
	                label="Prompt",
	                show_label=False,
	                max_lines=1,
	                placeholder="Enter your prompt",
	                container=False,
	            )

	            run_button = gr.Button("Run", scale=0)

	        result = gr.Image(label="Result", show_label=False)

	        with gr.Accordion("Advanced Settings", open=False):

	            negative_prompt = gr.Text(
	                label="Negative prompt",
	                max_lines=1,
	                placeholder="Enter a negative prompt",
	            )

	            seed = gr.Slider(
	                label="Seed",
	                minimum=0,
	                maximum=MAX_SEED,
	                step=1,
	                value=0,
	            )

	            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

	            # Image dimensions, side by side.
	            with gr.Row():

	                width = gr.Slider(
	                    label="Width",
	                    minimum=256,
	                    maximum=MAX_IMAGE_SIZE,
	                    step=64,
	                    value=1024,
	                )

	                height = gr.Slider(
	                    label="Height",
	                    minimum=256,
	                    maximum=MAX_IMAGE_SIZE,
	                    step=64,
	                    value=1024,
	                )

	            # Sampler controls, side by side.
	            with gr.Row():

	                guidance_scale = gr.Slider(
	                    label="Guidance scale",
	                    minimum=0.0,
	                    maximum=10.0,
	                    step=0.1,
	                    value=5.0,
	                )

	                num_inference_steps = gr.Slider(
	                    label="Number of inference steps",
	                    minimum=1,
	                    maximum=50,
	                    step=1,
	                    value=28,
	                )

	        gr.Examples(
	            examples=examples,
	            inputs=[prompt],
	        )

	    # Run inference on button click or when either textbox is submitted; the
	    # second output writes the seed actually used back into the seed slider.
	    gr.on(
	        triggers=[run_button.click, prompt.submit, negative_prompt.submit],
	        fn=infer,
	        inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
	        outputs=[result, seed],
	    )

	demo.launch()