Spaces:

alfredplpl
/

sd3-with-LLM

Running on Zero

App Files Files Community

sd3-with-LLM / app.py

alfredplpl

Update app.py

ea188c0 verified 4 months ago

raw

history blame

No virus

5.59 kB

	# Thanks: https://huggingface.co/spaces/stabilityai/stable-diffusion-3-medium
	import os
	import gradio as gr
	import numpy as np
	import random
	import torch
	from diffusers import StableDiffusion3Pipeline, SD3Transformer2DModel, FlowMatchEulerDiscreteScheduler
	import spaces
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

	# Device and dtype used when loading the text-to-image pipeline below.
	device = "cuda"
	dtype = torch.float16

	# Hub repository holding the Stable Diffusion 3 Medium weights. The pipeline
	# is loaded from the "refs/pr/26" revision, authenticating with the value of
	# the TOKEN environment variable (a missing variable raises KeyError here).
	repo = "stabilityai/stable-diffusion-3-medium"
	t2i = StableDiffusion3Pipeline.from_pretrained(
	    repo,
	    revision="refs/pr/26",
	    torch_dtype=dtype,
	    token=os.environ["TOKEN"],
	)
	t2i = t2i.to(device)

	# Repository id shared by the language model and its tokenizer.
	_LLM_REPO = "microsoft/Phi-3-mini-4k-instruct"

	# Causal LM loaded in bfloat16 directly onto the GPU.
	model = AutoModelForCausalLM.from_pretrained(
	    _LLM_REPO,
	    trust_remote_code=True,
	    torch_dtype=torch.bfloat16,
	    device_map="cuda",
	)
	tokenizer = AutoTokenizer.from_pretrained(_LLM_REPO)

	# Text-generation pipeline that `infer` uses to expand the user's prompt.
	upsampler = pipeline("text-generation", model=model, tokenizer=tokenizer)

	# Upper bounds exposed through the UI sliders: the largest image side length
	# and the largest seed value (the maximum of a signed 32-bit integer).
	MAX_IMAGE_SIZE = 1344
	MAX_SEED = np.iinfo(np.int32).max

	# Keyword arguments forwarded to the text-generation pipeline on every call.
	generation_args = dict(
	    max_new_tokens=300,
	    return_full_text=False,
	    temperature=0.7,
	    do_sample=True,
	    top_p=0.95,
	)

	def _upsample_prompt(prompt):
	    """Expand ``prompt`` into a detailed image description using the LLM.

	    One hard-coded user/assistant exchange (a short Japanese request answered
	    by a long English description) precedes the real prompt as a one-shot
	    example. Returns the text generated by the ``upsampler`` pipeline.
	    """
	    messages = [
	        {"role": "user", "content": "クールなアニメ風の女の子"},
	        {"role": "assistant", "content": "An anime style illustration of a cool-looking teenage girl with an edgy, confident expression. She has piercing eyes, a slight smirk, and colorful hair that flows in the wind. She wears a trendy punk-inspired outfit with a leather jacket, ripped jeans, and combat boots. The background has an urban nighttime feel with city lights and graffiti to match her rebellious vibe. The colors are vibrant with high contrast to give an impactful look. The overall style captures her undeniable coolness and fearless attitude."},
	        {"role": "user", "content": prompt},
	    ]
	    output = upsampler(messages, **generation_args)
	    return output[0]['generated_text']

	@spaces.GPU
	def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
	    """Generate one image from a prompt that is first expanded by the LLM.

	    Args:
	        prompt: Text entered by the user; rewritten by the LLM before use.
	        negative_prompt: Forwarded unchanged to the image pipeline.
	        seed: Seed for the random generator; replaced when ``randomize_seed``
	            is truthy.
	        randomize_seed: When truthy, a new seed is drawn from ``[0, MAX_SEED]``.
	        width: Image width; coerced to ``int`` before use.
	        height: Image height; coerced to ``int`` before use.
	        guidance_scale: Forwarded unchanged to the image pipeline.
	        num_inference_steps: Number of steps; coerced to ``int`` before use.
	        progress: Not read in the body.
	            NOTE(review): presumably declared so Gradio mirrors tqdm
	            progress in the UI -- confirm.

	    Returns:
	        Tuple ``(image, seed, upsampled_prompt)``: the first generated image,
	        the integer seed actually used, and the LLM-expanded prompt.
	    """
	    upsampled_prompt = _upsample_prompt(prompt)
	    print(upsampled_prompt)

	    # Slider values may arrive as floats (e.g. 1024.0 from an API client).
	    # The generator seed, image size and step count must be integers, so
	    # normalise them here instead of failing deep inside torch/diffusers.
	    if randomize_seed:
	        seed = random.randint(0, MAX_SEED)
	    else:
	        seed = int(seed)
	    width = int(width)
	    height = int(height)
	    num_inference_steps = int(num_inference_steps)

	    generator = torch.Generator().manual_seed(seed)

	    image = t2i(
	        prompt=upsampled_prompt,
	        negative_prompt=negative_prompt,
	        guidance_scale=guidance_scale,
	        num_inference_steps=num_inference_steps,
	        width=width,
	        height=height,
	        generator=generator,
	    ).images[0]

	    return image, seed, upsampled_prompt

	# Stylesheet handed to gr.Blocks: centres the main column and caps its width.
	css = """
	#col-container {
	margin: 0 auto;
	max-width: 580px;
	}
	"""

	# Sample prompts offered below the prompt box.
	examples = ["美味しい肉", "馬に乗った宇宙飛行士", "アニメ風の美少女"]

	# Gradio UI: a prompt box with a run button, the resulting image and the
	# prompt actually used, an accordion of advanced settings, and examples.
	# NOTE(review): the nesting below was reconstructed from a source whose
	# indentation had been flattened -- confirm it matches the intended layout.
	with gr.Blocks(css=css) as demo:
	    with gr.Column(elem_id="col-container"):
	        gr.Markdown("""
	# 日本語が入力できる SD3 Medium
	""")

	        # Prompt entry and the button that starts generation.
	        with gr.Row():
	            prompt = gr.Text(
	                label="プロンプト",
	                placeholder="作りたい画像の特徴を入力してください",
	                max_lines=1,
	                show_label=False,
	                container=False,
	            )
	            run_button = gr.Button("実行", scale=0)

	        # Outputs: the generated image and the read-only expanded prompt.
	        result = gr.Image(label="結果", show_label=False)
	        generated_prompt = gr.Textbox(label="生成に使ったプロンプト", show_label=False, interactive=False)

	        with gr.Accordion("詳細設定", open=False):
	            negative_prompt = gr.Text(
	                label="ネガティブプロンプト",
	                placeholder="画像から排除したい要素を入力してください",
	                max_lines=1,
	            )

	            seed = gr.Slider(label="乱数のシード", minimum=0, maximum=MAX_SEED, step=1, value=0)
	            randomize_seed = gr.Checkbox(label="ランダム生成", value=True)

	            # Image dimensions, adjustable in steps of 64.
	            with gr.Row():
	                width = gr.Slider(label="横", minimum=256, maximum=MAX_IMAGE_SIZE, step=64, value=1024)
	                height = gr.Slider(label="縦", minimum=256, maximum=MAX_IMAGE_SIZE, step=64, value=1024)

	            # Sampling controls forwarded to the image pipeline.
	            with gr.Row():
	                guidance_scale = gr.Slider(label="プロンプトの忠実さ", minimum=0.0, maximum=10.0, step=0.1, value=5.0)
	                num_inference_steps = gr.Slider(label="推論回数", minimum=1, maximum=50, step=1, value=28)

	        gr.Examples(examples=examples, inputs=[prompt])

	    # Clicking the button or submitting either text field runs `infer`; the
	    # seed slider is also an output so it shows the seed that was used.
	    gr.on(
	        triggers=[run_button.click, prompt.submit, negative_prompt.submit],
	        fn=infer,
	        inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
	        outputs=[result, seed, generated_prompt],
	    )

	demo.launch()