import torch
from diffusers import StableDiffusionPipeline
import gradio as gr

# Check whether a GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pipeline
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)
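# Note: from_pretrained downloads and caches the model weights from the Hugging Face Hub
# on first use; float16 roughly halves VRAM usage on CUDA, while float32 is kept on CPU,
# where half precision is poorly supported.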

# Generation function
def generate(prompt):
    image = pipe(prompt).images[0]
    return image
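
# Note: pipe(prompt) returns a pipeline output object whose .images attribute is a list of
# PIL images; only the first sample is used here.
# A minimal sketch (an assumption, not wired into the UI) of how the same call could take a
# fixed seed and step count, mirroring the parameters used in the commented-out version below:
#
# def generate_seeded(prompt, seed=42, num_inference_steps=50):
#     generator = torch.Generator(device=device).manual_seed(seed)
#     return pipe(prompt, num_inference_steps=num_inference_steps, generator=generator).images[0]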

# Define the Gradio interface
interface = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(label="Enter a prompt", placeholder="e.g. a cute caricature of a cat in a hat"),
    outputs=gr.Image(type="pil"),
    title="Text to Image - Stable Diffusion",
    description="A text-to-image generator using Stable Diffusion."
)

if __name__ == "__main__":
    interface.launch()
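
# The previous version of this app is kept commented out below:
# image -> caption (Florence-2) -> styled prompt -> cartoon image (Stable Diffusion 3.5 Large TurboX).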
# import os
# import torch
# import random
# import importlib
# from PIL import Image
# from huggingface_hub import snapshot_download
# import gradio as gr
# from transformers import AutoProcessor, AutoModelForCausalLM, CLIPTextModel, CLIPTokenizer, CLIPFeatureExtractor
# from diffusers import StableDiffusionPipeline, DiffusionPipeline, EulerDiscreteScheduler, UNet2DConditionModel
# # Environment settings
# os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
# REVISION = "ceaf371f01ef66192264811b390bccad475a4f02"
# # Download model snapshots locally
# LOCAL_FLORENCE = snapshot_download("microsoft/Florence-2-base", revision=REVISION)
# LOCAL_TURBOX = snapshot_download("tensorart/stable-diffusion-3.5-large-TurboX")
# # λ””λ°”μ΄μŠ€ 및 dtype μ„€μ •
# device = "cuda" if torch.cuda.is_available() else "cpu"
# dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# # λͺ¨λΈ λ‘œλ”© (뢀뢄별 λ‘œλ”© + dtype 적용)
# scheduler = EulerDiscreteScheduler.from_pretrained(
# LOCAL_TURBOX, subfolder="scheduler", torch_dtype=dtype
# )
# text_encoder = CLIPTextModel.from_pretrained(LOCAL_TURBOX, subfolder="text_encoder", torch_dtype=dtype)
# tokenizer = CLIPTokenizer.from_pretrained(LOCAL_TURBOX, subfolder="tokenizer")
# feature_extractor = CLIPFeatureExtractor.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="feature_extractor")
# unet = UNet2DConditionModel.from_pretrained(LOCAL_TURBOX, subfolder="unet", torch_dtype=dtype)
# florence_model = AutoModelForCausalLM.from_pretrained(
#     LOCAL_FLORENCE, trust_remote_code=True, torch_dtype=dtype
# )
# florence_model.to("cpu").eval()
# florence_processor = AutoProcessor.from_pretrained(LOCAL_FLORENCE, trust_remote_code=True)
# # Stable Diffusion pipeline
# pipe = DiffusionPipeline.from_pretrained(
#     LOCAL_TURBOX,
#     torch_dtype=dtype,
#     trust_remote_code=True,
#     safety_checker=None,
#     feature_extractor=None
# )
# pipe = pipe.to(device)
# pipe.scheduler = scheduler
# pipe.enable_attention_slicing()  # saves memory
# # μƒμˆ˜
# MAX_SEED = 2**31 - 1
# # ν…μŠ€νŠΈ μŠ€νƒ€μΌλŸ¬
# def pseudo_translate_to_korean_style(en_prompt: str) -> str:
# return f"Cartoon styled {en_prompt} handsome or pretty people"
# # Prompt generation
# def generate_prompt(image):
#     if not isinstance(image, Image.Image):
#         image = Image.fromarray(image)
#     inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to("cpu")
#     with torch.no_grad():
#         generated_ids = florence_model.generate(
#             input_ids=inputs["input_ids"],
#             pixel_values=inputs["pixel_values"],
#             max_new_tokens=256,
#             num_beams=3
#         )
#     generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
#     parsed_answer = florence_processor.post_process_generation(
#         generated_text,
#         task="<MORE_DETAILED_CAPTION>",
#         image_size=(image.width, image.height)
#     )
#     prompt_en = parsed_answer["<MORE_DETAILED_CAPTION>"]
#     cartoon_prompt = pseudo_translate_to_korean_style(prompt_en)
#     return cartoon_prompt
# # Image generation function
# def generate_image(prompt, seed=42, randomize_seed=False):
#     if randomize_seed:
#         seed = random.randint(0, MAX_SEED)
#     generator = torch.Generator().manual_seed(seed)
#     image = pipe(
#         prompt=prompt,
#         guidance_scale=1.5,
#         num_inference_steps=6,  # optimized step count
#         width=512,
#         height=512,
#         generator=generator
#     ).images[0]
#     return image, seed
# # Gradio UI
# with gr.Blocks() as demo:
#     gr.Markdown("# πŸ–Ό Image β†’ Caption β†’ Automatic Cartoon Image Generator")
#     gr.Markdown("**πŸ“Œ How to use**\n"
#                 "- Upload an image and the AI automatically generates a caption, converts the style, and produces a cartoon image.")
#     with gr.Row():
#         with gr.Column():
#             input_img = gr.Image(label="🎨 Upload source image")
#             run_button = gr.Button("✨ Start generation")
#         with gr.Column():
#             prompt_out = gr.Textbox(label="πŸ“ Styled prompt", lines=3, show_copy_button=True)
#             output_img = gr.Image(label="πŸŽ‰ Generated image")
#     def full_process(img):
#         prompt = generate_prompt(img)
#         image, seed = generate_image(prompt, randomize_seed=True)
#         return prompt, image
#     run_button.click(fn=full_process, inputs=[input_img], outputs=[prompt_out, output_img])
# demo.launch()