import torch
from diffusers import StableDiffusionPipeline
import gradio as gr
# Check whether a GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
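# Note: float16 halves memory use but is only reliable on GPU, so the
# pipeline below falls back to float32 on CPU.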
# Load the pipeline
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)
# Generation function
def generate(prompt):
    image = pipe(prompt).images[0]
    return image
# Define the Gradio interface
interface = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(label="Enter a prompt", placeholder="e.g. a cute caricature of a cat in a hat"),
    outputs=gr.Image(type="pil"),
    title="Text to Image - Stable Diffusion",
    description="A text-to-image generator built on Stable Diffusion."
)
if __name__ == "__main__":
    interface.launch()
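
# --- Optional: calling the running app from another process ---
# A minimal sketch, assuming the app is served at Gradio's default local URL
# and the `gradio_client` package is installed; the endpoint name may differ.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860/")      # local Gradio server
# image_path = client.predict(
#     "a cute caricature of a cat in a hat",     # prompt text
#     api_name="/predict",                       # default endpoint for gr.Interface
# )
# print(image_path)                              # path of the generated image file

# --- Alternative version (kept commented out): image -> caption -> cartoon
# --- pipeline using Florence-2 for captioning and SD 3.5 TurboX for generation.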
# import os
# import torch
# import random
# import importlib
# from PIL import Image
# from huggingface_hub import snapshot_download
# import gradio as gr
# from transformers import AutoProcessor, AutoModelForCausalLM, CLIPTextModel, CLIPTokenizer, CLIPFeatureExtractor
# from diffusers import StableDiffusionPipeline, DiffusionPipeline, EulerDiscreteScheduler, UNet2DConditionModel
# # Environment settings
# os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
# REVISION = "ceaf371f01ef66192264811b390bccad475a4f02"
# # Download model snapshots locally
# LOCAL_FLORENCE = snapshot_download("microsoft/Florence-2-base", revision=REVISION)
# LOCAL_TURBOX = snapshot_download("tensorart/stable-diffusion-3.5-large-TurboX")
# # Device and dtype settings
# device = "cuda" if torch.cuda.is_available() else "cpu"
# dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# # Load model components individually, applying the dtype to each
# scheduler = EulerDiscreteScheduler.from_pretrained(
#     LOCAL_TURBOX, subfolder="scheduler", torch_dtype=dtype
# )
# text_encoder = CLIPTextModel.from_pretrained(LOCAL_TURBOX, subfolder="text_encoder", torch_dtype=dtype)
# tokenizer = CLIPTokenizer.from_pretrained(LOCAL_TURBOX, subfolder="tokenizer")
# feature_extractor = CLIPFeatureExtractor.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="feature_extractor")
# unet = UNet2DConditionModel.from_pretrained(LOCAL_TURBOX, subfolder="unet", torch_dtype=dtype)
# florence_model = AutoModelForCausalLM.from_pretrained(
#     LOCAL_FLORENCE, trust_remote_code=True, torch_dtype=dtype
# )
# florence_model.to("cpu").eval()
# florence_processor = AutoProcessor.from_pretrained(LOCAL_FLORENCE, trust_remote_code=True)
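# # Florence-2 stays on CPU here, presumably to keep GPU memory free for the
# # diffusion pipeline below.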
# # Stable Diffusion pipeline
# pipe = DiffusionPipeline.from_pretrained(
#     LOCAL_TURBOX,
#     torch_dtype=dtype,
#     trust_remote_code=True,
#     safety_checker=None,
#     feature_extractor=None
# )
# pipe = pipe.to(device)
# pipe.scheduler = scheduler
# pipe.enable_attention_slicing()  # save memory
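# # Attention slicing computes attention in chunks, trading some speed for a
# # lower peak VRAM footprint.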
# # Constants
# MAX_SEED = 2**31 - 1
# # Text styler
# def pseudo_translate_to_korean_style(en_prompt: str) -> str:
#     return f"Cartoon styled {en_prompt} handsome or pretty people"
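# # NOTE: despite its name, this function does no translation; it just wraps
# # the English caption in a fixed cartoon-style template.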
# # Prompt generation
# def generate_prompt(image):
#     if not isinstance(image, Image.Image):
#         image = Image.fromarray(image)
#     inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to("cpu")
#     with torch.no_grad():
#         generated_ids = florence_model.generate(
#             input_ids=inputs["input_ids"],
#             pixel_values=inputs["pixel_values"],
#             max_new_tokens=256,
#             num_beams=3
#         )
#     generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
#     parsed_answer = florence_processor.post_process_generation(
#         generated_text,
#         task="<MORE_DETAILED_CAPTION>",
#         image_size=(image.width, image.height)
#     )
#     prompt_en = parsed_answer["<MORE_DETAILED_CAPTION>"]
#     cartoon_prompt = pseudo_translate_to_korean_style(prompt_en)
#     return cartoon_prompt
# # Image generation function
# def generate_image(prompt, seed=42, randomize_seed=False):
#     if randomize_seed:
#         seed = random.randint(0, MAX_SEED)
#     generator = torch.Generator().manual_seed(seed)
#     image = pipe(
#         prompt=prompt,
#         guidance_scale=1.5,
#         num_inference_steps=6,  # optimized step count
#         width=512,
#         height=512,
#         generator=generator
#     ).images[0]
#     return image, seed
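# # Usage sketch (hypothetical values): a fixed seed reproduces the same image,
# # while randomize_seed=True draws a fresh seed and returns it for logging.
# # img, s = generate_image("Cartoon styled a cat in a hat handsome or pretty people", seed=123)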
# # Gradio UI
# with gr.Blocks() as demo:
#     gr.Markdown("# Image → Caption → Automatic Cartoon Image Generator")
#     gr.Markdown("**How to use**\n"
#                 "- Upload an image and the AI automatically runs captioning → style conversion → cartoon image generation.")
#     with gr.Row():
#         with gr.Column():
#             input_img = gr.Image(label="Upload a source image")
#             run_button = gr.Button("Generate")
#         with gr.Column():
#             prompt_out = gr.Textbox(label="Styled prompt", lines=3, show_copy_button=True)
#             output_img = gr.Image(label="Generated image")
#     def full_process(img):
#         prompt = generate_prompt(img)
#         image, seed = generate_image(prompt, randomize_seed=True)
#         return prompt, image
#     run_button.click(fn=full_process, inputs=[input_img], outputs=[prompt_out, output_img])
# demo.launch()