# Hugging Face Space status when this file was captured: Sleeping
import gradio as gr | |
from transformers import pipeline | |
from diffusers import StableDiffusion3Pipeline | |
from diffusers import DiffusionPipeline | |
import torch | |
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hugging Face Hub repository id of the diffusion model loaded for generation.
model_repo_id = "stabilityai/stable-diffusion-3.5-medium"

# Art style interpolated into the generation prompt.
image_style = "pixel art"

# bfloat16 when running on CUDA, float32 otherwise. Derived from `device` so
# the two settings cannot drift apart.
torch_dtype = torch.bfloat16 if device == "cuda" else torch.float32
# Lazily-created captioning pipeline, shared by every call.
_CAPTION_PIPELINE = None


def _get_caption_pipeline():
    """Return the shared image-to-text pipeline, creating it on first use."""
    global _CAPTION_PIPELINE
    if _CAPTION_PIPELINE is None:
        # Building the pipeline loads the model, so do it once and reuse it.
        # Two simultaneous first calls may both build one; the last assignment
        # wins and the only cost is the duplicated load.
        _CAPTION_PIPELINE = pipeline(
            "image-to-text", model="nlpconnect/vit-gpt2-image-captioning"
        )
    return _CAPTION_PIPELINE


def generate_description(image):
    """Return a generated text caption for ``image``.

    Args:
        image: The image to caption, passed straight to the image-to-text
            pipeline. In this file the caller supplies the value of a
            ``gr.Image(type="filepath")`` component.

    Returns:
        The ``'generated_text'`` field of the pipeline's first result.

    Raises:
        ValueError: If ``image`` is ``None`` (nothing to caption). Raised
            before the model is loaded.
    """
    if image is None:
        raise ValueError("generate_description() requires an image, got None")
    captions = _get_caption_pipeline()(image)
    return captions[0]['generated_text']
# Lazily-created diffusion pipeline, shared by every call.
_IMAGE_PIPELINE = None


def _get_image_pipeline():
    """Return the shared diffusion pipeline, loading and placing it on first use."""
    global _IMAGE_PIPELINE
    if _IMAGE_PIPELINE is None:
        # Loading the pipeline and moving it to the device is the expensive
        # part; do it once instead of on every request.
        loaded = DiffusionPipeline.from_pretrained(
            model_repo_id, torch_dtype=torch_dtype
        )
        _IMAGE_PIPELINE = loaded.to(device)
    return _IMAGE_PIPELINE


def generate_image_by_description(description):
    """Generate an avatar image of a pigeon from a text description.

    The description is embedded in a fixed prompt together with the
    module-level ``image_style``; the prompt is run through the diffusion
    pipeline with 40 inference steps and a guidance scale of 4.5.

    Args:
        description: Text describing the pigeon, interpolated into the prompt.

    Returns:
        The first image of the pipeline's output (``.images[0]``).
    """
    pipe = _get_image_pipeline()
    prompt = (
        f"Generate a high-quality, detailed image of a {image_style} of a pigeon. "
        f"The description of the pigeon is: {description}. "
        "Make it visually appealing with clear textures and distinct colors."
    )
    result = pipe(
        prompt,
        num_inference_steps=40,
        guidance_scale=4.5,
    )
    return result.images[0]
# UI: one image upload, one button, one output image.
with gr.Blocks() as demo:
    selected_image = gr.Image(type="filepath", label="Upload an Image of the Pigeon")
    generate_button = gr.Button("Generate Avatar", variant="primary")
    generated_image = gr.Image(type="numpy", label="Generated Avatar")

    def process_and_generate(image):
        """Caption the uploaded image, then generate an avatar from the caption.

        The intermediate description is not shown in the UI; only the
        generated image is returned to the output component.

        Args:
            image: Value of ``selected_image``; ``None`` when nothing has
                been uploaded.

        Returns:
            The image produced by ``generate_image_by_description``.

        Raises:
            gr.Error: If no image has been uploaded.
        """
        if image is None:
            # Show a readable message instead of failing inside the captioner.
            raise gr.Error("Please upload an image of the pigeon first.")
        description = generate_description(image)
        return generate_image_by_description(description)

    generate_button.click(
        process_and_generate, inputs=selected_image, outputs=generated_image
    )

demo.launch()