File size: 15,821 Bytes
bb38035 44d2e01 1b5d55e 44d2e01 1b5d55e 5260f7d 1b5d55e dcb2d8b 5260f7d fc8839e dcb2d8b c5a017f 1b5d55e 44d2e01 1b5d55e 5260f7d 1b5d55e dcb2d8b e52e8c8 c5a017f 5260f7d 1b5d55e 0b36562 44d2e01 0b36562 44d2e01 0b36562 44d2e01 1b5d55e 44d2e01 1b5d55e 0b36562 1b5d55e 44d2e01 fc8839e 1b5d55e 44d2e01 1b5d55e 44d2e01 1b5d55e fc8839e 1b5d55e 0b36562 44d2e01 0b36562 44d2e01 0b36562 1b5d55e 9cbbe55 1b5d55e 44d2e01 1b5d55e 44d2e01 1b5d55e bb38035 1b5d55e 44d2e01 1b5d55e 44d2e01 1b5d55e 44d2e01 1b5d55e 44d2e01 febc658 1b5d55e 44d2e01 0b36562 44d2e01 0b36562 44d2e01 0b36562 44d2e01 0b36562 1b5d55e 44d2e01 1b5d55e 44d2e01 1b5d55e a94e2a0 1b5d55e a94e2a0 1b5d55e fc8839e 1b5d55e 5260f7d 1b5d55e 0b36562 44d2e01 af73a4f febc658 58ca403 af73a4f febc658 58ca403 febc658 af73a4f 9cbbe55 febc658 0b36562 bb38035 44d2e01 0b36562 44d2e01 0b36562 1b5d55e 44d2e01 fc8839e 1b5d55e 44d2e01 1b5d55e b439b95 1b5d55e fc8839e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 |
# Model identifiers: the Hugging Face Hub repo to load weights from and the
# human-readable name shown in the UI.
model_repo_id = "Freepik/F-Lite"
model_name = "F Lite"
from dotenv import load_dotenv
import gradio as gr
import numpy as np
import random
import os
import logging
import google.generativeai as genai
import spaces
import torch
from f_lite import FLitePipeline
# Trick required because it is not a native diffusers model
from diffusers.pipelines.pipeline_loading_utils import LOADABLE_CLASSES, ALL_IMPORTABLE_CLASSES
from f_lite.pipeline import APGConfig
# Register the custom DiT transformer with diffusers' pipeline-loading
# machinery so FLitePipeline.from_pretrained can (de)serialize it like a
# first-class pipeline component.
LOADABLE_CLASSES["f_lite"] = LOADABLE_CLASSES["f_lite.model"] = {"DiT": ["save_pretrained", "from_pretrained"]}
ALL_IMPORTABLE_CLASSES["DiT"] = ["save_pretrained", "from_pretrained"]
# Pull variables from a local .env file (e.g. GEMINI_API_KEY) into os.environ.
load_dotenv()
# Initialize Gemini API if API key is available
if os.getenv("GEMINI_API_KEY"):
    gemini_available = True
    genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
else:
    gemini_available = False
    logging.warning("GEMINI_API_KEY not found in environment variables. Prompt enrichment will not work.")
# Select device and dtype: bfloat16 on GPU keeps memory low; CPU falls back
# to float32.
device = "cuda" if torch.cuda.is_available() else "cpu"
if torch.cuda.is_available():
    torch_dtype = torch.bfloat16
else:
    torch_dtype = torch.float32
# Download/load the pipeline once at import time (this is a long-running call).
pipe = FLitePipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
# pipe.enable_model_cpu_offload() # For less memory consumption
pipe.to(device)
# VAE slicing/tiling reduce peak VRAM when decoding large images.
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()
MAX_SEED = np.iinfo(np.int32).max  # upper bound for the seed slider/randomizer
MAX_IMAGE_SIZE = 1600  # max width/height (px) exposed by the custom sliders
# Predefined resolutions, grouped by orientation. Each entry carries the
# pixel dimensions plus the label rendered in the dropdown.
RESOLUTIONS = {
    "horizontal": [
        {"width": 1344, "height": 896, "label": "1344×896"},
        {"width": 1152, "height": 768, "label": "1152×768"},
        {"width": 960, "height": 640, "label": "960×640"},
        {"width": 1600, "height": 896, "label": "1600×896"}
    ],
    "vertical": [
        {"width": 896, "height": 1344, "label": "896×1344"},
        {"width": 768, "height": 1152, "label": "768×1152"},
        {"width": 640, "height": 960, "label": "640×960"},
        {"width": 896, "height": 1600, "label": "896×1600"}
    ],
    "square": [
        {"width": 1216, "height": 1216, "label": "1216×1216"},
        {"width": 1024, "height": 1024, "label": "1024×1024"}
    ]
}
# Fallback used whenever no (or an invalid) resolution is selected.
DEFAULT_RESOLUTION = {"width": 1024, "height": 1024, "label": "1024×1024"}
# Flatten the groups into [label, value] pairs for the dropdown: each group
# contributes a header row (value None, not selectable) followed by its
# resolutions encoded as "category:width:height".
resolution_options = []
for group_name, group_entries in RESOLUTIONS.items():
    resolution_options.append([f"{group_name.capitalize()}", None])  # Category header
    resolution_options.extend(
        [f" {entry['label']}", f"{group_name}:{entry['width']}:{entry['height']}"]
        for entry in group_entries
    )
def enrich_prompt_with_gemini(prompt, max_tokens=1024):
    """
    Enrich a prompt using Google's Gemini API.
    Args:
        prompt: The original prompt to enrich
        max_tokens: Maximum number of tokens for the response
    Returns:
        tuple: (enriched_prompt, error_message) — exactly one of the two is
        None; on any failure the original prompt is NOT modified here, the
        caller decides what to do.
    """
    try:
        # Guard clause: without a key there is nothing to call.
        if not os.getenv("GEMINI_API_KEY"):
            return None, "GEMINI_API_KEY not found in environment variables. Please add it to your .env file."
        llm = genai.GenerativeModel('gemini-1.5-flash')
        instruction = f"""
You are a prompt enhancer for image generation.
Take the following basic prompt and make it longer, very descriptive, and detailed.
Write the description in a paragraph, avoiding bullet points.
Original prompt: {prompt}
Enhanced prompt:
"""
        reply = llm.generate_content(instruction, generation_config={
            "max_output_tokens": max_tokens,
            "temperature": 1,
        })
        return reply.text.strip(), None
    except Exception as exc:
        # Boundary handler: report the failure to the UI instead of crashing.
        failure = f"Error enriching prompt: {str(exc)}"
        logging.error(failure)
        return None, failure
# Function to update width and height based on selected resolution
def update_resolution(resolution_value):
    """Updates width and height based on selected resolution value.

    Args:
        resolution_value: Dropdown value of the form "category:width:height"
            (e.g. "square:1024:1024"), or a falsy value / category header.

    Returns:
        tuple[int, int]: (width, height); falls back to DEFAULT_RESOLUTION
        when the value is missing or malformed.
    """
    if not resolution_value:
        return DEFAULT_RESOLUTION["width"], DEFAULT_RESOLUTION["height"]
    try:
        _category, width, height = resolution_value.split(":")
        return int(width), int(height)
    except (AttributeError, ValueError):
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; only parsing failures (non-string value, wrong
        # number of parts, non-numeric width/height) should hit the fallback.
        return DEFAULT_RESOLUTION["width"], DEFAULT_RESOLUTION["height"]
@spaces.GPU(duration=120)
def infer(
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    use_prompt_enrichment,
    enable_apg,
    progress=gr.Progress(track_tqdm=True),
):
    """Run one generation pass, optionally enriching the prompt via Gemini.

    Returns:
        (image, seed, accordion_update, text_update, error_update): the
        generated PIL image, the seed actually used, and three gr.update
        objects driving the enriched-prompt accordion, its textbox, and the
        error textbox.
    """
    enriched, enrich_error = None, None
    final_prompt = prompt  # used as-is unless enrichment succeeds
    if use_prompt_enrichment and gemini_available:
        enriched, enrich_error = enrich_prompt_with_gemini(prompt)
        if enriched:
            final_prompt = enriched
        # On enrichment failure we silently fall back to the original prompt;
        # the error is still surfaced in the UI below.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    image = pipe(
        prompt=final_prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=torch.Generator().manual_seed(seed),
        apg_config=APGConfig(enabled=enable_apg),
    ).images[0]
    # Decide what the enriched-prompt UI should show after this run.
    if enriched:
        accordion_update = gr.update(visible=True)
        text_update = gr.update(value=enriched)
        error_update = gr.update(visible=False, value="")
    elif enrich_error:
        accordion_update = gr.update(visible=True)
        text_update = gr.update(value="")
        error_update = gr.update(visible=True, value=enrich_error)
    else:
        accordion_update = gr.update(visible=False)
        text_update = gr.update(value="")
        error_update = gr.update(visible=False, value="")
    return image, seed, accordion_update, text_update, error_update
examples = [
["A photorealistic 3D render of a charming, mischievous young boy, approximately eight years old, possessing the endearingly unusual features of long, floppy donkey ears that droop playfully over his shoulders and a surprisingly small, pink pig nose that twitches slightly. His eyes, a sparkling, intelligent hazel, are wide with a hint of playful mischief, framed by slightly unruly, sandy-brown hair that falls in tousled waves across his forehead. He's dressed in a simple, slightly oversized, worn denim shirt and patched-up corduroy trousers, hinting at a life spent playing outdoors. The lighting is soft and natural, casting gentle shadows that highlight the texture of his skin – slightly freckled and sun-kissed, suggesting time spent in the sun. His expression is one of curious anticipation, his lips slightly parted as if he's about to speak or perhaps is listening intently. The background is a subtly blurred pastoral scene, perhaps a sun-dappled meadow with wildflowers, enhancing the overall whimsical and slightly surreal nature of the character. The overall style aims for a blend of realistic rendering with a touch of whimsical cartoonishness, capturing the unique juxtaposition of the boy's human features and his animalistic ears and nose.", None],
["Two white swans with long necks, gracefully swimming in a still body of water. The swans are positioned in a heart shape, with their necks intertwined, creating a romantic and elegant scene. The water is calm and reflective, reflecting the soft, golden light of the setting sun. The background is a blur of soft, golden hues, suggesting a peaceful and serene environment. The image is likely a photograph, captured with a shallow depth of field, which emphasizes the swans and creates a sense of intimacy. The soft lighting and the gentle curves of the swans create a sense of tranquility and beauty. The overall mood of the image is one of love, peace, and serenity.", None],
["""An awe-inspiring landscape of a pristine mountain lake nestled at the foot of a towering alpine range. The still water acts like a mirror, perfectly reflecting the snow-dusted peaks, scattered clouds, and lush forested slopes. The foreground includes rocky outcroppings and patches of wildflowers, adding texture and depth. The lighting is golden-hour soft, casting a warm glow across the scene and highlighting every ridge, tree, and ripple. The sky is vast and vibrant—either a sunrise palette of oranges and pinks, or a deep blue midday dome with wispy clouds. The composition radiates serenity, grandeur, and a connection to the sublime power of nature.""", None],
["A captivating photo, shot with a shallow depth of field, of a stunning blonde woman with cascading waves of platinum blonde hair that fall past her shoulders, catching the light. Her eyes, a striking shade of emerald green, are intensely focused on something just off-camera, creating a sense of intrigue. Sunlight streams softly onto her face, highlighting the delicate curve of her cheekbones and the subtle freckles scattered across her nose. She's wearing a flowing, bohemian-style maxi dress, the fabric a deep sapphire blue that complements her hair and eyes beautifully. The dress is adorned with intricate embroidery along the neckline and sleeves, adding a touch of elegance. The background is intentionally blurred, suggesting a sun-drenched garden setting with hints of vibrant flowers and lush greenery, drawing the viewer's eye to the woman's captivating features. The overall mood is serene yet captivating, evoking a feeling of summer warmth and quiet contemplation. The image should have a natural, slightly ethereal quality, with soft, diffused lighting that enhances her beauty without harsh shadows.", None],
]
css = """
#col-container {
margin: 0 auto;
max-width: 1024px;
}
.prompt-row > .gr-form {
gap: 0.5rem !important; /* Reduce gap between checkbox and button */
align-items: center; /* Align items vertically */
}
"""
# Build the Gradio UI. Layout: a prompt row (textbox + enrich checkbox + run
# button), the result image, a collapsible enriched-prompt display, and an
# "Advanced Settings" accordion holding all generation parameters.
with gr.Blocks(css=css, theme="ParityError/Interstellar") as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f" # {model_name} Text-to-Image Demo")
        with gr.Row(elem_classes="prompt-row"):
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
                scale=6  # Give prompt more space
            )
            use_prompt_enrichment = gr.Checkbox(
                label="Enrich",
                value=True if gemini_available else False,
                visible=gemini_available,  # Hide checkbox if Gemini not available
                scale=1,  # Give checkbox some space
                min_width=100  # Ensure label isn't cut off
            )
            run_button = gr.Button("Run", scale=1, variant="primary", min_width=100)
        result = gr.Image(label="Result", show_label=False)
        # Enriched prompt display (outside Advanced Settings); hidden until a
        # run actually enriches a prompt (or enrichment fails with an error).
        enriched_prompt_display = gr.Accordion("Enriched Prompt", open=False, visible=False)
        with enriched_prompt_display:
            enriched_prompt_text = gr.Textbox(
                label="Enriched Prompt",
                interactive=False,
                lines=8
            )
            enrichment_error = gr.Textbox(
                label="Error",
                visible=False,
                interactive=False,
            )
        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=True,
            )
            # Two ways to pick a resolution: a preset dropdown (which drives
            # the sliders via update_resolution) or free-form sliders.
            with gr.Tabs() as resolution_tabs:
                with gr.TabItem("Preset Resolutions"):
                    resolution_dropdown = gr.Dropdown(
                        label="Resolution",
                        choices=resolution_options,
                        value="square:1024:1024",
                        type="value"
                    )
                with gr.TabItem("Custom Resolution"):
                    with gr.Row():
                        width = gr.Slider(
                            label="Width",
                            minimum=256,
                            maximum=MAX_IMAGE_SIZE,
                            step=32,
                            value=DEFAULT_RESOLUTION["width"],
                        )
                        height = gr.Slider(
                            label="Height",
                            minimum=256,
                            maximum=MAX_IMAGE_SIZE,
                            step=32,
                            value=DEFAULT_RESOLUTION["height"],
                        )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=42,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=15.0,
                    step=0.1,
                    value=6,
                )
                enable_apg = gr.Checkbox(
                    label="Enable APG",
                    value=True,
                )
            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=50,
                step=1,
                value=30,
            )
        # Examples should explicitly target only the prompt input
        max_length = 180  # truncation length for example labels in the gallery
        # Function to handle example clicks - sets prompt and disables enrichment
        def set_example_and_disable_enrichment(example, current_checkbox_value):
            # The current_checkbox_value is not used, but required by Gradio's input mapping
            return example, gr.update(value=False)  # Explicitly disable enrichment
        gr.Examples(
            examples=examples,
            # Add use_prompt_enrichment to inputs
            inputs=[prompt, use_prompt_enrichment],
            outputs=[prompt, use_prompt_enrichment],
            fn=set_example_and_disable_enrichment,  # Use the updated function
            # Need to adjust example_labels to access the prompt string within the sublist
            example_labels=[ex[0][:max_length] + "..." if len(ex[0]) > max_length else ex[0] for ex in examples]
        )
        # Add link to model card
        gr.Markdown(f"[{model_name} Model Card and Weights](https://huggingface.co/{model_repo_id})")
    # Update width and height when resolution dropdown changes
    resolution_dropdown.change(
        fn=update_resolution,
        inputs=resolution_dropdown,
        outputs=[width, height]
    )
    # Wire both the Run button and Enter-in-prompt to the same inference call.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
            use_prompt_enrichment,
            enable_apg,
        ],
        outputs=[result, seed, enriched_prompt_display, enriched_prompt_text, enrichment_error],
    )
# Script entry point: launch the Gradio server (blocking call).
if __name__ == "__main__":
    demo.launch()  # server_name="0.0.0.0", share=True)
|