akhaliq (HF Staff) committed
Commit 7b7fb18 · 1 Parent(s): 09787f9

update text to image

Files changed (1)
  1. app.py +110 -4
app.py CHANGED
@@ -2127,6 +2127,106 @@ def cleanup_temp_media_files():
     except Exception as e:
         print(f"[TempCleanup] Error during cleanup: {str(e)}")
 
+def generate_image_with_gemini(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
+    """Generate image using Google Gemini 2.5 Flash Image Preview via OpenRouter.
+
+    Uses google/gemini-2.5-flash-image-preview:free via OpenRouter chat completions API.
+
+    Returns an HTML <img> tag whose src is an uploaded temporary URL.
+    """
+    try:
+        print(f"[Text2Image] Starting generation with prompt: {prompt[:100]}...")
+        # Check for OpenRouter API key
+        openrouter_key = os.getenv('OPENROUTER_API_KEY')
+        if not openrouter_key:
+            print("[Text2Image] Missing OPENROUTER_API_KEY")
+            return "Error: OPENROUTER_API_KEY environment variable is not set. Please set it to your OpenRouter API key."
+
+        import requests
+        import json as _json
+        import base64
+        import io as _io
+        from PIL import Image
+
+        # Create the chat completion request for text-to-image
+        headers = {
+            "Authorization": f"Bearer {openrouter_key}",
+            "Content-Type": "application/json"
+        }
+
+        data = {
+            "model": "google/gemini-2.5-flash-image-preview:free",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": f"Generate an image based on this description: {prompt}"
+                }
+            ],
+            "temperature": 0.7,
+            "max_tokens": 1000
+        }
+
+        try:
+            print("[Text2Image] Making API request to OpenRouter...")
+            response = requests.post(
+                "https://openrouter.ai/api/v1/chat/completions",
+                headers=headers,
+                json=data,
+                timeout=60
+            )
+            response.raise_for_status()
+            result_data = response.json()
+            print(f"[Text2Image] Received API response: {_json.dumps(result_data, indent=2)}")
+
+            # Extract the generated image from the response (using same pattern as image-to-image)
+            message = result_data.get('choices', [{}])[0].get('message', {})
+
+            if message and 'images' in message and message['images']:
+                # Get the first image from the 'images' list
+                image_data = message['images'][0]
+                base64_string = image_data.get('image_url', {}).get('url', '')
+
+                if base64_string and ',' in base64_string:
+                    # Remove the "data:image/png;base64," prefix
+                    base64_content = base64_string.split(',')[1]
+
+                    # Decode the base64 string and create a PIL image
+                    img_bytes = base64.b64decode(base64_content)
+                    generated_image = Image.open(_io.BytesIO(img_bytes))
+
+                    # Convert PIL image to JPEG bytes for upload
+                    out_buf = _io.BytesIO()
+                    generated_image.convert('RGB').save(out_buf, format='JPEG', quality=90, optimize=True)
+                    image_bytes = out_buf.getvalue()
+                else:
+                    raise RuntimeError(f"API returned an invalid image format. Response: {_json.dumps(result_data, indent=2)}")
+            else:
+                raise RuntimeError(f"API did not return an image. Full Response: {_json.dumps(result_data, indent=2)}")
+
+        except requests.exceptions.HTTPError as err:
+            error_body = err.response.text
+            if err.response.status_code == 401:
+                return "Error: Authentication failed. Check your OpenRouter API key."
+            elif err.response.status_code == 429:
+                return "Error: Rate limit exceeded or insufficient credits. Check your OpenRouter account."
+            else:
+                return f"Error: An API error occurred: {error_body}"
+        except Exception as e:
+            return f"Error: An unexpected error occurred: {str(e)}"
+
+        # Upload and return HTML tag
+        print("[Text2Image] Uploading image to HF...")
+        filename = f"generated_image_{image_index}.jpg"
+        temp_url = upload_media_to_hf(image_bytes, filename, "image", token, use_temp=True)
+        if temp_url.startswith("Error"):
+            print(f"[Text2Image] Upload failed: {temp_url}")
+            return temp_url
+        print(f"[Text2Image] Successfully generated image: {temp_url}")
+        return f"<img src=\"{temp_url}\" alt=\"{prompt}\" style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\" loading=\"lazy\" />"
+    except Exception as e:
+        print(f"Text-to-image generation error: {str(e)}")
+        return f"Error generating image (text-to-image): {str(e)}"
+
 def generate_image_with_qwen(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
     """Generate image using Qwen image model via Hugging Face InferenceClient and upload to HF for permanent URL"""
     try:
@@ -2649,7 +2749,7 @@ def create_image_replacement_blocks(html_content: str, user_prompt: str) -> str:
     # Generate images for each prompt
     generated_images = []
     for i, prompt in enumerate(image_prompts):
-        image_html = generate_image_with_qwen(prompt, i, token=None) # TODO: Pass token from parent context
+        image_html = generate_image_with_gemini(prompt, i, token=None) # TODO: Pass token from parent context
         if not image_html.startswith("Error"):
             generated_images.append((i, image_html))
 
@@ -2739,7 +2839,7 @@ def create_image_replacement_blocks_text_to_image_single(html_content: str, prom
     placeholder_images = re.findall(img_pattern, html_content)
 
     # Generate a single image
-    image_html = generate_image_with_qwen(prompt, 0, token=None) # TODO: Pass token from parent context
+    image_html = generate_image_with_gemini(prompt, 0, token=None) # TODO: Pass token from parent context
     if image_html.startswith("Error"):
         return ""
 
@@ -3270,12 +3370,18 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
         print(f"[MediaApply] Running text-to-image with prompt len={len(t2i_prompt)}")
         # Single-image flow for text-to-image (LLM placement first, fallback deterministic)
         try:
-            image_html_tag = generate_image_with_qwen(t2i_prompt, 0, token=token)
+            print(f"[MediaApply] Calling generate_image_with_gemini with prompt: {t2i_prompt[:50]}...")
+            image_html_tag = generate_image_with_gemini(t2i_prompt, 0, token=token)
+            print(f"[MediaApply] Image generation result: {image_html_tag[:200]}...")
             if not (image_html_tag or "").startswith("Error"):
+                print("[MediaApply] Attempting LLM placement of image...")
                 blocks = llm_place_media(result, image_html_tag, media_kind="image")
+                print(f"[MediaApply] LLM placement result: {len(blocks) if blocks else 0} chars")
             else:
+                print(f"[MediaApply] Image generation failed: {image_html_tag}")
                 blocks = ""
-        except Exception:
+        except Exception as e:
+            print(f"[MediaApply] Exception during image generation: {str(e)}")
             blocks = ""
         if not blocks:
             blocks = create_image_replacement_blocks_text_to_image_single(result, t2i_prompt)
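
Note for reviewers: the extraction logic in generate_image_with_gemini assumes a particular OpenRouter response shape, inferred here from the parsing code itself rather than stated anywhere in the diff. A minimal, self-contained sketch of that assumed shape and of the data-URL decoding step, using a placeholder payload instead of a real image:

# Assumed response shape for google/gemini-2.5-flash-image-preview:free via OpenRouter,
# inferred from the parsing code in this commit (not from OpenRouter documentation):
# choices[0].message.images[0].image_url.url holds a base64 data URL.
import base64

fake_payload = base64.b64encode(b"placeholder bytes, not a real PNG").decode()
mock_response = {
    "choices": [
        {
            "message": {
                "content": "",
                "images": [
                    {"image_url": {"url": f"data:image/png;base64,{fake_payload}"}}
                ],
            }
        }
    ]
}

# Same extraction steps as generate_image_with_gemini, minus the PIL conversion.
message = mock_response.get("choices", [{}])[0].get("message", {})
base64_string = message["images"][0]["image_url"]["url"]
base64_content = base64_string.split(",")[1]   # drop the "data:image/png;base64," prefix
img_bytes = base64.b64decode(base64_content)
print(f"decoded {len(img_bytes)} bytes")       # a real response would yield PNG bytes here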
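
A rough way to exercise the new code path outside the Gradio UI is sketched below. It assumes OPENROUTER_API_KEY is exported, that app.py is importable from the working directory (importing it may run module-level Gradio setup), and that the temporary-upload step tolerates token=None as the TODO-marked call sites in this diff do; treat it as a smoke test, not part of the commit.

# Smoke-test sketch for the new text-to-image path (not part of the commit).
# Assumptions: OPENROUTER_API_KEY is set, app.py is on the import path, and
# upload_media_to_hf accepts token=None as in the TODO-marked call sites above.
import os

from app import generate_image_with_gemini

if not os.getenv("OPENROUTER_API_KEY"):
    raise SystemExit("export OPENROUTER_API_KEY before running this sketch")

result = generate_image_with_gemini(
    "a watercolor fox in a snowy forest",  # hypothetical prompt for illustration
    image_index=0,
    token=None,  # the live app passes a gr.OAuthToken in apply_generated_media_to_html()
)

if result.startswith("Error"):
    print("generation failed:", result)
else:
    print("generated <img> tag:", result[:120], "...")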