akhaliq (HF Staff) committed
Commit 7b7fb18 · 1 Parent(s): 09787f9

update text to image

Files changed (1)
  1. app.py +110 -4
app.py CHANGED
@@ -2127,6 +2127,106 @@ def cleanup_temp_media_files():
     except Exception as e:
         print(f"[TempCleanup] Error during cleanup: {str(e)}")
 
+def generate_image_with_gemini(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
+    """Generate image using Google Gemini 2.5 Flash Image Preview via OpenRouter.
+
+    Uses google/gemini-2.5-flash-image-preview:free via OpenRouter chat completions API.
+
+    Returns an HTML <img> tag whose src is an uploaded temporary URL.
+    """
+    try:
+        print(f"[Text2Image] Starting generation with prompt: {prompt[:100]}...")
+        # Check for OpenRouter API key
+        openrouter_key = os.getenv('OPENROUTER_API_KEY')
+        if not openrouter_key:
+            print("[Text2Image] Missing OPENROUTER_API_KEY")
+            return "Error: OPENROUTER_API_KEY environment variable is not set. Please set it to your OpenRouter API key."
+
+        import requests
+        import json as _json
+        import base64
+        import io as _io
+        from PIL import Image
+
+        # Create the chat completion request for text-to-image
+        headers = {
+            "Authorization": f"Bearer {openrouter_key}",
+            "Content-Type": "application/json"
+        }
+
+        data = {
+            "model": "google/gemini-2.5-flash-image-preview:free",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": f"Generate an image based on this description: {prompt}"
+                }
+            ],
+            "temperature": 0.7,
+            "max_tokens": 1000
+        }
+
+        try:
+            print("[Text2Image] Making API request to OpenRouter...")
+            response = requests.post(
+                "https://openrouter.ai/api/v1/chat/completions",
+                headers=headers,
+                json=data,
+                timeout=60
+            )
+            response.raise_for_status()
+            result_data = response.json()
+            print(f"[Text2Image] Received API response: {_json.dumps(result_data, indent=2)}")
+
+            # Extract the generated image from the response (using same pattern as image-to-image)
+            message = result_data.get('choices', [{}])[0].get('message', {})
+
+            if message and 'images' in message and message['images']:
+                # Get the first image from the 'images' list
+                image_data = message['images'][0]
+                base64_string = image_data.get('image_url', {}).get('url', '')
+
+                if base64_string and ',' in base64_string:
+                    # Remove the "data:image/png;base64," prefix
+                    base64_content = base64_string.split(',')[1]
+
+                    # Decode the base64 string and create a PIL image
+                    img_bytes = base64.b64decode(base64_content)
+                    generated_image = Image.open(_io.BytesIO(img_bytes))
+
+                    # Convert PIL image to JPEG bytes for upload
+                    out_buf = _io.BytesIO()
+                    generated_image.convert('RGB').save(out_buf, format='JPEG', quality=90, optimize=True)
+                    image_bytes = out_buf.getvalue()
+                else:
+                    raise RuntimeError(f"API returned an invalid image format. Response: {_json.dumps(result_data, indent=2)}")
+            else:
+                raise RuntimeError(f"API did not return an image. Full Response: {_json.dumps(result_data, indent=2)}")
+
+        except requests.exceptions.HTTPError as err:
+            error_body = err.response.text
+            if err.response.status_code == 401:
+                return "Error: Authentication failed. Check your OpenRouter API key."
+            elif err.response.status_code == 429:
+                return "Error: Rate limit exceeded or insufficient credits. Check your OpenRouter account."
+            else:
+                return f"Error: An API error occurred: {error_body}"
+        except Exception as e:
+            return f"Error: An unexpected error occurred: {str(e)}"
+
+        # Upload and return HTML tag
+        print("[Text2Image] Uploading image to HF...")
+        filename = f"generated_image_{image_index}.jpg"
+        temp_url = upload_media_to_hf(image_bytes, filename, "image", token, use_temp=True)
+        if temp_url.startswith("Error"):
+            print(f"[Text2Image] Upload failed: {temp_url}")
+            return temp_url
+        print(f"[Text2Image] Successfully generated image: {temp_url}")
+        return f"<img src=\"{temp_url}\" alt=\"{prompt}\" style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\" loading=\"lazy\" />"
+    except Exception as e:
+        print(f"Text-to-image generation error: {str(e)}")
+        return f"Error generating image (text-to-image): {str(e)}"
+
 def generate_image_with_qwen(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
     """Generate image using Qwen image model via Hugging Face InferenceClient and upload to HF for permanent URL"""
     try:
@@ -2649,7 +2749,7 @@ def create_image_replacement_blocks(html_content: str, user_prompt: str) -> str:
     # Generate images for each prompt
     generated_images = []
     for i, prompt in enumerate(image_prompts):
-        image_html = generate_image_with_qwen(prompt, i, token=None) # TODO: Pass token from parent context
+        image_html = generate_image_with_gemini(prompt, i, token=None) # TODO: Pass token from parent context
         if not image_html.startswith("Error"):
             generated_images.append((i, image_html))
 
@@ -2739,7 +2839,7 @@ def create_image_replacement_blocks_text_to_image_single(html_content: str, prom
     placeholder_images = re.findall(img_pattern, html_content)
 
     # Generate a single image
-    image_html = generate_image_with_qwen(prompt, 0, token=None) # TODO: Pass token from parent context
+    image_html = generate_image_with_gemini(prompt, 0, token=None) # TODO: Pass token from parent context
     if image_html.startswith("Error"):
         return ""
 
@@ -3270,12 +3370,18 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
         print(f"[MediaApply] Running text-to-image with prompt len={len(t2i_prompt)}")
         # Single-image flow for text-to-image (LLM placement first, fallback deterministic)
         try:
-            image_html_tag = generate_image_with_qwen(t2i_prompt, 0, token=token)
+            print(f"[MediaApply] Calling generate_image_with_gemini with prompt: {t2i_prompt[:50]}...")
+            image_html_tag = generate_image_with_gemini(t2i_prompt, 0, token=token)
+            print(f"[MediaApply] Image generation result: {image_html_tag[:200]}...")
             if not (image_html_tag or "").startswith("Error"):
+                print("[MediaApply] Attempting LLM placement of image...")
                 blocks = llm_place_media(result, image_html_tag, media_kind="image")
+                print(f"[MediaApply] LLM placement result: {len(blocks) if blocks else 0} chars")
             else:
+                print(f"[MediaApply] Image generation failed: {image_html_tag}")
                 blocks = ""
-        except Exception:
+        except Exception as e:
+            print(f"[MediaApply] Exception during image generation: {str(e)}")
             blocks = ""
         if not blocks:
             blocks = create_image_replacement_blocks_text_to_image_single(result, t2i_prompt)
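
Note for reviewers: the extraction logic in generate_image_with_gemini assumes a particular OpenRouter response shape, inferred here from the parsing code itself rather than stated anywhere in the diff. A minimal, self-contained sketch of that assumed shape and of the data-URL decoding step, using a placeholder payload instead of a real image:

# Assumed response shape for google/gemini-2.5-flash-image-preview:free via OpenRouter,
# inferred from the parsing code in this commit (not from OpenRouter documentation):
# choices[0].message.images[0].image_url.url holds a base64 data URL.
import base64

fake_payload = base64.b64encode(b"placeholder bytes, not a real PNG").decode()
mock_response = {
    "choices": [
        {
            "message": {
                "content": "",
                "images": [
                    {"image_url": {"url": f"data:image/png;base64,{fake_payload}"}}
                ],
            }
        }
    ]
}

# Same extraction steps as generate_image_with_gemini, minus the PIL conversion.
message = mock_response.get("choices", [{}])[0].get("message", {})
base64_string = message["images"][0]["image_url"]["url"]
base64_content = base64_string.split(",")[1]   # drop the "data:image/png;base64," prefix
img_bytes = base64.b64decode(base64_content)
print(f"decoded {len(img_bytes)} bytes")       # a real response would yield PNG bytes here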
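
A rough way to exercise the new code path outside the Gradio UI is sketched below. It assumes OPENROUTER_API_KEY is exported, that app.py is importable from the working directory (importing it may run module-level Gradio setup), and that the temporary-upload step tolerates token=None as the TODO-marked call sites in this diff do; treat it as a smoke test, not part of the commit.

# Smoke-test sketch for the new text-to-image path (not part of the commit).
# Assumptions: OPENROUTER_API_KEY is set, app.py is on the import path, and
# upload_media_to_hf accepts token=None as in the TODO-marked call sites above.
import os

from app import generate_image_with_gemini

if not os.getenv("OPENROUTER_API_KEY"):
    raise SystemExit("export OPENROUTER_API_KEY before running this sketch")

result = generate_image_with_gemini(
    "a watercolor fox in a snowy forest",  # hypothetical prompt for illustration
    image_index=0,
    token=None,  # the live app passes a gr.OAuthToken in apply_generated_media_to_html()
)

if result.startswith("Error"):
    print("generation failed:", result)
else:
    print("generated <img> tag:", result[:120], "...")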