Spaces:

MohamedRashad
/

Infinity

Running on Zero

App Files Files Community

MohamedRashad committed 25 days ago

Commit

836dd96

1 Parent(s): c0ec201

Add prompt enhancement functionality and integrate Gradio client in app.py; update requirements.txt

Browse files

Files changed (2) hide show

app.py +68 -5
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ import re
 import random
 from pathlib import Path
 from typing import List
 import cv2
 import numpy as np
@@ -29,8 +30,10 @@ import spaces
 from models.infinity import Infinity
 from models.basic import *
 from utils.dynamic_resolution import dynamic_resolution_h_w, h_div_w_templates
 torch._dynamo.config.cache_size_limit = 64
 # Define a function to download weights if not present
 def download_infinity_weights(weights_path):
@@ -357,6 +360,60 @@ def load_transformer(vae, args):
     )
     return infinity
 # Set up paths
 weights_path = Path(__file__).parent / 'weights'
 weights_path.mkdir(exist_ok=True)
@@ -380,7 +437,6 @@ args = argparse.Namespace(
     rope2d_normalized_by_hw=2,
     use_scale_schedule_embedding=0,
     sampling_per_bits=1,
-    text_encoder_ckpt=str(weights_path / 'flan-t5-xl'),
     text_channels=2048,
     apply_spatial_patchify=0,
     h_div_w_template=1.000,
@@ -400,7 +456,7 @@ infinity = load_transformer(vae, args)
 # Define the image generation function
 @spaces.GPU
-def generate_image(prompt, cfg, tau, h_div_w, seed, enable_positive_prompt):
     try:
         args.prompt = prompt
         args.cfg = cfg
@@ -454,8 +510,8 @@ with gr.Blocks() as demo:
             # Prompt Settings
             gr.Markdown("### Prompt Settings")
             prompt = gr.Textbox(label="Prompt", value="alien spaceship enterprise", placeholder="Enter your prompt here...")
-            enable_positive_prompt = gr.Checkbox(label="Enable Positive Prompt", value=False, info="Enhance prompts with positive attributes for faces.")
             # Image Settings
             gr.Markdown("### Image Settings")
             with gr.Row():
@@ -477,10 +533,17 @@ with gr.Blocks() as demo:
     # Error Handling
     error_message = gr.Textbox(label="Error Message", visible=False)
     # Link the generate button to the image generation function
     generate_button.click(
         generate_image,
-        inputs=[prompt, cfg, tau, h_div_w, seed, enable_positive_prompt],
         outputs=output_image
     )

 import random
 from pathlib import Path
 from typing import List
+import json
 import cv2
 import numpy as np
 from models.infinity import Infinity
 from models.basic import *
 from utils.dynamic_resolution import dynamic_resolution_h_w, h_div_w_templates
+from gradio_client import Client
 torch._dynamo.config.cache_size_limit = 64
+client = Client("Qwen/Qwen2.5-72B-Instruct")
 # Define a function to download weights if not present
 def download_infinity_weights(weights_path):
     )
     return infinity
+def enhance_prompt(prompt):
+    SYSTEM = """You are part of a team of bots that creates images. You work with an assistant bot that will draw anything you say.
+When given a user prompt, your role is to transform it into a creative, detailed, and vivid image description. Additionally, you will assign a configuration value (`cfg`) based on the type of image.
+### Guidelines for Generating the Output:
+1. **Output Format:**
+   Your response must be in the following dictionary format:
+   ```json
+   {
+     "prompt": "<enhanced image description>",
+     "cfg": <cfg value>
+   }
+   ```
+2. **Enhancing the "prompt" field:**
+   - Use your creativity to transform short or vague prompts into highly detailed, descriptive, and imaginative image descriptions.
+   - Preserve the original intent and meaning of the user’s input.
+   - Focus on vivid imagery, sensory details, and emotional resonance in your descriptions.
+   - For particularly long user prompts (over 50 words), output them directly without refinement.
+   - Image descriptions must remain between 8-512 words. Any excess text will be ignored.
+   - If the user's request involves rendering specific text in the image, enclose that text in single quotation marks and prefix it with "the text".
+3. **Determining the "cfg" field:**
+   - If the image to be generated is likely to feature a clear face, set `"cfg": 1`.
+   - If the image does not prominently feature a face, set `"cfg": 3`.
+4. **Examples of Enhanced Prompts:**
+   - **User prompt:** "a tree"
+     **Enhanced prompt:** "A photo of a majestic oak tree stands proudly in the middle of a sunlit meadow, its branches stretching out like welcoming arms. The leaves shimmer in shades of vibrant green, casting dappled shadows on the soft grass below."
+     **Cfg:** `3`
+   - **User prompt:** "a cat by the window"
+     **Enhanced prompt:** "A serene scene of a fluffy tabby cat perched on the windowsill, gazing out at the golden hues of a sunset. The soft light filters through lace curtains, highlighting the cat’s delicate whiskers and its relaxed posture."
+     **Cfg:** `3`
+5. **Your Output:**
+   Always return a single dictionary containing both `"prompt"` and `"cfg"` fields. Avoid any additional commentary or explanations.
+Don't write anything except the dictionary in the output. (Don't start with ```)
+"""
+    result = client.predict(
+            query=prompt,
+            history=[],
+            system=SYSTEM,
+            api_name="/model_chat"
+    )
+    dict_of_inputs = json.loads(result[1][-1][-1])
+    print(dict_of_inputs)
+    return gr.update(value=dict_of_inputs["prompt"]), gr.update(value=float(dict_of_inputs['cfg']))
 # Set up paths
 weights_path = Path(__file__).parent / 'weights'
 weights_path.mkdir(exist_ok=True)
     rope2d_normalized_by_hw=2,
     use_scale_schedule_embedding=0,
     sampling_per_bits=1,
     text_channels=2048,
     apply_spatial_patchify=0,
     h_div_w_template=1.000,
 # Define the image generation function
 @spaces.GPU
+def generate_image(prompt, cfg, tau, h_div_w, seed, enable_positive_prompt=False):
     try:
         args.prompt = prompt
         args.cfg = cfg
             # Prompt Settings
             gr.Markdown("### Prompt Settings")
             prompt = gr.Textbox(label="Prompt", value="alien spaceship enterprise", placeholder="Enter your prompt here...")
+            enhance_prompt_button = gr.Button("Enhance Prompt", variant="secondary")
             # Image Settings
             gr.Markdown("### Image Settings")
             with gr.Row():
     # Error Handling
     error_message = gr.Textbox(label="Error Message", visible=False)
+    # Link the enhance prompt button to the prompt enhancement function
+    enhance_prompt_button.click(
+        enhance_prompt,
+        inputs=prompt,
+        outputs=[prompt, cfg],
+    )
     # Link the generate button to the image generation function
     generate_button.click(
         generate_image,
+        inputs=[prompt, cfg, tau, h_div_w, seed],
         outputs=output_image
     )

requirements.txt CHANGED Viewed

@@ -6,4 +6,6 @@ transformers
 argparse
 spaces
 torchvision
-timm

 argparse
 spaces
 torchvision
+timm
+gradio_client
+imageio