Spaces:

ByteDance
/

DreamO

Running on Zero

App Files Files Community

UI/UX suggestions and not using the turbo model for quality trade-off

by multimodalart HF Staff - opened 5 days ago

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+42

-12

Files changed (1) hide show

app.py +42 -12

app.py CHANGED Viewed

@@ -63,7 +63,7 @@ class Generator:
         # load dreamo
         model_root = 'black-forest-labs/FLUX.1-dev'
         dreamo_pipeline = DreamOPipeline.from_pretrained(model_root, torch_dtype=torch.bfloat16)
-        dreamo_pipeline.load_dreamo_model(device, use_turbo=True)
         self.dreamo_pipeline = dreamo_pipeline.to(device)
     @torch.no_grad()
@@ -107,7 +107,7 @@ def generate_image(
     width=1024,
     height=1024,
     ref_res=512,
-    num_steps=12,
     guidance=3.5,
     true_cfg=1,
     cfg_start_step=0,
@@ -185,11 +185,26 @@ If DreamO is helpful, please help to ⭐ the <a href='https://github.com/bytedan
 If you have any questions or feedbacks, feel free to open a discussion or contact <b>wuyanze123@gmail.com</b> and <b>eechongm@gmail.com</b>
 """  # noqa E501
 def create_demo():
     with gr.Blocks() as demo:
-        gr.Markdown(_HEADER_)
         with gr.Row():
             with gr.Column():
@@ -197,14 +212,29 @@ def create_demo():
                     ref_image1 = gr.Image(label="ref image 1", type="numpy", height=256)
                     ref_image2 = gr.Image(label="ref image 2", type="numpy", height=256)
                 with gr.Row():
-                    ref_task1 = gr.Dropdown(choices=["ip", "id", "style"], value="ip", label="task for ref image 1")
-                    ref_task2 = gr.Dropdown(choices=["ip", "id", "style"], value="ip", label="task for ref image 2")
                 prompt = gr.Textbox(label="Prompt", value="a person playing guitar in the street")
                 width = gr.Slider(768, 1024, 1024, step=16, label="Width")
                 height = gr.Slider(768, 1024, 1024, step=16, label="Height")
-                num_steps = gr.Slider(8, 30, 12, step=1, label="Number of steps")
                 guidance = gr.Slider(1.0, 10.0, 3.5, step=0.1, label="Guidance")
                 seed = gr.Textbox(label="Seed (-1 for random)", value="-1")
                 with gr.Accordion("Advanced Options", open=False, visible=False):
                     ref_res = gr.Slider(512, 1024, 512, step=16, label="resolution for ref image")
                     neg_prompt = gr.Textbox(label="Neg Prompt", value="")
@@ -230,8 +260,8 @@ def create_demo():
                 [
                     'example_inputs/woman1.png',
                     None,
-                    'ip',
-                    'ip',
                     'profile shot dark photo of a 25-year-old female with smoke escaping from her mouth, the backlit smoke gives the image an ephemeral quality, natural face, natural eyebrows, natural skin texture, award winning photo, highly detailed face, atmospheric lighting, film grain, monochrome',  # noqa E501
                     9180879731249039735,
                 ],
@@ -262,7 +292,7 @@ def create_demo():
                 [
                     'example_inputs/hinton.jpeg',
                     None,
-                    'id',
                     'ip',
                     'portrait, Chibi',
                     5443415087540486371,
@@ -270,7 +300,7 @@ def create_demo():
                 [
                     'example_inputs/mickey.png',
                     None,
-                    'style',
                     'ip',
                     'generate a same style image. A rooster wearing overalls.',
                     6245580464677124951,
@@ -294,7 +324,7 @@ def create_demo():
                 [
                     'example_inputs/woman2.png',
                     'example_inputs/dress.png',
-                    'id',
                     'ip',
                     'the woman wearing a dress, In the banquet hall',
                     7698454872441022867,
@@ -362,4 +392,4 @@ def create_demo():
 if __name__ == '__main__':
     demo = create_demo()
-    demo.launch()

         # load dreamo
         model_root = 'black-forest-labs/FLUX.1-dev'
         dreamo_pipeline = DreamOPipeline.from_pretrained(model_root, torch_dtype=torch.bfloat16)
+        dreamo_pipeline.load_dreamo_model(device, use_turbo=False) # MODIFIED: use_turbo=False
         self.dreamo_pipeline = dreamo_pipeline.to(device)
     @torch.no_grad()
     width=1024,
     height=1024,
     ref_res=512,
+    num_steps=28, # MODIFIED: default num_steps to 28
     guidance=3.5,
     true_cfg=1,
     cfg_start_step=0,
 If you have any questions or feedbacks, feel free to open a discussion or contact <b>wuyanze123@gmail.com</b> and <b>eechongm@gmail.com</b>
 """  # noqa E501
+# MODIFIED: Function to update guidance based on task selection
+def update_guidance_on_task_selection(task1_value, task2_value, current_guidance_value_from_slider):
+    # current_guidance_value_from_slider is a float from the slider state
+    is_identity_selected = (task1_value == "id" or task2_value == "id")
+    if is_identity_selected:
+        return gr.update(value=1.5)
+    else:
+        # If no identity task is selected, and current guidance is 1.5 (was likely set by previous identity task),
+        # revert to original default (3.5). Otherwise, keep user's manual setting.
+        if float(current_guidance_value_from_slider) == 1.5:
+            return gr.update(value=3.5) # Default slider value
+        return gr.update() # No change, keep current value
 def create_demo():
     with gr.Blocks() as demo:
+        # MODIFIED: User guide in a closed Accordion
+        with gr.Accordion("User Guide", open=False):
+            gr.Markdown(_HEADER_)
         with gr.Row():
             with gr.Column():
                     ref_image1 = gr.Image(label="ref image 1", type="numpy", height=256)
                     ref_image2 = gr.Image(label="ref image 2", type="numpy", height=256)
                 with gr.Row():
+                    # MODIFIED: Task names and values
+                    task_choices = [("Composition", "ip"), ("Identity", "id"), ("Style", "style")]
+                    ref_task1 = gr.Dropdown(choices=task_choices, value="ip", label="task for ref image 1")
+                    ref_task2 = gr.Dropdown(choices=task_choices, value="ip", label="task for ref image 2")
                 prompt = gr.Textbox(label="Prompt", value="a person playing guitar in the street")
                 width = gr.Slider(768, 1024, 1024, step=16, label="Width")
                 height = gr.Slider(768, 1024, 1024, step=16, label="Height")
+                num_steps = gr.Slider(8, 30, 28, step=1, label="Number of steps") # MODIFIED: default slider value to 28
                 guidance = gr.Slider(1.0, 10.0, 3.5, step=0.1, label="Guidance")
                 seed = gr.Textbox(label="Seed (-1 for random)", value="-1")
+                # MODIFIED: Event listeners for task dropdowns to update guidance
+                ref_task1.change(
+                    fn=update_guidance_on_task_selection,
+                    inputs=[ref_task1, ref_task2, guidance], # Pass current guidance value
+                    outputs=[guidance]
+                )
+                ref_task2.change(
+                    fn=update_guidance_on_task_selection,
+                    inputs=[ref_task1, ref_task2, guidance], # Pass current guidance value
+                    outputs=[guidance]
+                )
                 with gr.Accordion("Advanced Options", open=False, visible=False):
                     ref_res = gr.Slider(512, 1024, 512, step=16, label="resolution for ref image")
                     neg_prompt = gr.Textbox(label="Neg Prompt", value="")
                 [
                     'example_inputs/woman1.png',
                     None,
+                    'ip', # Corresponds to "Composition"
+                    'ip', # Corresponds to "Composition"
                     'profile shot dark photo of a 25-year-old female with smoke escaping from her mouth, the backlit smoke gives the image an ephemeral quality, natural face, natural eyebrows, natural skin texture, award winning photo, highly detailed face, atmospheric lighting, film grain, monochrome',  # noqa E501
                     9180879731249039735,
                 ],
                 [
                     'example_inputs/hinton.jpeg',
                     None,
+                    'id', # Corresponds to "Identity"
                     'ip',
                     'portrait, Chibi',
                     5443415087540486371,
                 [
                     'example_inputs/mickey.png',
                     None,
+                    'style', # Corresponds to "Style"
                     'ip',
                     'generate a same style image. A rooster wearing overalls.',
                     6245580464677124951,
                 [
                     'example_inputs/woman2.png',
                     'example_inputs/dress.png',
+                    'id', # Corresponds to "Identity"
                     'ip',
                     'the woman wearing a dress, In the banquet hall',
                     7698454872441022867,
 if __name__ == '__main__':
     demo = create_demo()
+    demo.launch()