Elea Zhong committed
Commit 6d99887 · 1 Parent(s): 2dd3815

update demo

Files changed (4)
  1. app.py +52 -147
  2. qwenimage/datamodels.py +3 -3
  3. qwenimage/foundation.py +6 -25
  4. scripts/inf.ipynb +0 -0
app.py CHANGED
@@ -11,53 +11,44 @@ from PIL import Image
 import gradio as gr
 import spaces
 
+from qwenimage.datamodels import QwenConfig
 from qwenimage.debug import ctimed, ftimed
 from qwenimage.experiments.experiments_qwen import ExperimentRegistry
+from qwenimage.finetuner import QwenLoraFinetuner
+from qwenimage.foundation import QwenImageFoundation
 from qwenimage.prompt import build_camera_prompt
 
 # --- Model Loading ---
-dtype = torch.bfloat16
-device = "cuda" if torch.cuda.is_available() else "cpu"
 
-print(f"main cuda: {torch.cuda.is_available()=}")
-
-exp = ExperimentRegistry.get("qwen_lightning_fa3_aot_int8_fuse_downsize512")()
-exp.load()
-
-
-@spaces.GPU(duration=1500)
-def optim_pipe():
-    print(f"func cuda: {torch.cuda.is_available()=}")
-    exp.optimize()
-
-optim_pipe()
+foundation = QwenImageFoundation(QwenConfig(
+    vae_image_size=1024 * 1024,
+    regression_base_pipe_steps=4,
+))
+finetuner = QwenLoraFinetuner(foundation, foundation.config)
+finetuner.load("checkpoints/reg-mse-pixel-lpips_005000", lora_rank=32)
 
 
 MAX_SEED = np.iinfo(np.int32).max
 
 
 @spaces.GPU
-def infer_camera_edit(
+def run_pipe(
     image,
-    rotate_deg,
-    move_forward,
-    vertical_tilt,
-    wideangle,
+    prompt,
     seed,
     randomize_seed,
-    true_guidance_scale,
     num_inference_steps,
-    height,
-    width,
+    shift,
     prev_output = None,
     progress=gr.Progress(track_tqdm=True)
 ):
     with ctimed("pre pipe"):
-        prompt = build_camera_prompt(rotate_deg, move_forward, vertical_tilt, wideangle)
-        print(f"Generated Prompt: {prompt}")
 
         if randomize_seed:
             seed = random.randint(0, MAX_SEED)
+
+        device = "cuda" if torch.cuda.is_available() else "cpu"
         generator = torch.Generator(device=device).manual_seed(seed)
 
         # Choose input image (prefer uploaded, else last output)
@@ -75,150 +66,64 @@ def infer_camera_edit(
 
         print(f"{len(pil_images)=}")
 
-        if prompt == "no camera movement":
-            return image, seed, prompt
-    result = exp.run_once(
+        finetuner.enable()
+        foundation.scheduler.config["base_shift"] = shift
+        foundation.scheduler.config["max_shift"] = shift
+
+    result = foundation.base_pipe(foundation.INPUT_MODEL(
        image=pil_images,
        prompt=prompt,
-        height=height if height != 0 else None,
-        width=width if width != 0 else None,
        num_inference_steps=num_inference_steps,
        generator=generator,
-        true_cfg_scale=true_guidance_scale,
-        num_images_per_prompt=1,
-    )
+    ))[0]
 
-    return result, seed, prompt
+    return result, seed
 
 
 # --- UI ---
-css = '''#col-container { max-width: 800px; margin: 0 auto; }
-.dark .progress-text{color: white !important}
-#examples{max-width: 800px; margin: 0 auto; }'''
 
-def reset_all():
-    return [0, 0, 0, 0, False]
 
-def end_reset():
-    return False
 
-def update_dimensions_on_upload(image):
-    if image is None:
-        return 1024, 1024
-
-    original_width, original_height = image.size
-
-    if original_width > original_height:
-        new_width = 1024
-        aspect_ratio = original_height / original_width
-        new_height = int(new_width * aspect_ratio)
-    else:
-        new_height = 1024
-        aspect_ratio = original_width / original_height
-        new_width = int(new_height * aspect_ratio)
-
-    # Ensure dimensions are multiples of 8
-    new_width = (new_width // 8) * 8
-    new_height = (new_height // 8) * 8
-
-    return new_width, new_height
-
-
-with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown("## 🎬 Qwen Image Edit — Camera Angle Control")
-        gr.Markdown("""
-        Qwen Image Edit 2509 for Camera Control ✨
-        Using [dx8152's Qwen-Edit-2509-Multiple-angles LoRA](https://huggingface.co/dx8152/Qwen-Edit-2509-Multiple-angles) and [Phr00t/Qwen-Image-Edit-Rapid-AIO](https://huggingface.co/Phr00t/Qwen-Image-Edit-Rapid-AIO/tree/main) for 4-step inference 💨
-        """
-        )
-
-        with gr.Row():
-            with gr.Column():
-                image = gr.Image(label="Input Image", type="pil")
-                prev_output = gr.Image(value=None, visible=False)
-                is_reset = gr.Checkbox(value=False, visible=False)
-
-                with gr.Tab("Camera Controls"):
-                    rotate_deg = gr.Slider(label="Rotate Right-Left (degrees °)", minimum=-90, maximum=90, step=45, value=0)
-                    move_forward = gr.Slider(label="Move Forward → Close-Up", minimum=0, maximum=10, step=5, value=0)
-                    vertical_tilt = gr.Slider(label="Vertical Angle (Bird ↔ Worm)", minimum=-1, maximum=1, step=1, value=0)
-                    wideangle = gr.Checkbox(label="Wide-Angle Lens", value=False)
-                with gr.Row():
-                    reset_btn = gr.Button("Reset")
-                    run_btn = gr.Button("Generate", variant="primary")
-
-                with gr.Accordion("Advanced Settings", open=False):
-                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
-                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-                    true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
-                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=2)
-                    height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
-                    width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
-
-            with gr.Column():
-                result = gr.Image(label="Output Image", interactive=False)
-                prompt_preview = gr.Textbox(label="Processed Prompt", interactive=False)
+with gr.Blocks(theme=gr.themes.Citrus()) as demo:
+    gr.Markdown("Qwen Image Demo")
+
+    with gr.Row():
+        with gr.Column():
+            image = gr.Image(label="Input Image", type="pil")
+            prev_output = gr.Image(value=None, visible=False)
+            is_reset = gr.Checkbox(value=False, visible=False)
+            prompt = gr.Textbox(label="Prompt", placeholder="Prompt", lines=2)
+
+            run_btn = gr.Button("Generate", variant="primary")
+
+            with gr.Accordion("Advanced Settings", open=False):
+                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=2)
+                shift = gr.Slider(label="Timestep Shift", minimum=0.0, maximum=4.0, step=0.1, value=2.0)
+
+        with gr.Column():
+            result = gr.Image(label="Output Image", interactive=False)
 
     inputs = [
-        image,rotate_deg, move_forward,
-        vertical_tilt, wideangle,
-        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width, prev_output
+        image,
+        prompt,
+        seed,
+        randomize_seed,
+        num_inference_steps,
+        shift,
+        prev_output,
     ]
-    outputs = [result, seed, prompt_preview]
-
-    # Reset behavior
-    reset_btn.click(
-        fn=reset_all,
-        inputs=None,
-        outputs=[rotate_deg, move_forward, vertical_tilt, wideangle, is_reset],
-        queue=False
-    ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False)
+    outputs = [result, seed]
+
 
     run_event = run_btn.click(
-        fn=infer_camera_edit,
+        fn=run_pipe,
        inputs=inputs,
        outputs=outputs
    )
-
-    # Image upload triggers dimension update and control reset
-    image.upload(
-        fn=update_dimensions_on_upload,
-        inputs=[image],
-        outputs=[width, height]
-    ).then(
-        fn=reset_all,
-        inputs=None,
-        outputs=[rotate_deg, move_forward, vertical_tilt, wideangle, is_reset],
-        queue=False
-    ).then(
-        fn=end_reset,
-        inputs=None,
-        outputs=[is_reset],
-        queue=False
-    )
-
-    # Live updates
-    @ftimed
-    def maybe_infer(is_reset, progress=gr.Progress(track_tqdm=True), *args):
-        if is_reset:
-            return gr.update(), gr.update(), gr.update(), gr.update()
-        else:
-            return infer_camera_edit(*args)
-
-    control_inputs = [
-        image, rotate_deg, move_forward,
-        vertical_tilt, wideangle,
-        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width, prev_output
-    ]
-    control_inputs_with_flag = [is_reset] + control_inputs
-
-    for control in [rotate_deg, move_forward, vertical_tilt]:
-        control.release(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs)
-
-    wideangle.input(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs)
 
     run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])
 
 demo.launch()
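
Note: the demo now builds a `QwenImageFoundation` plus a `QwenLoraFinetuner` at import time and calls the 4-step regression pipeline directly, instead of loading an `ExperimentRegistry` experiment and running its warm-up optimization pass. Outside of Gradio, the new inference path reduces to roughly the sketch below (a minimal sketch assuming the repository's APIs behave as used in the diff; the input file, prompt, and seed are placeholders, and the `@spaces.GPU` decoration is omitted):

```python
import torch
from PIL import Image

from qwenimage.datamodels import QwenConfig
from qwenimage.finetuner import QwenLoraFinetuner
from qwenimage.foundation import QwenImageFoundation

# Same construction as app.py: ~1 MP working area, 4-step regression pipeline.
foundation = QwenImageFoundation(QwenConfig(
    vae_image_size=1024 * 1024,
    regression_base_pipe_steps=4,
))
finetuner = QwenLoraFinetuner(foundation, foundation.config)
finetuner.load("checkpoints/reg-mse-pixel-lpips_005000", lora_rank=32)
finetuner.enable()

# run_pipe() drives both shift bounds from a single slider value.
shift = 2.0
foundation.scheduler.config["base_shift"] = shift
foundation.scheduler.config["max_shift"] = shift

device = "cuda" if torch.cuda.is_available() else "cpu"
generator = torch.Generator(device=device).manual_seed(0)  # placeholder seed

result = foundation.base_pipe(foundation.INPUT_MODEL(
    image=[Image.open("input.png").convert("RGB")],          # placeholder input
    prompt="rotate the camera 45 degrees to the left",       # placeholder prompt
    num_inference_steps=2,
    generator=generator,
))[0]
result.save("output.png")
```
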
qwenimage/datamodels.py CHANGED
@@ -21,8 +21,8 @@ class QwenInputs(BaseModel):
     num_inference_steps: int = 50
     generator: torch.Generator | list[torch.Generator] | None = None
     max_sequence_length: int = 512
-    vae_image_override: int | None = 512 * 512
-    latent_size_override: int | None = 512 * 512
+    vae_image_override: int | None = None
+    latent_size_override: int | None = None
 
     model_config = ConfigDict(
         arbitrary_types_allowed=True,
@@ -75,7 +75,7 @@ class QwenConfig(ExperimentTrainerParameters):
     static_mu: float | None = None
     loss_weight_dist: str | None = None  # "scaled_clipped_gaussian", "logit-normal"
 
-    vae_image_size: int = 512 * 512
+    vae_image_size: int = 1024 * 1024
     offload_text_encoder: bool = True
     quantize_text_encoder: bool = False
     quantize_transformer: bool = False
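
Note: the override fields on `QwenInputs` move from a hard-coded `512 * 512` to `None`, and `QwenConfig.vae_image_size` becomes `1024 * 1024`. Together with the `base_pipe` change in `qwenimage/foundation.py` below, a request that leaves the overrides unset now inherits the configured working area instead of being pinned to `512 * 512` pixels per call. A plain-Python sketch of the new fallback (field names and values from the diff; construction of the actual pydantic models is elided):

```python
# New defaults after this commit (values copied from the diff).
VAE_IMAGE_SIZE = 1024 * 1024        # QwenConfig.vae_image_size, was 512 * 512
vae_image_override = None           # QwenInputs.vae_image_override, was 512 * 512
latent_size_override = None         # QwenInputs.latent_size_override, was 512 * 512

# QwenImageFoundation.base_pipe now fills in the configured area whenever a
# caller leaves the overrides at None (see foundation.py below).
if vae_image_override is None:
    vae_image_override = VAE_IMAGE_SIZE
if latent_size_override is None:
    latent_size_override = VAE_IMAGE_SIZE

assert vae_image_override == latent_size_override == 1_048_576
```
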
qwenimage/foundation.py CHANGED
@@ -327,26 +327,16 @@ class QwenImageFoundation(WandModel):
     def base_pipe(self, inputs: QwenInputs) -> list[Image]:
         print(inputs)
         self.offload_text_encoder("cuda")
-        image = inputs.image[0]
-        w,h = image.size
-        h_r, w_r = calculate_dimensions(self.config.vae_image_size, h/w)
-        image = TF.resize(image, (h_r, w_r))
-        inputs.image = [image]
+        if inputs.vae_image_override is None:
+            inputs.vae_image_override = self.config.vae_image_size
+        if inputs.latent_size_override is None:
+            inputs.latent_size_override = self.config.vae_image_size
         return self.pipe(**inputs.model_dump()).images
 
 
 
 class QwenImageFoundationSaveInterm(QwenImageFoundation):
     PIPELINE = QwenImageEditSaveIntermPipeline
-
-    def base_pipe(self, inputs: QwenInputs) -> list[Image]:
-        print(inputs)
-        image = inputs.image[0]
-        w,h = image.size
-        h_r, w_r = calculate_dimensions(self.config.vae_image_size, h/w)
-        image = TF.resize(image, (h_r, w_r))
-        inputs.image = [image]
-        return self.pipe(**inputs.model_dump())
 
 
 class QwenImageRegressionFoundation(QwenImageFoundation):
@@ -589,15 +579,6 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
 
 
     def base_pipe(self, inputs: QwenInputs) -> list[Image]:
-        # config overrides
         inputs.num_inference_steps = self.config.regression_base_pipe_steps
-        inputs.latent_size_override = self.config.vae_image_size
-        inputs.vae_image_override = self.config.vae_image_size
-        image = inputs.image[0]
-        w,h = image.size
-        h_r, w_r = calculate_dimensions(self.config.vae_image_size, h/w)
-        image = TF.resize(image, (h_r, w_r))
-        inputs.image = [image]
-        inputs.height = h_r
-        inputs.width = w_r
-        return super().base_pipe(inputs)
+        return super().base_pipe(inputs)
+
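
Note: all three `base_pipe` overrides drop the per-call aspect-preserving resize (`calculate_dimensions` plus `TF.resize`); sizing is now carried by `vae_image_override` / `latent_size_override`, which `base_pipe` fills from `config.vae_image_size` and forwards to the pipeline via `inputs.model_dump()`. For reference, the removed lines targeted a fixed pixel area while keeping the input's aspect ratio, roughly like this hypothetical stand-in for `calculate_dimensions` (the repo's own function may round or clamp differently):

```python
import math

def calculate_dimensions_sketch(target_area: int, height_over_width: float,
                                multiple: int = 8) -> tuple[int, int]:
    """Hypothetical equivalent of the removed sizing step: choose (height, width)
    covering roughly `target_area` pixels at the given aspect ratio, rounded to a
    multiple of 8, as the demo's old upload handler also did."""
    width = math.sqrt(target_area / height_over_width)
    height = width * height_over_width
    return (round(height / multiple) * multiple,
            round(width / multiple) * multiple)

# A 3:2 portrait input at the new 1024 * 1024 working area:
print(calculate_dimensions_sketch(1024 * 1024, 1.5))  # (1256, 840), about 1.05 MP
```
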
scripts/inf.ipynb CHANGED
The diff for this file is too large to render. See raw diff