Add dataset configuration update functionality in app.py
Implement a new helper, _update_dataset_toml, that rewrites dataset TOML files in place, updating the [general] resolution and batch_size as well as the per-dataset control resolution. Wire it into run_training so the dataset config is updated from the user's inputs before training starts, and extend the UI with fields for image resolution, control resolution, dataset batch size, and text-encoder cache batch size.
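For reference, a minimal usage sketch of the new helper. It assumes app.py can be imported as a module outside the Space (Gradio must be installed, since the UI builder lives in the same file), and the [[datasets]] keys besides the ones rewritten by the helper are hypothetical.

# Minimal usage sketch of _update_dataset_toml (assumes app.py is importable;
# image_directory below is an illustrative, hypothetical dataset key).
import tempfile
from pathlib import Path

from app import _update_dataset_toml

sample = """\
[general]
resolution = [512, 512]
batch_size = 2

[[datasets]]
image_directory = "/data/images"
"""

with tempfile.TemporaryDirectory() as tmp:
    toml_path = Path(tmp) / "dataset_QIE.toml"
    toml_path.write_text(sample, encoding="utf-8")

    # Rewrite [general] resolution/batch_size and add a control resolution
    # to the first [[datasets]] block, in place.
    _update_dataset_toml(
        str(toml_path),
        img_res_w=1024,
        img_res_h=1024,
        train_batch_size=1,
        control_res_w=1024,
        control_res_h=1024,
    )

    print(toml_path.read_text(encoding="utf-8"))
    # [general] now has resolution = [1024, 1024] and batch_size = 1;
    # [[datasets]] gains qwen_image_edit_control_resolution = [1024, 1024].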
app.py CHANGED

@@ -78,6 +78,68 @@ def _ensure_workspace_auto_files() -> None:
     pass


+def _update_dataset_toml(
+    path: str,
+    *,
+    img_res_w: Optional[int] = None,
+    img_res_h: Optional[int] = None,
+    train_batch_size: Optional[int] = None,
+    control_res_w: Optional[int] = None,
+    control_res_h: Optional[int] = None,
+) -> None:
+    """Update dataset TOML for resolution/batch/control resolution in-place.
+
+    - Updates [general] resolution and batch_size if provided.
+    - Updates first [[datasets]] qwen_image_edit_control_resolution if provided.
+    - Creates sections/keys if missing.
+    """
+    try:
+        txt = Path(path).read_text(encoding="utf-8")
+    except Exception:
+        return
+
+    def _set_in_general(block: str, key: str, value_line: str) -> str:
+        import re as _re
+        if _re.search(rf"(?m)^\s*{_re.escape(key)}\s*=", block):
+            block = _re.sub(rf"(?m)^\s*{_re.escape(key)}\s*=.*$", value_line, block)
+        else:
+            block = block.rstrip() + "\n" + value_line + "\n"
+        return block
+
+    import re
+    m = re.search(r"(?ms)^\[general\]\s*(.*?)(?=^\[|\Z)", txt)
+    if not m:
+        gen = "[general]\n"
+        if img_res_w and img_res_h:
+            gen += f"resolution = [{int(img_res_w)}, {int(img_res_h)}]\n"
+        if train_batch_size is not None:
+            gen += f"batch_size = {int(train_batch_size)}\n"
+        txt = gen + "\n" + txt
+    else:
+        head, block, tail = txt[:m.start(1)], m.group(1), txt[m.end(1):]
+        if img_res_w and img_res_h:
+            block = _set_in_general(block, "resolution", f"resolution = [{int(img_res_w)}, {int(img_res_h)}]")
+        if train_batch_size is not None:
+            block = _set_in_general(block, "batch_size", f"batch_size = {int(train_batch_size)}")
+        txt = head + block + tail
+
+    if control_res_w and control_res_h:
+        m2 = re.search(r"(?ms)^\[\[datasets\]\]\s*(.*?)(?=^\[\[|\Z)", txt)
+        if m2:
+            head, block, tail = txt[:m2.start(1)], m2.group(1), txt[m2.end(1):]
+            line = f"qwen_image_edit_control_resolution = [{int(control_res_w)}, {int(control_res_h)}]"
+            if re.search(r"(?m)^\s*qwen_image_edit_control_resolution\s*=", block):
+                block = re.sub(r"(?m)^\s*qwen_image_edit_control_resolution\s*=.*$", line, block)
+            else:
+                block = block.rstrip() + "\n" + line + "\n"
+            txt = head + block + tail
+
+    try:
+        Path(path).write_text(txt, encoding="utf-8")
+    except Exception:
+        pass
+
+
 def _ensure_dir_writable(path: str) -> str:
     try:
         os.makedirs(path, exist_ok=True)

@@ -245,6 +307,7 @@ def _prepare_script(
     override_learning_rate: Optional[str] = None,
     override_network_dim: Optional[int] = None,
     override_seed: Optional[int] = None,
+    override_te_cache_bs: Optional[int] = None,
 ) -> Path:
     """Create a temporary copy of train_QIE.sh with injected variables.


@@ -366,6 +429,15 @@ def _prepare_script(
     if override_seed is not None:
         txt = re.sub(r"--seed\s+\d+", f"--seed {override_seed}", txt)

+    # Optionally override text-encoder cache batch size
+    if override_te_cache_bs is not None and override_te_cache_bs > 0:
+        txt = re.sub(
+            r"(qwen_image_cache_text_encoder_outputs\.py[^\n]*--batch_size\s+)\d+",
+            rf"\g<1>{int(override_te_cache_bs)}",
+            txt,
+            flags=re.MULTILINE,
+        )
+
     # Prefer overriding variable definitions at top of script (safer than CLI regex)
     def _set_var(name: str, value: str) -> None:
         nonlocal txt

@@ -533,6 +605,12 @@ def run_training(
     ctrl7_suffix: str,
     learning_rate: str,
     network_dim: int,
+    train_res_w: int,
+    train_res_h: int,
+    train_batch_size: int,
+    control_res_w: int,
+    control_res_h: int,
+    te_cache_batch_size: int,
     seed: int,
     max_epochs: int,
     save_every: int,

@@ -614,6 +692,20 @@ def run_training(
     # Decide dataset_config path with fallback to runtime auto dir
     ds_conf = str(Path(AUTO_DIR_RUNTIME) / "dataset_QIE.toml")

+    # Update dataset config with requested resolution/batch settings
+    try:
+        _update_dataset_toml(
+            ds_conf,
+            img_res_w=int(train_res_w) if train_res_w else None,
+            img_res_h=int(train_res_h) if train_res_h else None,
+            train_batch_size=int(train_batch_size) if train_batch_size else None,
+            control_res_w=int(control_res_w) if control_res_w else None,
+            control_res_h=int(control_res_h) if control_res_h else None,
+        )
+        log_buf += f"[QIE] Updated dataset config: resolution=({train_res_w},{train_res_h}), batch_size={train_batch_size}, control_res=({control_res_w},{control_res_h})\n"
+    except Exception as e:
+        log_buf += f"[QIE] WARN: failed to update dataset config: {e}\n"
+
     # Resolve models_root and set output_dir_base to the unique dataset dir
     models_root = MODELS_ROOT_RUNTIME
     out_base = ds_dir

@@ -640,6 +732,7 @@ def run_training(
         control_suffixes=[ctrl0_suffix, ctrl1_suffix, ctrl2_suffix, ctrl3_suffix, ctrl4_suffix, ctrl5_suffix, ctrl6_suffix, ctrl7_suffix],
         override_learning_rate=(learning_rate or None),
         override_network_dim=int(network_dim) if network_dim is not None else None,
+        override_te_cache_bs=int(te_cache_batch_size) if te_cache_batch_size else None,
         override_seed=int(seed) if seed is not None else None,
     )

@@ -730,10 +823,18 @@ def build_ui() -> gr.Blocks:
         with gr.Row():
             lr_input = gr.Textbox(label="Learning rate", value="1e-3")
             dim_input = gr.Number(label="Network dim", value=4, precision=0)
+            train_bs = gr.Number(label="Batch size (dataset)", value=1, precision=0)
             seed_input = gr.Number(label="Seed", value=42, precision=0)
             max_epochs = gr.Number(label="Max epochs", value=100, precision=0)
             save_every = gr.Number(label="Save every N epochs", value=10, precision=0)

+        with gr.Row():
+            tr_w = gr.Number(label="Image resolution W", value=1024, precision=0)
+            tr_h = gr.Number(label="Image resolution H", value=1024, precision=0)
+            cr_w = gr.Number(label="Control resolution W", value=1024, precision=0)
+            cr_h = gr.Number(label="Control resolution H", value=1024, precision=0)
+            te_bs = gr.Number(label="TE cache batch size", value=16, precision=0)
+
         with gr.Accordion("Target Image", elem_classes=["pad-section_0"]):
             with gr.Group():
                 with gr.Row():

@@ -859,7 +960,9 @@ def build_ui() -> gr.Blocks:
             ctrl5_files, ctrl5_prefix, ctrl5_suffix,
             ctrl6_files, ctrl6_prefix, ctrl6_suffix,
             ctrl7_files, ctrl7_prefix, ctrl7_suffix,
-            lr_input, dim_input,
+            lr_input, dim_input,
+            tr_w, tr_h, train_bs, cr_w, cr_h, te_bs,
+            seed_input, max_epochs, save_every,
         ],
         outputs=[logs, ckpt_files],
     )
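Besides the dataset TOML rewrite, the commit also overrides the text-encoder cache batch size by running a regex over the copied training script. A small sketch of that substitution, applied to a hypothetical line from train_QIE.sh (the real script's flags and wording may differ):

# Sketch of the text-encoder cache batch-size override used in _prepare_script.
import re

# Hypothetical invocation line; only the script name and --batch_size flag matter here.
line = "python qwen_image_cache_text_encoder_outputs.py --dataset_config dataset_QIE.toml --batch_size 8"

override_te_cache_bs = 16
new_line = re.sub(
    r"(qwen_image_cache_text_encoder_outputs\.py[^\n]*--batch_size\s+)\d+",
    rf"\g<1>{int(override_te_cache_bs)}",
    line,
    flags=re.MULTILINE,
)
print(new_line)  # ... --batch_size 16

Group 1 keeps everything from the script name up to and including "--batch_size ", so only the trailing number is replaced; lines that do not mention the cache script are left untouched.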