dikdimon
/

sdas

Model card Files Files and versions

xet

Community

dikdimon committed on Mar 8

Commit

3d4fdb4

verified ·

1 Parent(s): 37b6731

Delete __init__.py

Browse files

Files changed (1) hide show

__init__.py +0 -559

__init__.py DELETED Viewed

@@ -1,559 +0,0 @@
-"""
-lib_mega_freeu/unet.py — Math engine + A1111 th.cat patch
-BUGS FIXED vs sdwebui-freeU-extension/scripts/freeunet_hijack.py:
-  BUG 1 dtype: mask = torch.ones(..., dtype=torch.bool)
-    bool*float = NOOP, scale always 1.0
-    Fix: torch.full(..., float(scale_high))
-  BUG 2 quadrant: mask[..., crow-t:crow, ccol-t:ccol] (top-left only)
-    Fix: mask[..., crow-t:crow+t, ccol-t:ccol+t] (symmetric center)
-Sources:
-  sd-webui-freeu/lib_free_u/unet.py         patch(), free_u_cat_hijack(),
-    get_backbone_scale(), ratio_to_region(), filter_skip()[box],
-    get_schedule_ratio(), is_gpu_complex_supported(), lerp()
-  WAS FreeU_Advanced/nodes.py               9 blending modes, Fourier_filter() multiscale
-  ComfyUI_FreeU_V2_advanced/utils.py        Fourier_filter_gauss(), get_band_energy_stats()
-  ComfyUI_FreeU_V2_advanced/FreeU_S1S2.py  Adaptive Cap loop MAX_CAP_ITER=3
-  ComfyUI_FreeU_V2_advanced/FreeU_B1B2.py  channel_threshold, model_channels*4/2/1
-  FreeU_V2_timestepadd.py                  step-fraction timestep gating concept
-  nrs_kohaku_enhanced_v3_5.py              _freeu_b_scale_h, _freeu_fourier_filter_gaussian,
-                                            hf_boost param, on_cpu_devices dict
-"""
-import dataclasses
-import functools
-import logging
-import math
-import pathlib
-import sys
-from typing import Dict, List, Optional, Tuple, Union
-import torch
-from lib_mega_freeu import global_state
-# ── GPU complex support (sd-webui-freeu exact) ────────────────────────────────
-_gpu_complex_support: Optional[bool] = None
-def is_gpu_complex_supported(x: torch.Tensor) -> bool:
-    """Report whether a complex FFT can run on the device that holds ``x``.
-    CPU tensors always report True.  For any other device the answer is worked
-    out once, stored in the module-level ``_gpu_complex_support`` flag, and
-    reused on every later call: MPS or DirectML being available means False;
-    otherwise a trial ``torch.fft.fftn`` decides (RuntimeError means False).
-    """
-    global _gpu_complex_support
-    if x.is_cpu:
-        return True
-    if _gpu_complex_support is None:
-        has_mps = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
-        try:
-            import torch_directml
-        except ImportError:
-            has_dml = False
-        else:
-            has_dml = torch_directml.is_available()
-        _gpu_complex_support = not (has_mps or has_dml)
-        if _gpu_complex_support:
-            # Probe with a real FFT; some backends only fail when it is executed.
-            try:
-                torch.fft.fftn(x.float(), dim=(-2, -1))
-            except RuntimeError:
-                _gpu_complex_support = False
-    return _gpu_complex_support
-# Per-device flag, set to True once FFT work for that device has been routed
-# to the CPU; read and written by filter_skip_gaussian_adaptive().
-_on_cpu_devices: Dict = {}
-# ── Blending modes (WAS nodes.py exact) ───────────────────────────────────────
-def _normalize(t):
-    """Min-max rescale ``t`` using its global minimum and maximum.
-    A 1e-8 term in the denominator keeps a constant tensor from dividing by zero.
-    """
-    lo = t.min()
-    span = t.max() - lo + 1e-8
-    return (t - lo) / span
-def _hslerp(a, b, t):
-    """Blend ``a`` toward ``b`` linearly, then shift channel 0 by a norm-based offset.
-    The offset is ``norm(b - a, dim=1) / 6`` applied to the first channel only;
-    it is added when ``t < 0.5`` and subtracted otherwise.
-    """
-    first_channel = torch.zeros(1, a.size(1), 1, 1, device=a.device, dtype=a.dtype)
-    first_channel[0, 0, 0, 0] = 1.0
-    blended = (1 - t) * a + t * b
-    offset = (torch.norm(b - a, dim=1, keepdim=True) / 6) * first_channel
-    if t < 0.5:
-        return blended + offset
-    return blended - offset
-def _stable_slerp(a, b, t, eps=1e-6):
-    """Spherically interpolate between ``a`` and ``b`` along dim 1.
-    Norms and the sine denominator are clamped to ``eps``; where the angle
-    between the two inputs is below 1e-3 the plain linear blend is used.
-    """
-    unit_a = a / torch.linalg.norm(a, dim=1, keepdim=True).clamp_min(eps)
-    unit_b = b / torch.linalg.norm(b, dim=1, keepdim=True).clamp_min(eps)
-    cos_angle = (unit_a * unit_b).sum(dim=1, keepdim=True).clamp(-1.0 + eps, 1.0 - eps)
-    angle = torch.acos(cos_angle)
-    denom = torch.sin(angle).clamp_min(eps)
-    weight_a = torch.sin((1.0 - t) * angle) / denom
-    weight_b = torch.sin(t * angle) / denom
-    spherical = weight_a * a + weight_b * b
-    linear = (1.0 - t) * a + t * b
-    # ``angle`` keeps a size-1 dim 1, so it broadcasts over the channels directly.
-    return torch.where(angle < 1e-3, linear, spherical)
-# Dispatch table: blend-mode name -> callable(a, b, t) returning the blend of
-# tensors ``a`` and ``b`` at weight ``t``.  free_u_cat_hijack() looks modes up
-# here by si.backbone_blend_mode.
-BLENDING_MODES = {
-    # NOTE(review): despite the name this entry is a min-max-normalized linear blend.
-    "bislerp":       lambda a, b, t: _normalize((1 - t) * a + t * b),
-    "colorize":      lambda a, b, t: a + (b - a) * t,
-    "cosine interp": lambda a, b, t: (
-        a + b - (a - b) * torch.cos(
-            torch.tensor(math.pi, device=a.device, dtype=a.dtype) * t)) / 2,
-    # Smoothstep weight 3t^2 - 2t^3 applied to a linear blend.
-    "cuberp":        lambda a, b, t: a + (b - a) * (3 * t**2 - 2 * t**3),
-    "hslerp":        _hslerp,
-    "stable_slerp":  _stable_slerp,
-    # Additive: ``a`` is kept in full and ``b`` is added at weight ``t``.
-    "inject":        lambda a, b, t: a + b * t,
-    "lerp":          lambda a, b, t: (1 - t) * a + t * b,
-    "linear dodge":  lambda a, b, t: _normalize(a + b * t),
-}
-def lerp(a, b, r):
-    """Linearly interpolate from ``a`` (at r = 0) to ``b`` (at r = 1)."""
-    weight_a = 1 - r
-    return weight_a * a + r * b
-# ── Backbone scaling ──────────────────────────────────────────────────────────
-def get_backbone_scale(h: torch.Tensor, backbone_factor: float, version: str):
-    """Return the multiplier applied to the backbone features ``h``.
-    Version "1" returns ``backbone_factor`` unchanged.  Any other version
-    returns a per-sample map: the channel mean of ``h`` is min-max normalized
-    over each sample and used to interpolate between 1 and ``backbone_factor``.
-    """
-    if version == "1":
-        return backbone_factor
-    channel_mean = h.mean(1, keepdim=True)
-    per_sample = channel_mean.view(channel_mean.shape[0], -1)
-    peak = per_sample.max(dim=-1, keepdim=True)[0]
-    trough = per_sample.min(dim=-1, keepdim=True)[0]
-    # Clamp the range so a flat feature map does not divide by zero.
-    span = (peak - trough).clamp_min(1e-6)
-    normalized = (channel_mean - trough.unsqueeze(2).unsqueeze(3)) / span.unsqueeze(2).unsqueeze(3)
-    return 1 + (backbone_factor - 1) * normalized
-def ratio_to_region(width: float, offset: float, n: int) -> Tuple[int, int, bool]:
-    """Turn a fractional (width, offset) window into index bounds over ``n`` items.
-    Returns ``(begin, end, inverted)``.  A negative width extends backwards from
-    the offset, and negative offsets wrap into [0, 1).  When the window runs past
-    1.0 the bounds of the complementary region are returned with ``inverted``
-    set to True.  (Follows sd-webui-freeu's ratio_to_region.)
-    """
-    if width < 0:
-        offset, width = offset + width, -width
-    width = min(width, 1.0)
-    if offset < 0:
-        offset = 1 + offset - int(offset)
-    offset = math.fmod(offset, 1.0)
-    window_end = width + offset
-    if window_end > 1:
-        return round((window_end - 1) * n), round(offset * n), True
-    return round(offset * n), round(window_end * n), False
-# ── Box FFT (BUGS FIXED symmetric center + float dtype) ─────────────────────
-def filter_skip_box(x: torch.Tensor, cutoff: float,
-                    scale: float, scale_high: float = 1.0) -> torch.Tensor:
-    """
-    FreeU box filter: scale the centred low-frequency box of ``x`` by ``scale``
-    and every frequency outside it by ``scale_high``.
-    TWO BUGS FIXED from sdwebui-freeU-extension:
-    BUG 1 (dtype): was torch.bool mask -> scale multiplication was NOOP
-    BUG 2 (region): was [crow-t:crow, ccol-t:ccol] -> single quadrant top-left
-    Both fixed: torch.full float + symmetric [crow-t:crow+t, ccol-t:ccol+t].
-    sd-webui-freeu has these correct already, we match their implementation.
-    The box's lower bounds are clamped to 0 so that a cutoff above 1.0 covers
-    the whole spectrum instead of wrapping around to a negative slice start.
-    Args:
-        x: tensor whose spectrum unpacks as (B, C, H, W).
-        cutoff: box half-size as a fraction of H // 2 and W // 2; values <= 0
-            fall back to a half-size of 1.
-        scale: multiplier for frequencies inside the box.
-        scale_high: multiplier for frequencies outside the box.
-    Returns:
-        The filtered tensor on ``x``'s device and dtype, or ``x`` itself when
-        both multipliers are exactly 1.0.
-    """
-    if scale == 1.0 and scale_high == 1.0:
-        return x
-    # Fall back to the CPU when the device cannot run complex FFTs.
-    fft_dev = x.device if is_gpu_complex_supported(x) else torch.device("cpu")
-    x_freq = torch.fft.fftn(x.to(fft_dev, dtype=torch.float32), dim=(-2, -1))
-    x_freq = torch.fft.fftshift(x_freq, dim=(-2, -1))
-    B, C, H, W = x_freq.shape
-    mask = torch.full((B, C, H, W), float(scale_high), device=fft_dev)  # float, not bool
-    crow, ccol = H // 2, W // 2
-    tr = max(1, math.floor(crow * cutoff)) if cutoff > 0 else 1
-    tc = max(1, math.floor(ccol * cutoff)) if cutoff > 0 else 1
-    # A negative slice start counts from the end of the axis, which would select
-    # the wrong rows/columns; clamp so an oversized box simply saturates.
-    row_lo, col_lo = max(0, crow - tr), max(0, ccol - tc)
-    mask[..., row_lo:crow + tr, col_lo:ccol + tc] = scale  # symmetric center
-    x_freq *= mask
-    x_freq = torch.fft.ifftshift(x_freq, dim=(-2, -1))
-    return torch.fft.ifftn(x_freq, dim=(-2, -1)).real.to(device=x.device, dtype=x.dtype)
-# ── Box + WAS multiscale overlay (WAS nodes.py Fourier_filter exact) ─────────
-def filter_skip_box_multiscale(x: torch.Tensor, cutoff: float, scale: float,
-                                scales_preset: Optional[list],
-                                strength: float = 1.0,
-                                scale_high: float = 1.0) -> torch.Tensor:
-    """
-    WAS FreeU_Advanced/nodes.py Fourier_filter(x, threshold, scale, scales, strength).
-    threshold = cutoff: float ratio [0-1] or int pixels (WAS uses int default=1).
-    scales: None, list of (radius_px, val) single-scale, or list of lists multi-scale.
-    Every box is clamped to the spectrum: pixel radii larger than H // 2 or
-    W // 2 would otherwise produce a negative slice start, which wraps around
-    and selects the wrong (or an empty) region on small feature maps.
-    Args:
-        x: tensor whose spectrum unpacks as (B, C, H, W).
-        cutoff: half-size of the base low-frequency box (ratio or pixels).
-        scale: multiplier inside the base box.
-        scales_preset: optional overlay boxes, blended into the mask one by one.
-        strength: overlay weight; it scales each overlay value and also the
-            blend of the overlay into the running mask.
-        scale_high: multiplier outside the base box.
-    Returns:
-        The filtered tensor on ``x``'s device and dtype, or ``x`` itself when
-        nothing would change.
-    """
-    if scale == 1.0 and scale_high == 1.0 and scales_preset is None:
-        return x
-    fft_dev = x.device if is_gpu_complex_supported(x) else torch.device("cpu")
-    x_freq = torch.fft.fftn(x.to(fft_dev, dtype=torch.float32), dim=(-2, -1))
-    x_freq = torch.fft.fftshift(x_freq, dim=(-2, -1))
-    B, C, H, W = x_freq.shape
-    crow, ccol = H // 2, W // 2
-    def _box(half_r, half_c):
-        # Index for the centred box, with lower bounds clamped to 0 (see docstring).
-        return (Ellipsis,
-                slice(max(0, crow - half_r), crow + half_r),
-                slice(max(0, ccol - half_c), ccol + half_c))
-    if isinstance(cutoff, float) and 0 < cutoff <= 1.0:
-        tr = max(1, math.floor(crow * cutoff))
-        tc = max(1, math.floor(ccol * cutoff))
-    else:
-        tr = tc = max(1, int(cutoff)) if cutoff > 0 else 1
-    base_box = _box(tr, tc)
-    mask = torch.ones((B, C, H, W), device=fft_dev)
-    mask[base_box] = scale
-    if scale_high != 1.0:
-        hfm = torch.full((B, C, H, W), float(scale_high), device=fft_dev)
-        hfm[base_box] = 1.0
-        mask = mask * hfm
-    if scales_preset:
-        if isinstance(scales_preset[0], tuple):
-            # WAS single-scale mode: the preset itself is one list of pairs.
-            groups = [scales_preset]
-        else:
-            # WAS multi-scale mode: only list entries are processed.
-            groups = [g for g in scales_preset if isinstance(g, list)]
-        for group in groups:
-            for scale_threshold, scale_value in group:
-                sv = scale_value * strength
-                st = max(1, int(scale_threshold))
-                sm = torch.ones((B, C, H, W), device=fft_dev)
-                sm[_box(st, st)] = sv
-                mask = mask + (sm - mask) * strength
-    x_freq = x_freq * mask
-    x_freq = torch.fft.ifftshift(x_freq, dim=(-2, -1))
-    return torch.fft.ifftn(x_freq, dim=(-2, -1)).real.to(device=x.device, dtype=x.dtype)
-# ── Gaussian FFT (ComfyUI utils.py exact) ────────────────────────────────────
-def fourier_filter_gauss(x: torch.Tensor, radius_ratio: float,
-                          scale: float, hf_boost: float = 1.0) -> torch.Tensor:
-    """
-    Gaussian-weighted Fourier filter over the last two dims of ``x``
-    (after ComfyUI_FreeU_V2_advanced/utils.py Fourier_filter_gauss and
-    kohaku _freeu_fourier_filter_gaussian).
-    R = max(1, int(min(H,W)*radius_ratio))
-    sigma_f = R^2/2
-    center = exp(-dist2/sigma_f)
-    mask = scale*center + hf_boost*(1-center)
-    The FFT runs on ``x``'s own device in float32; the result is cast back to
-    ``x.dtype``.
-    """
-    spectrum = torch.fft.fftshift(torch.fft.fftn(x.float(), dim=(-2, -1)), dim=(-2, -1))
-    _, _, H, W = spectrum.shape
-    radius = max(1, int(min(H, W) * radius_ratio))
-    sigma_f = max(1e-6, (radius * radius) / 2.0)
-    rows = torch.arange(H, device=x.device, dtype=torch.float32) - H // 2
-    cols = torch.arange(W, device=x.device, dtype=torch.float32) - W // 2
-    yy, xx = torch.meshgrid(rows, cols, indexing="ij")
-    # 1 at the (shifted) DC bin, decaying toward 0 with squared distance from it.
-    low_pass = torch.exp(-(yy**2 + xx**2) / sigma_f)
-    mask = (scale * low_pass + hf_boost * (1.0 - low_pass)).view(1, 1, H, W)
-    filtered = torch.fft.ifftshift(spectrum * mask, dim=(-2, -1))
-    return torch.fft.ifftn(filtered, dim=(-2, -1)).real.to(x.dtype)
-# ── Band energy stats (ComfyUI utils.py exact) ────────────────────────────────
-def get_band_energy_stats(x: torch.Tensor, R: int) -> Tuple[float, float, float]:
-    """Measure mean spectral power inside and outside a centred disc of radius ``R``.
-    Returns ``(lf_energy, hf_energy, cover)``: the mean squared magnitude of the
-    shifted 2-D FFT inside the disc, the same outside it, and the percentage of
-    the H x W grid the disc covers.  An empty band reports 0.0.
-    (After ComfyUI_FreeU_V2_advanced/utils.py get_band_energy_stats.)
-    """
-    spectrum = torch.fft.fftshift(torch.fft.fftn(x.float(), dim=(-2, -1)), dim=(-2, -1))
-    _, _, H, W = spectrum.shape
-    rows = torch.arange(H, device=x.device, dtype=torch.float32) - H // 2
-    cols = torch.arange(W, device=x.device, dtype=torch.float32) - W // 2
-    yy, xx = torch.meshgrid(rows, cols, indexing="ij")
-    inside = (yy**2 + xx**2) <= (R * R)
-    outside = ~inside
-    power = spectrum.real**2 + spectrum.imag**2
-    # The 2-D boolean mask indexes the last two dims, keeping batch and channel.
-    lf_e = power[:, :, inside].mean().item() if inside.any() else 0.0
-    hf_e = power[:, :, outside].mean().item() if outside.any() else 0.0
-    cover = inside.sum().item() / (H * W) * 100.0
-    return lf_e, hf_e, cover
-# ── Adaptive Cap Gaussian (FreeU_S1S2.py MAX_CAP_ITER=3 exact) ───────────────
-def filter_skip_gaussian_adaptive(hsp: torch.Tensor,
-                                   si: "global_state.StageInfo",
-                                   verbose: bool = False) -> torch.Tensor:
-    """
-    Gaussian skip-feature filter with an optional cap on over-attenuation.
-    ComfyUI_FreeU_V2_advanced/FreeU_S1S2.py exact algorithm:
-    1. Compute LF/HF ratio before.
-    2. Apply Gaussian filter.
-    3. If enable_adaptive_cap and drop > cap_threshold: loop up to MAX_CAP_ITER=3.
-       adaptive mode: eff_factor = cap_factor * (cap_threshold / drop)
-       fixed mode:    eff_factor = cap_factor
-       capped_s = 1 - eff_factor*(1-s_scale)  [interpolate FROM ORIGINAL]
-       capped_s = max(capped_s, current_s*(1+1e-4))
-       Re-apply from original_hsp with capped_s.
-    hf_boost combined = max(si.hf_boost, si.skip_high_end_factor)  [kohaku pattern]
-    Args:
-        hsp: skip features; the last two dims are the filtered axes.
-        si: stage settings. Fields read here: skip_factor, fft_radius_ratio,
-            hf_boost, skip_high_end_factor, enable_adaptive_cap, cap_threshold,
-            cap_factor, adaptive_cap_mode.
-        verbose: log the energy statistics and each cap iteration.
-    Returns:
-        The filtered features, or the last capped re-filtering when the cap
-        loop ran.
-    """
-    s_scale  = si.skip_factor
-    radius_r = si.fft_radius_ratio
-    hf_boost = max(si.hf_boost, si.skip_high_end_factor)
-    orig_dev = hsp.device
-    H, W     = hsp.shape[-2:]
-    R_eff    = max(1, int(min(H, W) * radius_r))
-    # CRITICAL ORDER: init cpu-fallback flag and helpers BEFORE any FFT call
-    # A device already recorded in _on_cpu_devices stays on the CPU path;
-    # otherwise the decision comes from is_gpu_complex_supported().
-    use_cpu = _on_cpu_devices.get(orig_dev, not is_gpu_complex_supported(hsp))
-    if use_cpu:
-        _on_cpu_devices[orig_dev] = True
-    def _tod(t):            # to FFT-safe device
-        return t.cpu() if use_cpu else t
-    def _fromd(t):          # back to original device
-        return t.to(orig_dev) if use_cpu else t
-    def _energy(t):
-        # NOTE(review): unlike _filt, this call has no try/except of its own;
-        # an FFT failure here propagates to the caller.
-        return get_band_energy_stats(_tod(t), R_eff)
-    def _filt(inp, sc):
-        # Filter once; on any failure off the CPU path, switch this device to
-        # the CPU for the rest of the process and retry there.  If the CPU path
-        # itself fails, the input is returned unfiltered.
-        nonlocal use_cpu
-        try:
-            out = fourier_filter_gauss(_tod(inp), radius_r, sc, hf_boost)
-            return _fromd(out)
-        except Exception:
-            if not use_cpu:
-                logging.warning(f"[MegaFreeU] {orig_dev} -> CPU fallback for FFT")
-                _on_cpu_devices[orig_dev] = True
-                use_cpu = True
-                return fourier_filter_gauss(inp.cpu(), radius_r, sc, hf_boost).to(orig_dev)
-            return inp
-    # Pre-filter energy (routed through _tod, so it honours the CPU decision above)
-    lf_b, hf_b, cover = _energy(hsp)
-    ratio_b = lf_b / hf_b if hf_b > 1e-6 else float("inf")
-    if verbose:
-        logging.info(f"[MegaFreeU] Gauss {H}x{W} R={R_eff}px cov={cover:.1f}% "
-                     f"LF={lf_b:.3e} HF={hf_b:.3e} ratio_b={ratio_b:.4f}")
-    hsp_filt = _filt(hsp, s_scale)
-    if not si.enable_adaptive_cap:
-        return hsp_filt
-    MAX_CAP_ITER = 3
-    original_hsp = hsp
-    current_s    = s_scale
-    lf_a, hf_a, _ = _energy(hsp_filt)
-    ratio_a = lf_a / hf_a if hf_a > 1e-6 else float("inf")
-    # drop = relative loss of the LF/HF ratio caused by filtering.
-    # NOTE(review): if both ratios are inf this evaluates to nan, which fails
-    # the `drop > cap_threshold` test and skips the loop.
-    drop    = 1.0 - (ratio_a / ratio_b) if ratio_b > 1e-6 else 0.0
-    orig_drop = drop
-    iters = 0
-    hsp_cur = hsp_filt
-    while (si.enable_adaptive_cap
-           and drop > si.cap_threshold
-           and current_s < 0.999
-           and iters < MAX_CAP_ITER):
-        if iters == 0:
-            logging.warning(f"[MegaFreeU] Over-attenuation: drop={drop*100:.1f}% > "
-                            f"{si.cap_threshold*100:.1f}%  s={s_scale:.4f}")
-        eff_f = si.cap_factor
-        if si.adaptive_cap_mode == "adaptive":
-            eff_f = si.cap_factor * (si.cap_threshold / max(drop, 1e-8))
-        capped_s = 1.0 - eff_f * (1.0 - s_scale)           # interpolate from ORIGINAL s
-        capped_s = max(capped_s, current_s * (1.0 + 1e-4))  # only ever relax
-        if abs(capped_s - current_s) < 1e-4:
-            if verbose: logging.info("  Cap converged.")
-            break
-        if verbose:
-            logging.info(f"  Cap iter {iters+1}: s {current_s:.4f}->{capped_s:.4f} eff={eff_f:.4f}")
-        try:
-            # Always re-filter the untouched input, never the previous output.
-            hsp_new = _filt(original_hsp, capped_s)
-        except Exception as e:
-            logging.error(f"[MegaFreeU] cap re-apply error: {e}")
-            hsp_cur = original_hsp   # restore to original on error (ComfyUI FreeU_S1S2.py pattern)
-            break
-        hsp_cur   = hsp_new
-        lf_a, hf_a, _ = _energy(hsp_cur)
-        ratio_a = lf_a / hf_a if hf_a > 1e-6 else float("inf")
-        drop    = 1.0 - (ratio_a / ratio_b) if ratio_b > 1e-6 else 0.0
-        current_s = capped_s
-        iters += 1
-    if iters > 0 or verbose:
-        logging.info(f"[MegaFreeU] Cap done: {orig_drop*100:.1f}%->{drop*100:.1f}% "
-                     f"({iters} iters  s_final={current_s:.4f})")
-    return hsp_cur
-# ── Schedule (sd-webui-freeu exact) ──────────────────────────────────────────
-def get_schedule_ratio() -> float:
-    """Return the schedule weight for the current sampling step.
-    Blends a hard gate (1.0 while start <= step < stop, else 0.0) with a clamped
-    linear ramp, weighted by ``transition_smoothness`` from the global state.
-    The step count falls back to 20 when the web UI reports none.
-    """
-    from modules import shared
-    st = global_state.instance
-    total_steps = shared.state.sampling_steps or 20
-    step = global_state.current_sampling_step
-    first = _to_step(st.start_ratio, total_steps)
-    last = _to_step(st.stop_ratio, total_steps)
-    if first == last:
-        ramp = 0.0
-    else:
-        if step < first:
-            raw = step / (first + 1e-8)
-        else:
-            raw = 1 + (step - first) / (first - last + 1e-8)
-        ramp = min(1.0, max(0.0, raw))
-    gate = 1.0 if first <= step < last else 0.0
-    return lerp(gate, ramp, st.transition_smoothness)
-def get_stage_bsratio(b_start: float, b_end: float) -> float:
-    """Gate a stage by sampling progress (FreeU_V2_timestepadd concept -> step fraction).
-    Progress is ``current_step / (steps - 1)``, or 0.0 for a single-step run.
-    Returns 1.0 while ``b_start <= progress <= b_end`` and 0.0 otherwise.
-    """
-    from modules import shared
-    total_steps = max(shared.state.sampling_steps or 20, 1)
-    step = global_state.current_sampling_step
-    progress = step / (total_steps - 1) if total_steps > 1 else 0.0
-    if b_start <= progress <= b_end:
-        return 1.0
-    return 0.0
-def _to_step(v, steps):
-    """Convert ``v`` to a step index: floats are fractions of ``steps``, anything else is taken as-is."""
-    if isinstance(v, float):
-        v = v * steps
-    return int(v)
-# ── Stage auto-detection (FreeU_B1B2.py + kohaku exact) ──────────────────────
-_stage_channels: Tuple[int, int, int] = (1280, 640, 320)
-def detect_model_channels():
-    """Refresh ``_stage_channels`` from the loaded model's ``model_channels``.
-    The three stage widths are 4x, 2x and 1x the base channel count.  If the
-    model cannot be read for any reason the (1280, 640, 320) defaults are restored.
-    """
-    global _stage_channels
-    try:
-        from modules import shared
-        base = int(shared.sd_model.model.diffusion_model.model_channels)
-    except Exception:
-        _stage_channels = (1280, 640, 320)
-    else:
-        _stage_channels = (base * 4, base * 2, base * 1)
-def get_stage_index(dims: int, channel_threshold: int = 96) -> Optional[int]:
-    """Return the index of the first stage whose channel count is within
-    ``channel_threshold`` of ``dims``, or None when no stage matches
-    (FreeU_B1B2.py proximity match).
-    """
-    return next(
-        (idx for idx, target in enumerate(_stage_channels)
-         if abs(dims - target) <= channel_threshold),
-        None,
-    )
-# ── Override scales parser (WAS nodes.py format exact) ───────────────────────
-def parse_override_scales(text: str) -> Optional[List]:
-    """Parse ``radius, value`` lines into a list of ``(int, float)`` pairs.
-    Blank lines and lines starting with ``#``, ``!`` or ``//`` are ignored, as
-    are lines that do not have exactly two comma-separated fields or whose
-    fields do not convert.  Returns None when nothing usable was found.
-    """
-    if not text or not text.strip():
-        return None
-    pairs = []
-    for raw in text.strip().splitlines():
-        entry = raw.strip()
-        if not entry or entry.startswith(("#", "!", "//")):
-            continue
-        fields = entry.split(",")
-        if len(fields) != 2:
-            continue
-        try:
-            pairs.append((int(fields[0].strip()), float(fields[1].strip())))
-        except ValueError:
-            # Malformed numbers are skipped on purpose (best-effort parsing).
-            continue
-    return pairs or None
-# Mutable holder for the verbose-logging switch; free_u_cat_hijack() reads
-# verbose_ref.value on every call.
-class _VerboseRef:
-    # Verbose logging is off by default.
-    value: bool = False
-verbose_ref = _VerboseRef()
-# ── Core th.cat hijack (sd-webui-freeu exact + extended) ─────────────────────
-def free_u_cat_hijack(hs, *args, original_function, **kwargs):
-    """
-    Intercepts torch.cat([h, h_skip], dim=1) in UNet output_blocks.
-    Signature: kwargs=={"dim":1} and len(hs)==2 (sd-webui-freeu exact check).
-    Why th.cat over alternatives:
-    - sdwebui-freeU-extension CondFunc(UNetModel.forward): rewrites full forward,
-      incompatible with other extensions, plus 2 bugs in fourier mask.
-    - kohaku register_forward_hook: output already concatenated,
-      can't cleanly separate h from h_skip for independent filtering.
-    - th.cat hijack: intercepts exactly [h, h_skip] before concatenation. CORRECT.
-    Args:
-        hs: the sequence handed to cat; only a 2-item [h, h_skip] is processed.
-        *args, **kwargs: forwarded unchanged to ``original_function``.
-        original_function: the real cat, bound by patch() via functools.partial.
-    Returns:
-        ``original_function``'s result, computed on the modified [h, h_skip]
-        when the call matched and on the untouched inputs otherwise.
-    """
-    st = global_state.instance
-    if not st.enable:
-        return original_function(hs, *args, **kwargs)
-    sched = get_schedule_ratio()
-    if sched == 0:
-        return original_function(hs, *args, **kwargs)
-    try:
-        # Unpacking raises for anything that is not exactly two items.
-        h, h_skip = hs
-        if list(kwargs.keys()) != ["dim"] or kwargs.get("dim", -1) != 1:
-            return original_function(hs, *args, **kwargs)
-    except (ValueError, TypeError):
-        return original_function(hs, *args, **kwargs)
-    dims      = int(h.shape[1])
-    stage_idx = get_stage_index(dims, st.channel_threshold)
-    if stage_idx is None:
-        return original_function(hs, *args, **kwargs)
-    si      = st.stage_infos[stage_idx]
-    version = st.version
-    verbose = verbose_ref.value
-    # ── BACKBONE ─────────────────────────────────────────────────────────────
-    b_gate = get_stage_bsratio(si.b_start_ratio, si.b_end_ratio)
-    eff_b  = sched * b_gate
-    if eff_b > 0.0 and abs(si.backbone_factor - 1.0) > 1e-6:
-        try:
-            rbegin, rend, rinv = ratio_to_region(si.backbone_width, si.backbone_offset, dims)
-            ch_idx = torch.arange(dims, device=h.device)
-            # NOTE(review): the channel window is inclusive at both ends (<= rend).
-            mask   = (rbegin <= ch_idx) & (ch_idx <= rend)
-            if rinv: mask = ~mask
-            mask = mask.reshape(1, -1, 1, 1).to(h.dtype)
-            # The schedule weight fades the factor toward 1.0 (no change).
-            eff_factor = float(lerp(1.0, si.backbone_factor, eff_b))
-            scale      = get_backbone_scale(h, eff_factor, version)
-            # h_scaled_full: full h with mask region scaled, rest unchanged
-            # This matches original: h *= mask*scale + (1-mask)
-            h_scaled_full = h * (mask * scale + (1.0 - mask))
-            bmode = si.backbone_blend_mode
-            if bmode in BLENDING_MODES and abs(si.backbone_blend - 1.0) > 1e-6:
-                # Blend on FULL tensors so modes like slerp/hslerp see proper norms.
-                # Then restore unmasked channels to original h.
-                h_blended = BLENDING_MODES[bmode](h, h_scaled_full, si.backbone_blend)
-                h = h * (1.0 - mask) + h_blended * mask
-            else:
-                h = h_scaled_full
-        except Exception as e:
-            # Best effort: a failure leaves h as it was and only logs.
-            logging.warning(f"[MegaFreeU] B-scaling stage {stage_idx}: {e}")
-    # ── SKIP / FOURIER ────────────────────────────────────────────────────────
-    s_gate = get_stage_bsratio(si.s_start_ratio, si.s_end_ratio)
-    eff_s  = sched * s_gate
-    if eff_s > 0.0 and (abs(si.skip_factor - 1.0) > 1e-6
-                         or abs(si.hf_boost - 1.0) > 1e-6
-                         or abs(si.skip_high_end_factor - 1.0) > 1e-6):
-        try:
-            s_scale = float(lerp(1.0, si.skip_factor, eff_s))
-            s_high  = float(lerp(1.0, si.skip_high_end_factor, eff_s))
-            if si.fft_type == "gaussian":
-                hf_b = float(lerp(1.0, si.hf_boost, eff_s))
-                # Pass schedule-adjusted factors on a copy; the stored settings stay untouched.
-                si_eff = dataclasses.replace(si, skip_factor=s_scale, skip_high_end_factor=s_high, hf_boost=hf_b)
-                h_skip = filter_skip_gaussian_adaptive(h_skip, si_eff, verbose)
-            else:
-                # User override text takes precedence over the named multiscale preset.
-                override  = parse_override_scales(st.override_scales)
-                ms_preset = override or global_state.MSCALES.get(st.multiscale_mode)
-                if ms_preset is not None:
-                    h_skip = filter_skip_box_multiscale(
-                        h_skip, si.skip_cutoff, s_scale, ms_preset,
-                        st.multiscale_strength, s_high)
-                else:
-                    h_skip = filter_skip_box(h_skip, si.skip_cutoff, s_scale, s_high)
-        except Exception as e:
-            # Best effort: a failure leaves h_skip as it was and only logs.
-            logging.warning(f"[MegaFreeU] skip filter stage {stage_idx}: {e}")
-    return original_function([h, h_skip], *args, **kwargs)
-# ── Patch (sd-webui-freeu exact + ControlNet) ─────────────────────────────────
-_patched = False  # guard against double-patch on hot-reload
-def patch():
-    """Install free_u_cat_hijack as ``th.cat`` in the web UI's UNet hijack module.
-    Does nothing when the patch is already in place.  Afterwards it tries to
-    patch ControlNet's ``scripts.hook`` the same way and prints whether that
-    part was enabled, disabled (module not importable) or failed with an error.
-    """
-    global _patched
-    try:
-        from modules.sd_hijack_unet import th
-    except ImportError:
-        print("[MegaFreeU] sd_hijack_unet not available", file=sys.stderr); return
-    # th.cat is a functools.partial once patched, so its wrapped function's
-    # name identifies an earlier patch even after _patched was reset by a reload.
-    if _patched or (hasattr(th.cat, "func") and getattr(th.cat.func, "__name__", "") == "free_u_cat_hijack"):
-        return  # already patched (by name; handles module reload)
-    th.cat = functools.partial(free_u_cat_hijack, original_function=th.cat)
-    _patched = True
-    cn_status = "enabled"
-    try:
-        from modules import scripts
-        cn_paths = [
-            str(pathlib.Path(scripts.basedir()).parent.parent / "extensions-builtin" / "sd-webui-controlnet"),
-            str(pathlib.Path(scripts.basedir()).parent / "sd-webui-controlnet"),
-        ]
-        # Put the candidate ControlNet directories first on sys.path for the
-        # import below; they are removed again in the finally clause.
-        sys.path[0:0] = cn_paths
-        try:
-            import scripts.hook as cn_hook
-            cn_hook.th.cat = functools.partial(free_u_cat_hijack, original_function=cn_hook.th.cat)
-        except ImportError:
-            cn_status = "disabled"
-        finally:
-            for p in cn_paths:
-                if p in sys.path: sys.path.remove(p)
-    except Exception:
-        cn_status = "error"
-    print(f"[MegaFreeU] th.cat patched  ControlNet: *{cn_status}*")

Delete __init__.py

Delete __init__.py