Spaces:

codemichaeld
/

new03

Running

App Files Files Community

codemichaeld committed 13 days ago

Commit

c31eee4

verified ·

1 Parent(s): 9f9518a

Update app.py

Browse files

Files changed (1) hide show

app.py +178 -89

app.py CHANGED Viewed

@@ -18,10 +18,7 @@ except ImportError:
     MODELScope_AVAILABLE = False
 def low_rank_decomposition(weight, rank=128):
-    """
-    Improved LoRA decomposition that maintains compatibility with existing merge scripts.
-    This implementation focuses on extracting meaningful low-rank components from 2D weights.
-    """
     if weight.ndim != 2:
         return None, None
@@ -34,10 +31,10 @@ def low_rank_decomposition(weight, rank=128):
         # Ensure rank doesn't exceed available singular values
         actual_rank = min(rank, len(S))
         # Create LoRA matrices using standard factorization
-        # W ≈ U[:, :r] * diag(S[:r]) * Vh[:r, :]
-        # We split as: A = Vh[:r, :], B = U[:, :r] * diag(S[:r])
         A = Vh[:actual_rank, :].contiguous()
         B = U[:, :actual_rank] @ torch.diag(S[:actual_rank])
@@ -46,6 +43,36 @@ def low_rank_decomposition(weight, rank=128):
         print(f"Decomposition error: {e}")
         return None, None
 def convert_safetensors_to_fp8_with_lora(safetensors_path, output_dir, fp8_format, lora_rank=128, architecture="auto", progress=gr.Progress()):
     progress(0.1, desc="Starting FP8 conversion with LoRA extraction...")
     try:
@@ -69,96 +96,133 @@ def convert_safetensors_to_fp8_with_lora(safetensors_path, output_dir, fp8_forma
         sd_fp8 = {}
         lora_weights = {}
         total = len(state_dict)
-        lora_keys = []
         stats = {
             "total_layers": total,
             "eligible_layers": 0,
             "processed_layers": 0,
-            "skipped_layers": []
         }
         for i, key in enumerate(state_dict):
             progress(0.4 + 0.4 * (i / total), desc=f"Processing {i+1}/{total}...")
             weight = state_dict[key]
             if weight.dtype in [torch.float16, torch.float32, torch.bfloat16]:
                 fp8_weight = weight.to(fp8_dtype)
                 sd_fp8[key] = fp8_weight
-                # Apply architecture filtering
-                lower_key = key.lower()
                 should_process = False
                 if architecture == "text_encoder":
-                    should_process = "text" in lower_key or "emb" in lower_key or "encoder" in lower_key
                 elif architecture == "transformer":
-                    should_process = "attn" in lower_key or "transformer" in lower_key
                 elif architecture == "vae":
-                    should_process = "vae" in lower_key or "decoder" in lower_key or "encoder" in lower_key
                 elif architecture == "all":
                     should_process = True
-                else:  # "auto" or unknown
                     should_process = True
-                # Only process 2D tensors that meet rank requirements and pass architecture filter
-                if should_process and weight.ndim == 2 and min(weight.shape) > lora_rank:
-                    stats["eligible_layers"] += 1
-                    try:
-                        A, B = low_rank_decomposition(weight, rank=lora_rank)
-                        if A is not None and B is not None:
-                            lora_weights[f"lora_A.{key}"] = A
-                            lora_weights[f"lora_B.{key}"] = B
-                            lora_keys.append(key)
                             stats["processed_layers"] += 1
-                        else:
-                            stats["skipped_layers"].append(f"{key}: decomposition failed")
-                    except Exception as e:
-                        stats["skipped_layers"].append(f"{key}: error - {str(e)}")
-                elif should_process and weight.ndim == 2:
-                    # Handle smaller 2D tensors with reduced rank
-                    smaller_rank = min(lora_rank, min(weight.shape) // 2)
-                    if smaller_rank >= 8:  # Minimum useful rank
                         stats["eligible_layers"] += 1
-                        try:
-                            A, B = low_rank_decomposition(weight, rank=smaller_rank)
-                            if A is not None and B is not None:
-                                lora_weights[f"lora_A.{key}"] = A
-                                lora_weights[f"lora_B.{key}"] = B
-                                lora_keys.append(key)
-                                stats["processed_layers"] += 1
-                            else:
-                                stats["skipped_layers"].append(f"{key}: small tensor decomposition failed")
-                        except Exception as e:
-                            stats["skipped_layers"].append(f"{key}: small tensor error - {str(e)}")
             else:
                 sd_fp8[key] = weight
                 stats["skipped_layers"].append(f"{key}: non-float dtype")
         base_name = os.path.splitext(os.path.basename(safetensors_path))[0]
         fp8_path = os.path.join(output_dir, f"{base_name}-fp8-{fp8_format}.safetensors")
-        lora_path = os.path.join(output_dir, f"{base_name}-lora-r{lora_rank}.safetensors")
         save_file(sd_fp8, fp8_path, metadata={"format": "pt", "fp8_format": fp8_format, **metadata})
-        # Always save LoRA file if any weights were processed
         if lora_weights:
             lora_metadata = {
                 "format": "pt",
                 "lora_rank": str(lora_rank),
                 "architecture": architecture,
-                "stats": json.dumps(stats)
             }
             save_file(lora_weights, lora_path, metadata=lora_metadata)
-        progress(0.9, desc="Saved FP8 and LoRA files.")
-        progress(1.0, desc="✅ FP8 + LoRA extraction complete!")
-        stats_msg = f"FP8 ({fp8_format}) and rank-{lora_rank} LoRA saved.\n"
-        stats_msg += f"Processed {stats['processed_layers']}/{stats['eligible_layers']} eligible layers."
-        if stats['processed_layers'] == 0:
-            stats_msg += "\n⚠️ No LoRA weights were generated. Try reducing rank or selecting a specific architecture."
         return True, stats_msg, stats
@@ -254,7 +318,7 @@ def process_and_upload_fp8(
             source_type, repo_url, safetensors_filename, hf_token, progress
         )
-        progress(0.25, desc="Converting to FP8 with LoRA extraction...")
         success, msg, stats = convert_safetensors_to_fp8_with_lora(
             safetensors_path, output_dir, fp8_format, lora_rank, architecture, progress
         )
@@ -268,26 +332,33 @@ def process_and_upload_fp8(
         )
         base_name = os.path.splitext(safetensors_filename)[0]
-        lora_filename = f"{base_name}-lora-r{lora_rank}.safetensors"
         fp8_filename = f"{base_name}-fp8-{fp8_format}.safetensors"
         readme = f"""---
 library_name: diffusers
 tags:
 - fp8
 - safetensors
-- lora
-- low-rank
 - diffusion
 - converted-by-gradio
 ---
-# FP8 Model with Low-Rank LoRA
 - **Source**: `{repo_url}`
 - **File**: `{safetensors_filename}`
 - **FP8 Format**: `{fp8_format.upper()}`
-- **LoRA Rank**: {lora_rank}
 - **Architecture**: {architecture}
-- **LoRA File**: `{lora_filename}`
 - **FP8 File**: `{fp8_filename}`
 ## Usage (Inference)
 ```python
@@ -295,18 +366,30 @@ from safetensors.torch import load_file
 import torch
 # Load FP8 model
 fp8_state = load_file("{fp8_filename}")
-lora_state = load_file("{lora_filename}")
-# Reconstruct approximate original weights
 reconstructed = {{}}
 for key in fp8_state:
-    if f"lora_A.{{key}}" in lora_state and f"lora_B.{{key}}" in lora_state:
-        A = lora_state[f"lora_A.{{key}}"].to(torch.float32)
-        B = lora_state[f"lora_B.{{key}}"].to(torch.float32)
-        lora_weight = B @ A  # (out_features, rank) @ (rank, in_features) -> (out_features, in_features)
-        fp8_weight = fp8_state[key].to(torch.float32)
-        reconstructed[key] = fp8_weight + lora_weight
     else:
-        reconstructed[key] = fp8_state[key].to(torch.float32)
 ```
 > Requires PyTorch ≥ 2.1 for FP8 support.
 """
@@ -327,9 +410,9 @@ for key in fp8_state:
         result_html = f"""
 ✅ Success!
 Model uploaded to: <a href="{repo_url_final}" target="_blank">{new_repo_id}</a>
-Includes: FP8 model + rank-{lora_rank} LoRA.
 """
-        return gr.HTML(result_html), "✅ FP8 + LoRA upload successful!", msg
     except Exception as e:
         import traceback
@@ -341,9 +424,9 @@ Includes: FP8 model + rank-{lora_rank} LoRA.
             shutil.rmtree(temp_dir, ignore_errors=True)
         shutil.rmtree(output_dir, ignore_errors=True)
-with gr.Blocks(title="FP8 + LoRA Extractor (HF ↔ ModelScope)") as demo:
-    gr.Markdown("# 🔄 FP8 Pruner with Enhanced Low-Rank LoRA Extraction")
-    gr.Markdown("Convert `.safetensors` → **FP8** + **high-quality LoRA** for precision recovery. Supports Hugging Face ↔ ModelScope.")
     with gr.Row():
         with gr.Column():
@@ -353,14 +436,14 @@ with gr.Blocks(title="FP8 + LoRA Extractor (HF ↔ ModelScope)") as demo:
             with gr.Accordion("Advanced Settings", open=True):
                 fp8_format = gr.Radio(["e4m3fn", "e5m2"], value="e5m2", label="FP8 Format")
-                lora_rank = gr.Slider(minimum=8, maximum=512, step=8, value=128, label="LoRA Rank")
                 architecture = gr.Dropdown(
                     choices=[
-                        ("Auto-detect components", "auto"),
-                        ("Text Encoder only", "text_encoder"),
-                        ("Transformer blocks only", "transformer"),
-                        ("VAE only", "vae"),
-                        ("All eligible layers", "all")
                     ],
                     value="auto",
                     label="Target Architecture"
@@ -372,7 +455,7 @@ with gr.Blocks(title="FP8 + LoRA Extractor (HF ↔ ModelScope)") as demo:
         with gr.Column():
             target_type = gr.Radio(["huggingface", "modelscope"], value="huggingface", label="Target")
-            new_repo_id = gr.Textbox(label="New Repo ID", placeholder="user/model-fp8-lora")
             private_repo = gr.Checkbox(label="Private Repository (HF only)", value=False)
             status_output = gr.Markdown()
@@ -402,25 +485,31 @@ with gr.Blocks(title="FP8 + LoRA Extractor (HF ↔ ModelScope)") as demo:
     gr.Examples(
         examples=[
-            ["huggingface", "https://huggingface.co/Yabo/FramePainter/tree/main", "unet_diffusion_pytorch_model.safetensors", "e5m2", 128, "transformer", "huggingface"],
             ["huggingface", "https://huggingface.co/stabilityai/sdxl-vae", "diffusion_pytorch_model.safetensors", "e4m3fn", 64, "vae", "huggingface"],
-            ["huggingface", "https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/text_encoder", "model.safetensors", "e5m2", 96, "text_encoder", "huggingface"]
         ],
         inputs=[source_type, repo_url, safetensors_filename, fp8_format, lora_rank, architecture, target_type],
         label="Example Conversions"
     )
     gr.Markdown("""
-    ## 💡 Usage Tips
-    - **Higher ranks (128-256)**: Best quality recovery for important layers
-    - **Smaller ranks (32-64)**: Good balance of quality and file size
-    - **Architecture selection**: Focus LoRA on specific components for better results
-    - **Text Encoder**: Use rank 96-128 for best text understanding
-    - **Transformers**: Use rank 128-256 for maximum quality retention
-    - **VAE**: Use rank 64-128 for good image reconstruction
-    > **Note**: This implementation maintains compatibility with existing merge scripts while providing significantly better precision recovery through improved LoRA extraction.
     """)
 demo.launch()

     MODELScope_AVAILABLE = False
 def low_rank_decomposition(weight, rank=128):
+    """Improved LoRA decomposition that maintains compatibility with existing merge scripts."""
     if weight.ndim != 2:
         return None, None
         # Ensure rank doesn't exceed available singular values
         actual_rank = min(rank, len(S))
+        if actual_rank < 8:
+            return None, None
         # Create LoRA matrices using standard factorization
         A = Vh[:actual_rank, :].contiguous()
         B = U[:, :actual_rank] @ torch.diag(S[:actual_rank])
         print(f"Decomposition error: {e}")
         return None, None
+def extract_correction_factors(original_weight, fp8_weight):
+    """Compute an additive mean-error correction for an FP8-converted weight.
+
+    Used in place of LoRA decomposition for VAE weights. The conversion
+    error ``original_weight - fp8_weight`` is computed in float32 and then
+    averaged: over every dimension except dim 0 for 4D tensors, over dim 1
+    for 2D tensors (both with ``keepdim=True``), and over all elements for
+    any other rank.
+
+    Args:
+        original_weight: Tensor before conversion; its dtype is used for the
+            returned correction.
+        fp8_weight: The converted counterpart of ``original_weight``
+            (presumably the same shape; the subtraction relies on it).
+
+    Returns:
+        ``None`` when the original norm exceeds 1e-6 and the relative error
+        norm is below 0.01; otherwise the averaged error tensor cast to
+        ``original_weight.dtype``.
+    """
+    with torch.no_grad():
+        # Convert to float32 for precision
+        orig = original_weight.float()
+        quant = fp8_weight.float()
+        # Compute error (what needs to be added to FP8 to recover original)
+        error = orig - quant
+        # Skip if error is negligible (relative error norm below 1%).
+        # The skip is bypassed when the original norm is <= 1e-6, so
+        # near-zero tensors always fall through and get a correction.
+        error_norm = torch.norm(error)
+        orig_norm = torch.norm(orig)
+        if orig_norm > 1e-6 and error_norm / orig_norm < 0.01:
+            return None
+        # For 4D tensors (common in VAE), compute per-channel correction
+        if orig.ndim == 4:
+            # Channel dimension is typically dimension 0 (output channels)
+            # NOTE(review): channel_dim is assigned but never read; the
+            # reduction below hard-codes dims 1..ndim-1 instead.
+            channel_dim = 0
+            # Mean over all dims except dim 0; keepdim=True keeps the result
+            # 4D so it can broadcast against the weight.
+            channel_mean = error.mean(dim=tuple(i for i in range(1, orig.ndim)), keepdim=True)
+            return channel_mean.to(original_weight.dtype)
+        # For 2D tensors, use per-row correction
+        elif orig.ndim == 2:
+            # One mean per row (dim 1 reduced, kept as a size-1 column).
+            row_mean = error.mean(dim=1, keepdim=True)
+            return row_mean.to(original_weight.dtype)
+        else:
+            # For bias/batchnorm etc., use scalar correction
+            # (a single mean over every element of the error tensor)
+            return error.mean().to(original_weight.dtype)
 def convert_safetensors_to_fp8_with_lora(safetensors_path, output_dir, fp8_format, lora_rank=128, architecture="auto", progress=gr.Progress()):
     progress(0.1, desc="Starting FP8 conversion with LoRA extraction...")
     try:
         sd_fp8 = {}
         lora_weights = {}
+        correction_factors = {}
         total = len(state_dict)
         stats = {
             "total_layers": total,
             "eligible_layers": 0,
             "processed_layers": 0,
+            "correction_layers": 0,
+            "skipped_layers": [],
+            "architecture_detected": ""
         }
+        # Auto-detect architecture if needed
+        if architecture == "auto":
+            model_keys = " ".join(state_dict.keys()).lower()
+            if "text" in model_keys or "emb" in model_keys:
+                architecture = "text_encoder"
+            elif "vae" in model_keys or "encoder" in model_keys or "decoder" in model_keys:
+                architecture = "vae"
+            elif "attn" in model_keys or "transformer" in model_keys:
+                architecture = "transformer"
+            else:
+                architecture = "all"
+        stats["architecture_detected"] = architecture
+        use_correction = architecture == "vae"
         for i, key in enumerate(state_dict):
             progress(0.4 + 0.4 * (i / total), desc=f"Processing {i+1}/{total}...")
             weight = state_dict[key]
+            lower_key = key.lower()
             if weight.dtype in [torch.float16, torch.float32, torch.bfloat16]:
                 fp8_weight = weight.to(fp8_dtype)
                 sd_fp8[key] = fp8_weight
+                # Determine if this layer should be processed based on architecture
                 should_process = False
                 if architecture == "text_encoder":
+                    should_process = "text" in lower_key or "emb" in lower_key or "encoder" in lower_key or "attn" in lower_key
                 elif architecture == "transformer":
+                    should_process = "attn" in lower_key or "transformer" in lower_key or "mlp" in lower_key
                 elif architecture == "vae":
+                    should_process = "vae" in lower_key or "decoder" in lower_key or "encoder" in lower_key or "conv" in lower_key
                 elif architecture == "all":
                     should_process = True
+                else:  # "auto" fallback
                     should_process = True
+                if should_process:
+                    if use_correction:
+                        # For VAE, use correction factors instead of LoRA
+                        corr = extract_correction_factors(weight, fp8_weight)
+                        if corr is not None:
+                            correction_factors[f"correction.{key}"] = corr
+                            stats["correction_layers"] += 1
                             stats["processed_layers"] += 1
+                    else:
+                        # For other architectures, use LoRA
                         stats["eligible_layers"] += 1
+                        # Handle 2D tensors with standard LoRA
+                        if weight.ndim == 2:
+                            try:
+                                # Adjust rank for smaller matrices
+                                adjusted_rank = lora_rank
+                                if min(weight.shape) < lora_rank:
+                                    adjusted_rank = max(8, min(weight.shape) // 2)
+                                A, B = low_rank_decomposition(weight, rank=adjusted_rank)
+                                if A is not None and B is not None:
+                                    lora_weights[f"lora_A.{key}"] = A
+                                    lora_weights[f"lora_B.{key}"] = B
+                                    stats["processed_layers"] += 1
+                                else:
+                                    stats["skipped_layers"].append(f"{key}: decomposition failed")
+                            except Exception as e:
+                                stats["skipped_layers"].append(f"{key}: error - {str(e)}")
+                        # Skip 4D tensors for non-VAE architectures
+                        elif weight.ndim == 4:
+                            stats["skipped_layers"].append(f"{key}: 4D tensor skipped for non-VAE architecture")
             else:
                 sd_fp8[key] = weight
                 stats["skipped_layers"].append(f"{key}: non-float dtype")
         base_name = os.path.splitext(os.path.basename(safetensors_path))[0]
         fp8_path = os.path.join(output_dir, f"{base_name}-fp8-{fp8_format}.safetensors")
+        # Save FP8 model
         save_file(sd_fp8, fp8_path, metadata={"format": "pt", "fp8_format": fp8_format, **metadata})
+        # Save LoRA weights if any were generated
         if lora_weights:
+            lora_path = os.path.join(output_dir, f"{base_name}-lora-r{lora_rank}-{architecture}.safetensors")
             lora_metadata = {
                 "format": "pt",
                 "lora_rank": str(lora_rank),
                 "architecture": architecture,
+                "stats": json.dumps(stats),
+                "method": "lora"
             }
             save_file(lora_weights, lora_path, metadata=lora_metadata)
+        # Save correction factors if any were generated (for VAE)
+        if correction_factors:
+            correction_path = os.path.join(output_dir, f"{base_name}-correction-{architecture}.safetensors")
+            correction_metadata = {
+                "format": "pt",
+                "architecture": architecture,
+                "stats": json.dumps(stats),
+                "method": "correction"
+            }
+            save_file(correction_factors, correction_path, metadata=correction_metadata)
+        progress(0.9, desc="Saved FP8 and LoRA/correction files.")
+        progress(1.0, desc="✅ FP8 + LoRA/correction extraction complete!")
+        stats_msg = f"FP8 ({fp8_format}) with precision recovery saved.\n"
+        stats_msg += f"Architecture detected: {stats['architecture_detected']}\n"
+        if use_correction:
+            stats_msg += f"Correction factors generated for {stats['correction_layers']} layers."
+        else:
+            stats_msg += f"Processed {stats['processed_layers']}/{stats['eligible_layers']} eligible layers with LoRA rank {lora_rank}."
+        if stats['processed_layers'] == 0 and stats['correction_layers'] == 0:
+            stats_msg += "\n⚠️ No precision recovery weights were generated. Try a different architecture selection or parameters."
         return True, stats_msg, stats
             source_type, repo_url, safetensors_filename, hf_token, progress
         )
+        progress(0.25, desc="Converting to FP8 with precision recovery...")
         success, msg, stats = convert_safetensors_to_fp8_with_lora(
             safetensors_path, output_dir, fp8_format, lora_rank, architecture, progress
         )
         )
         base_name = os.path.splitext(safetensors_filename)[0]
         fp8_filename = f"{base_name}-fp8-{fp8_format}.safetensors"
+        # Determine which precision recovery file was generated
+        precision_recovery_file = ""
+        precision_recovery_type = "LoRA"
+        if stats.get("correction_layers", 0) > 0:
+            precision_recovery_file = f"{base_name}-correction-{architecture}.safetensors"
+            precision_recovery_type = "Correction Factors"
+        elif stats.get("processed_layers", 0) > 0:
+            precision_recovery_file = f"{base_name}-lora-r{lora_rank}-{architecture}.safetensors"
         readme = f"""---
 library_name: diffusers
 tags:
 - fp8
 - safetensors
+- precision-recovery
 - diffusion
 - converted-by-gradio
 ---
+# FP8 Model with Precision Recovery
 - **Source**: `{repo_url}`
 - **File**: `{safetensors_filename}`
 - **FP8 Format**: `{fp8_format.upper()}`
 - **Architecture**: {architecture}
+- **Precision Recovery Type**: {precision_recovery_type}
+- **Precision Recovery File**: `{precision_recovery_file}`
 - **FP8 File**: `{fp8_filename}`
 ## Usage (Inference)
 ```python
 import torch
 # Load FP8 model
 fp8_state = load_file("{fp8_filename}")
+# Load precision recovery file
+recovery_state = load_file("{precision_recovery_file}") if "{precision_recovery_file}" else {{}}
+# Reconstruct high-precision weights
 reconstructed = {{}}
 for key in fp8_state:
+    fp8_weight = fp8_state[key].to(torch.float32)
+    if recovery_state:
+        # For LoRA approach
+        if "lora_A" in recovery_state:
+            if f"lora_A.{{key}}" in recovery_state and f"lora_B.{{key}}" in recovery_state:
+                A = recovery_state[f"lora_A.{{key}}"].to(torch.float32)
+                B = recovery_state[f"lora_B.{{key}}"].to(torch.float32)
+                lora_weight = B @ A
+                reconstructed[key] = fp8_weight + lora_weight
+            else:
+                reconstructed[key] = fp8_weight
+        # For correction factor approach
+        elif f"correction.{{key}}" in recovery_state:
+            correction = recovery_state[f"correction.{{key}}"].to(torch.float32)
+            reconstructed[key] = fp8_weight + correction
+        else:
+            reconstructed[key] = fp8_weight
     else:
+        reconstructed[key] = fp8_weight
 ```
 > Requires PyTorch ≥ 2.1 for FP8 support.
 """
         result_html = f"""
 ✅ Success!
 Model uploaded to: <a href="{repo_url_final}" target="_blank">{new_repo_id}</a>
+Includes: FP8 model + precision recovery ({precision_recovery_type}).
 """
+        return gr.HTML(result_html), "✅ FP8 + precision recovery upload successful!", msg
     except Exception as e:
         import traceback
             shutil.rmtree(temp_dir, ignore_errors=True)
         shutil.rmtree(output_dir, ignore_errors=True)
+with gr.Blocks(title="FP8 + Precision Recovery Extractor") as demo:
+    gr.Markdown("# 🔄 FP8 Pruner with Architecture-Specific Precision Recovery")
+    gr.Markdown("Convert `.safetensors` → **FP8** + **precision recovery** (LoRA or correction factors). Supports Hugging Face ↔ ModelScope.")
     with gr.Row():
         with gr.Column():
             with gr.Accordion("Advanced Settings", open=True):
                 fp8_format = gr.Radio(["e4m3fn", "e5m2"], value="e5m2", label="FP8 Format")
+                lora_rank = gr.Slider(minimum=8, maximum=256, step=8, value=128, label="LoRA Rank (for text/transformers)")
                 architecture = gr.Dropdown(
                     choices=[
+                        ("Auto-detect architecture", "auto"),
+                        ("Text Encoder (LoRA)", "text_encoder"),
+                        ("Transformer blocks (LoRA)", "transformer"),
+                        ("VAE (Correction Factors)", "vae"),
+                        ("All layers (LoRA where applicable)", "all")
                     ],
                     value="auto",
                     label="Target Architecture"
         with gr.Column():
             target_type = gr.Radio(["huggingface", "modelscope"], value="huggingface", label="Target")
+            new_repo_id = gr.Textbox(label="New Repo ID", placeholder="user/model-fp8")
             private_repo = gr.Checkbox(label="Private Repository (HF only)", value=False)
             status_output = gr.Markdown()
     gr.Examples(
         examples=[
+            ["huggingface", "https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/text_encoder", "model.safetensors", "e5m2", 96, "text_encoder", "huggingface"],
             ["huggingface", "https://huggingface.co/stabilityai/sdxl-vae", "diffusion_pytorch_model.safetensors", "e4m3fn", 64, "vae", "huggingface"],
+            ["huggingface", "https://huggingface.co/Yabo/FramePainter/tree/main", "unet_diffusion_pytorch_model.safetensors", "e5m2", 128, "transformer", "huggingface"]
         ],
         inputs=[source_type, repo_url, safetensors_filename, fp8_format, lora_rank, architecture, target_type],
         label="Example Conversions"
     )
     gr.Markdown("""
+    ## 💡 Architecture-Specific Precision Recovery
+    This tool automatically selects the best precision recovery method based on architecture:
+    - **Text Encoder & Transformers**: Uses **LoRA decomposition** (best for attention layers)
+      - Higher ranks (96-128) recommended for text encoders
+      - Medium ranks (64-128) for transformers
+    - **VAE**: Uses **per-channel correction factors** (better for convolutional layers)
+      - No rank parameter needed - automatically computes channel-wise corrections
+      - Works with 4D convolutional weights that LoRA cannot handle well
+    - **Auto-detect**: Analyzes model structure to select appropriate method
+    > **Note**: VAE models typically contain 4D convolutional weights that don't work well with standard LoRA.
+    > The correction factor approach used for VAE matches the successful method from the attached file.
     """)
 demo.launch()