Spaces:

codemichaeld
/

new03

Running

App Files Files Community

codemichaeld commited on 13 days ago

Commit

311ef01

verified ·

1 Parent(s): 672b8b5

Update app.py

Browse files

Files changed (1) hide show

app.py +191 -390

app.py CHANGED Viewed

@@ -4,14 +4,11 @@ import tempfile
 import shutil
 import re
 import json
-import datetime
 from pathlib import Path
 from huggingface_hub import HfApi, hf_hub_download
 from safetensors.torch import load_file, save_file
 import torch
 import torch.nn.functional as F
-import traceback
-import math
 try:
     from modelscope.hub.file_download import model_file_download as ms_file_download
     from modelscope.hub.api import HubApi as ModelScopeApi
@@ -19,92 +16,21 @@ try:
 except ImportError:
     MODELScope_AVAILABLE = False
-def get_fp8_dtype(fp8_format):
-    """Get torch FP8 dtype."""
-    if fp8_format == "e5m2":
-        return torch.float8_e5m2
-    else:
-        return torch.float8_e4m3fn
-def quantize_and_get_error(weight, fp8_dtype):
-    """Quantize weight to FP8 and return both quantized weight and error."""
-    weight_fp8 = weight.to(fp8_dtype)
-    weight_dequantized = weight_fp8.to(weight.dtype)
-    error = weight - weight_dequantized
-    return weight_fp8, error
-def low_rank_decomposition_error(error_tensor, rank=32, min_error_threshold=1e-6):
-    """Decompose error tensor with proper rank reduction."""
-    if error_tensor.ndim not in [2, 4]:
         return None, None
     try:
-        # Calculate error magnitude
-        error_norm = torch.norm(error_tensor.float())
-        if error_norm < min_error_threshold:
-            return None, None
-        # For 2D tensors (linear layers)
-        if error_tensor.ndim == 2:
-            U, S, Vh = torch.linalg.svd(error_tensor.float(), full_matrices=False)
-            # Calculate rank based on variance explained (keep 95% of error)
-            total_variance = torch.sum(S ** 2)
-            cumulative = torch.cumsum(S ** 2, dim=0)
-            keep_components = torch.sum(cumulative <= 0.95 * total_variance).item() + 1
-            # Limit rank to much smaller than original
-            max_rank = min(error_tensor.shape)
-            actual_rank = min(rank, keep_components, max_rank // 2)
-            if actual_rank < 2:
-                return None, None
-            A = Vh[:actual_rank, :].contiguous()
-            B = U[:, :actual_rank] @ torch.diag(S[:actual_rank]).contiguous()
-            return A, B
-        # For 4D convolutions
-        elif error_tensor.ndim == 4:
-            out_ch, in_ch, kH, kW = error_tensor.shape
-            # Reshape to 2D for decomposition
-            error_2d = error_tensor.view(out_ch, in_ch * kH * kW)
-            U, S, Vh = torch.linalg.svd(error_2d.float(), full_matrices=False)
-            # Calculate rank based on variance explained (90% for conv)
-            total_variance = torch.sum(S ** 2)
-            cumulative = torch.cumsum(S ** 2, dim=0)
-            keep_components = torch.sum(cumulative <= 0.90 * total_variance).item() + 1
-            # Use even lower rank for conv
-            max_rank = min(error_2d.shape)
-            actual_rank = min(rank // 2, keep_components, max_rank // 4)
-            if actual_rank < 2:
-                return None, None
-            A = Vh[:actual_rank, :].contiguous()
-            B = U[:, :actual_rank] @ torch.diag(S[:actual_rank]).contiguous()
-            # Reshape back for convolutional format
-            if kH == 1 and kW == 1:
-                B = B.view(out_ch, actual_rank, 1, 1)
-                A = A.view(actual_rank, in_ch, 1, 1)
-            else:
-                B = B.view(out_ch, actual_rank, 1, 1)
-                A = A.view(actual_rank, in_ch, kH, kW)
-            return A, B
-    except Exception as e:
-        print(f"Error decomposition failed: {e}")
-    return None, None
 def extract_correction_factors(original_weight, fp8_weight):
-    """Extract simple correction factors for VAE."""
     with torch.no_grad():
         orig = original_weight.float()
         quant = fp8_weight.float()
@@ -112,99 +38,27 @@ def extract_correction_factors(original_weight, fp8_weight):
         error_norm = torch.norm(error)
         orig_norm = torch.norm(orig)
-        if orig_norm > 1e-6 and error_norm / orig_norm < 0.001:
             return None
-        # For 4D tensors (VAE), compute per-channel correction
         if orig.ndim == 4:
             channel_mean = error.mean(dim=tuple(i for i in range(1, orig.ndim)), keepdim=True)
             return channel_mean.to(original_weight.dtype)
         elif orig.ndim == 2:
             row_mean = error.mean(dim=1, keepdim=True)
             return row_mean.to(original_weight.dtype)
         else:
             return error.mean().to(original_weight.dtype)
-def get_architecture_settings(architecture, base_rank):
-    """Get optimal settings for different architectures."""
-    settings = {
-        "text_encoder": {
-            "rank": base_rank,
-            "error_threshold": 5e-5,
-            "min_rank": 8,
-            "max_rank_factor": 0.4,
-            "method": "lora"
-        },
-        "transformer": {
-            "rank": base_rank,
-            "error_threshold": 1e-5,
-            "min_rank": 12,
-            "max_rank_factor": 0.35,
-            "method": "lora"
-        },
-        "vae": {
-            "rank": base_rank // 2,
-            "error_threshold": 1e-4,
-            "min_rank": 4,
-            "max_rank_factor": 0.3,
-            "method": "correction"
-        },
-        "unet_conv": {
-            "rank": base_rank // 3,
-            "error_threshold": 2e-5,
-            "min_rank": 8,
-            "max_rank_factor": 0.25,
-            "method": "lora"
-        },
-        "auto": {
-            "rank": base_rank,
-            "error_threshold": 1e-5,
-            "min_rank": 8,
-            "max_rank_factor": 0.3,
-            "method": "lora"
-        },
-        "all": {
-            "rank": base_rank,
-            "error_threshold": 1e-5,
-            "min_rank": 8,
-            "max_rank_factor": 0.3,
-            "method": "lora"
-        }
-    }
-    return settings.get(architecture, settings["auto"])
-def should_process_layer(key, weight, architecture):
-    """Determine if layer should be processed for LoRA/correction."""
-    lower_key = key.lower()
-    # Skip biases and normalization layers
-    if 'bias' in key or 'norm' in key.lower() or 'bn' in key.lower():
-        return False
-    if weight.numel() < 100:
-        return False
-    # Architecture-specific filtering
-    if architecture == "text_encoder":
-        return ('text' in lower_key or 'emb' in lower_key or
-                'encoder' in lower_key or 'attn' in lower_key)
-    elif architecture == "transformer":
-        return ('attn' in lower_key or 'transformer' in lower_key or
-                'mlp' in lower_key or 'to_out' in lower_key)
-    elif architecture == "vae":
-        return ('vae' in lower_key or 'encoder' in lower_key or
-                'decoder' in lower_key or 'conv' in lower_key)
-    elif architecture == "unet_conv":
-        return ('conv' in lower_key or 'resnet' in lower_key or
-                'downsample' in lower_key or 'upsample' in lower_key)
-    elif architecture in ["all", "auto"]:
-        return True
-    return False
-def convert_safetensors_to_fp8_with_lora(safetensors_path, output_dir, fp8_format, lora_rank=128, architecture="auto", progress=gr.Progress()):
-    progress(0.1, desc="Starting FP8 conversion with error recovery...")
     try:
         def read_safetensors_metadata(path):
             with open(path, 'rb') as f:
@@ -215,157 +69,122 @@ def convert_safetensors_to_fp8_with_lora(safetensors_path, output_dir, fp8_forma
         metadata = read_safetensors_metadata(safetensors_path)
         progress(0.2, desc="Loaded metadata.")
         state_dict = load_file(safetensors_path)
         progress(0.4, desc="Loaded weights.")
-        # Auto-detect architecture if needed
-        if architecture == "auto":
-            model_keys = " ".join(state_dict.keys()).lower()
-            if "vae" in model_keys or ("encoder" in model_keys and "decoder" in model_keys):
-                architecture = "vae"
-            elif "text" in model_keys or "emb" in model_keys:
-                architecture = "text_encoder"
-            elif "attn" in model_keys or "transformer" in model_keys:
-                architecture = "transformer"
-            elif "conv" in model_keys or "resnet" in model_keys:
-                architecture = "unet_conv"
-            else:
-                architecture = "all"
-        settings = get_architecture_settings(architecture, lora_rank)
-        fp8_dtype = get_fp8_dtype(fp8_format)
         sd_fp8 = {}
-        lora_weights = {}
-        correction_factors = {}
         stats = {
             "total_layers": len(state_dict),
-            "eligible_layers": 0,
-            "layers_with_error": 0,
             "processed_layers": 0,
-            "correction_layers": 0,
             "skipped_layers": [],
-            "architecture": architecture,
-            "method": settings["method"],
-            "error_magnitudes": []
         }
         total = len(state_dict)
         for i, key in enumerate(state_dict):
             progress(0.4 + 0.4 * (i / total), desc=f"Processing {i+1}/{total}...")
             weight = state_dict[key]
             if weight.dtype in [torch.float16, torch.float32, torch.bfloat16]:
-                # Quantize to FP8 and calculate error
-                weight_fp8, error = quantize_and_get_error(weight, fp8_dtype)
-                sd_fp8[key] = weight_fp8
-                # Calculate error magnitude
-                error_norm = torch.norm(error.float())
-                weight_norm = torch.norm(weight.float())
-                relative_error = (error_norm / weight_norm).item() if weight_norm > 0 else 0
-                stats["error_magnitudes"].append({
-                    "key": key,
-                    "relative_error": relative_error
-                })
-                # Check if layer should be processed
-                should_process = should_process_layer(key, weight, architecture)
-                if should_process:
-                    stats["eligible_layers"] += 1
-                    # Only process if error is significant
-                    if relative_error > settings["error_threshold"]:
-                        stats["layers_with_error"] += 1
-                        if settings["method"] == "correction":
-                            # Use correction factors for VAE
-                            correction = extract_correction_factors(weight, weight_fp8)
-                            if correction is not None:
-                                correction_factors[f"correction.{key}"] = correction
-                                stats["correction_layers"] += 1
-                                stats["processed_layers"] += 1
-                        else:
-                            # Use LoRA decomposition for other architectures
-                            try:
-                                A, B = low_rank_decomposition_error(
-                                    error,
-                                    rank=settings["rank"],
-                                    min_error_threshold=settings["error_threshold"]
-                                )
-                                if A is not None and B is not None:
-                                    lora_weights[f"lora_A.{key}"] = A.to(torch.float16)
-                                    lora_weights[f"lora_B.{key}"] = B.to(torch.float16)
-                                    stats["processed_layers"] += 1
-                                else:
-                                    stats["skipped_layers"].append(f"{key}: decomposition failed")
-                            except Exception as e:
-                                stats["skipped_layers"].append(f"{key}: error - {str(e)}")
-                    else:
-                        stats["skipped_layers"].append(f"{key}: error too small ({relative_error:.6f})")
             else:
                 sd_fp8[key] = weight
                 stats["skipped_layers"].append(f"{key}: non-float dtype")
-        # Calculate average error
-        if stats["error_magnitudes"]:
-            errors = [e["relative_error"] for e in stats["error_magnitudes"]]
-            stats["avg_error"] = sum(errors) / len(errors) if errors else 0
-            stats["max_error"] = max(errors) if errors else 0
         base_name = os.path.splitext(os.path.basename(safetensors_path))[0]
         fp8_path = os.path.join(output_dir, f"{base_name}-fp8-{fp8_format}.safetensors")
         save_file(sd_fp8, fp8_path, metadata={"format": "pt", "fp8_format": fp8_format, **metadata})
-        # Save precision recovery weights
-        if lora_weights:
-            lora_path = os.path.join(output_dir, f"{base_name}-lora-r{lora_rank}-{architecture}.safetensors")
-            lora_metadata = {
-                "format": "pt",
-                "lora_rank": str(lora_rank),
-                "architecture": architecture,
-                "stats": json.dumps(stats),
-                "method": "lora"
-            }
-            save_file(lora_weights, lora_path, metadata=lora_metadata)
-        if correction_factors:
-            correction_path = os.path.join(output_dir, f"{base_name}-correction-{architecture}.safetensors")
-            correction_metadata = {
                 "format": "pt",
-                "architecture": architecture,
-                "stats": json.dumps(stats),
-                "method": "correction"
-            }
-            save_file(correction_factors, correction_path, metadata=correction_metadata)
-        progress(0.9, desc="Saved FP8 and precision recovery files.")
-        progress(1.0, desc="✅ FP8 + precision recovery extraction complete!")
-        stats_msg = f"FP8 ({fp8_format}) with precision recovery saved.\n"
-        stats_msg += f"Architecture: {architecture}\n"
-        stats_msg += f"Method: {settings['method']}\n"
-        stats_msg += f"Average quantization error: {stats.get('avg_error', 0):.6f}\n"
-        if settings["method"] == "correction":
-            stats_msg += f"Correction factors generated for {stats['correction_layers']} layers."
-        else:
-            stats_msg += f"LoRA generated for {stats['processed_layers']}/{stats['eligible_layers']} eligible layers (rank {lora_rank})."
-        if stats['processed_layers'] == 0 and stats['correction_layers'] == 0:
-            stats_msg += "\n⚠️ No precision recovery weights were generated. FP8 quantization error may be too small."
         return True, stats_msg, stats
     except Exception as e:
-        error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
-        return False, error_msg, None
 def parse_hf_url(url):
     url = url.strip().rstrip("/")
@@ -428,8 +247,7 @@ def process_and_upload_fp8(
     repo_url,
     safetensors_filename,
     fp8_format,
-    lora_rank,
-    architecture,
     target_type,
     new_repo_id,
     hf_token,
@@ -443,8 +261,10 @@ def process_and_upload_fp8(
         return None, "❌ Hugging Face token required for source.", ""
     if target_type == "huggingface" and not hf_token:
         return None, "❌ Hugging Face token required for target.", ""
-    if lora_rank < 8:
-        return None, "❌ LoRA rank must be at least 8.", ""
     temp_dir = None
     output_dir = tempfile.mkdtemp()
@@ -454,9 +274,9 @@ def process_and_upload_fp8(
             source_type, repo_url, safetensors_filename, hf_token, progress
         )
-        progress(0.25, desc="Converting to FP8 with precision recovery...")
-        success, msg, stats = convert_safetensors_to_fp8_with_lora(
-            safetensors_path, output_dir, fp8_format, lora_rank, architecture, progress
         )
         if not success:
@@ -469,16 +289,7 @@ def process_and_upload_fp8(
         base_name = os.path.splitext(safetensors_filename)[0]
         fp8_filename = f"{base_name}-fp8-{fp8_format}.safetensors"
-        # Determine which precision recovery file was generated
-        precision_recovery_file = ""
-        precision_recovery_type = ""
-        if stats.get("method") == "correction" and stats.get("correction_layers", 0) > 0:
-            precision_recovery_file = f"{base_name}-correction-{architecture}.safetensors"
-            precision_recovery_type = "Correction Factors"
-        elif stats.get("method") == "lora" and stats.get("processed_layers", 0) > 0:
-            precision_recovery_file = f"{base_name}-lora-r{lora_rank}-{architecture}.safetensors"
-            precision_recovery_type = "LoRA"
         readme = f"""---
 library_name: diffusers
@@ -486,60 +297,49 @@ tags:
 - fp8
 - safetensors
 - precision-recovery
-- diffusion
 - converted-by-gradio
 ---
-# FP8 Model with Precision Recovery
 - **Source**: `{repo_url}`
 - **File**: `{safetensors_filename}`
 - **FP8 Format**: `{fp8_format.upper()}`
-- **Architecture**: {architecture}
-- **Precision Recovery Type**: {precision_recovery_type}
-- **Precision Recovery File**: `{precision_recovery_file}` if available
-- **FP8 File**: `{fp8_filename}`
-## Usage (Inference)
 ```python
 from safetensors.torch import load_file
 import torch
-# Load FP8 model
 fp8_state = load_file("{fp8_filename}")
-# Load precision recovery file if available
-recovery_state = {{}}
-if "{precision_recovery_file}":
-    recovery_state = load_file("{precision_recovery_file}")
-# Reconstruct high-precision weights
 reconstructed = {{}}
 for key in fp8_state:
-    # Dequantize FP8 to target precision
-    fp_weight = fp8_state[key].to(torch.float32)
-    if recovery_state:
-        # For LoRA approach
-        if f"lora_A.{{key}}" in recovery_state and f"lora_B.{{key}}" in recovery_state:
-            A = recovery_state[f"lora_A.{{key}}"].to(torch.float32)
-            B = recovery_state[f"lora_B.{{key}}"].to(torch.float32)
-            error_correction = B @ A
-            reconstructed[key] = fp_weight + error_correction
-        # For correction factor approach
-        elif f"correction.{{key}}" in recovery_state:
-            correction = recovery_state[f"correction.{{key}}"].to(torch.float32)
-            reconstructed[key] = fp_weight + correction
-        else:
-            reconstructed[key] = fp_weight
-    else:
-        reconstructed[key] = fp_weight
-print("Model reconstructed with FP8 error recovery")
 ```
-> **Note**: This precision recovery targets FP8 quantization errors.
-> Average quantization error: {stats.get('avg_error', 0):.6f}
 """
         with open(os.path.join(output_dir, "README.md"), "w") as f:
             f.write(readme)
@@ -553,31 +353,24 @@ print("Model reconstructed with FP8 error recovery")
             )
         progress(1.0, desc="✅ Done!")
         result_html = f"""
 ✅ Success!
 Model uploaded to: <a href="{repo_url_final}" target="_blank">{new_repo_id}</a>
-Includes: FP8 model + precision recovery ({precision_recovery_type}).
-Average quantization error: {stats.get('avg_error', 0):.6f}
 """
-        if stats['processed_layers'] > 0 or stats['correction_layers'] > 0:
-            result_html += f"<br>Precision recovery applied to {stats['processed_layers'] + stats['correction_layers']} layers."
-        return gr.HTML(result_html), "✅ FP8 + precision recovery upload successful!", msg
     except Exception as e:
-        error_msg = f"❌ Error: {str(e)}\n{traceback.format_exc()}"
-        return None, error_msg, ""
     finally:
         if temp_dir:
             shutil.rmtree(temp_dir, ignore_errors=True)
         shutil.rmtree(output_dir, ignore_errors=True)
-with gr.Blocks(title="FP8 + Precision Recovery Extractor") as demo:
-    gr.Markdown("# 🔄 FP8 Converter with Architecture-Specific Precision Recovery")
-    gr.Markdown("Convert models to **FP8** with **error-based precision recovery**.")
     with gr.Row():
         with gr.Column():
@@ -585,31 +378,33 @@ with gr.Blocks(title="FP8 + Precision Recovery Extractor") as demo:
             repo_url = gr.Textbox(label="Repo URL or ID", placeholder="https://huggingface.co/... or modelscope-id")
             safetensors_filename = gr.Textbox(label="Filename", placeholder="model.safetensors")
-            with gr.Accordion("Advanced Settings", open=True):
                 fp8_format = gr.Radio(["e4m3fn", "e5m2"], value="e5m2", label="FP8 Format")
-                lora_rank = gr.Slider(minimum=8, maximum=256, step=8, value=128,
-                                     label="LoRA Rank (for text/transformers)")
-                architecture = gr.Dropdown(
-                    choices=[
-                        ("Auto-detect architecture", "auto"),
-                        ("Text Encoder (LoRA)", "text_encoder"),
-                        ("Transformer blocks (LoRA)", "transformer"),
-                        ("VAE (Correction Factors)", "vae"),
-                        ("UNet Convolutions (LoRA)", "unet_conv"),
-                        ("All layers (LoRA where applicable)", "all")
-                    ],
-                    value="auto",
-                    label="Target Architecture"
                 )
             with gr.Accordion("Authentication", open=False):
                 hf_token = gr.Textbox(label="Hugging Face Token", type="password")
-                modelscope_token = gr.Textbox(label="ModelScope Token (optional)", type="password",
-                                              visible=MODELScope_AVAILABLE)
         with gr.Column():
             target_type = gr.Radio(["huggingface", "modelscope"], value="huggingface", label="Target")
-            new_repo_id = gr.Textbox(label="New Repo ID", placeholder="user/model-fp8-precision")
             private_repo = gr.Checkbox(label="Private Repository (HF only)", value=False)
             status_output = gr.Markdown()
@@ -625,8 +420,7 @@ with gr.Blocks(title="FP8 + Precision Recovery Extractor") as demo:
             repo_url,
             safetensors_filename,
             fp8_format,
-            lora_rank,
-            architecture,
             target_type,
             new_repo_id,
             hf_token,
@@ -639,39 +433,46 @@ with gr.Blocks(title="FP8 + Precision Recovery Extractor") as demo:
     gr.Examples(
         examples=[
-            ["huggingface", "https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/text_encoder",
-             "model.safetensors", "e5m2", 96, "text_encoder"],
-            ["huggingface", "https://huggingface.co/stabilityai/sdxl-vae",
-             "diffusion_pytorch_model.safetensors", "e4m3fn", 64, "vae"],
-            ["huggingface", "https://huggingface.co/Yabo/FramePainter/tree/main",
-             "unet_diffusion_pytorch_model.safetensors", "e5m2", 128, "transformer"]
         ],
-        inputs=[source_type, repo_url, safetensors_filename, fp8_format, lora_rank, architecture],
         label="Example Conversions"
     )
     gr.Markdown("""
-    ## 🎯 What This Tool Does
-    Unlike traditional LoRA fine-tuning, this tool:
-    1. **Quantizes** the model to FP8 (loses precision)
-    2. **Measures** the quantization error for each weight
-    3. **Extracts recovery weights** that specifically recover this error
-    4. **Only applies** recovery where error is significant (>0.001%)
-    ## 💡 Recommended Settings
-    - **Text Encoders**: rank 64-96 (text is sensitive)
-    - **Transformers**: rank 96-128
-    - **VAE**: Uses correction factors (no rank needed)
-    - **UNet Convolutions**: rank 32-64
-    ## ⚠️ Important Notes
-    - This recovers **FP8 quantization errors**, not fine-tuning changes
-    - If FP8 error is tiny (<0.0001%), recovery may not be generated
-    - Higher rank ≠ better for error recovery (use recommended ranges)
     """)
 demo.launch()

 import shutil
 import re
 import json
 from pathlib import Path
 from huggingface_hub import HfApi, hf_hub_download
 from safetensors.torch import load_file, save_file
 import torch
 import torch.nn.functional as F
 try:
     from modelscope.hub.file_download import model_file_download as ms_file_download
     from modelscope.hub.api import HubApi as ModelScopeApi
 except ImportError:
     MODELScope_AVAILABLE = False
+def low_rank_decomposition(weight, rank=64):
+    """Standard LoRA decomposition for 2D tensors only."""
+    if weight.ndim != 2:
         return None, None
     try:
+        U, S, Vh = torch.linalg.svd(weight.float(), full_matrices=False)
+        U = U[:, :rank] @ torch.diag(torch.sqrt(S[:rank]))
+        Vh = torch.diag(torch.sqrt(S[:rank])) @ Vh[:rank, :]
+        return U.contiguous(), Vh.contiguous()
+    except Exception:
+        return None, None
 def extract_correction_factors(original_weight, fp8_weight):
+    """Extract per-channel/tensor correction factors (difference method)."""
     with torch.no_grad():
         orig = original_weight.float()
         quant = fp8_weight.float()
         error_norm = torch.norm(error)
         orig_norm = torch.norm(orig)
+        if orig_norm > 1e-6 and error_norm / orig_norm < 0.01:
             return None
+        # For 4D tensors (VAE/conv layers)
         if orig.ndim == 4:
+            channel_dim = 0
             channel_mean = error.mean(dim=tuple(i for i in range(1, orig.ndim)), keepdim=True)
             return channel_mean.to(original_weight.dtype)
+        # For 2D tensors (linear layers)
         elif orig.ndim == 2:
             row_mean = error.mean(dim=1, keepdim=True)
             return row_mean.to(original_weight.dtype)
+        # For 1D tensors (bias, etc.)
         else:
             return error.mean().to(original_weight.dtype)
+def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_format, recovery_config, progress=gr.Progress()):
+    progress(0.1, desc="Starting FP8 conversion with precision recovery...")
     try:
         def read_safetensors_metadata(path):
             with open(path, 'rb') as f:
         metadata = read_safetensors_metadata(safetensors_path)
         progress(0.2, desc="Loaded metadata.")
         state_dict = load_file(safetensors_path)
         progress(0.4, desc="Loaded weights.")
+        if fp8_format == "e5m2":
+            fp8_dtype = torch.float8_e5m2
+        else:
+            fp8_dtype = torch.float8_e4m3fn
         sd_fp8 = {}
+        recovery_weights = {}
         stats = {
             "total_layers": len(state_dict),
             "processed_layers": 0,
             "skipped_layers": [],
+            "recovery_type_counts": {"lora": 0, "diff": 0}
         }
         total = len(state_dict)
         for i, key in enumerate(state_dict):
             progress(0.4 + 0.4 * (i / total), desc=f"Processing {i+1}/{total}...")
             weight = state_dict[key]
+            lower_key = key.lower()
             if weight.dtype in [torch.float16, torch.float32, torch.bfloat16]:
+                fp8_weight = weight.to(fp8_dtype)
+                sd_fp8[key] = fp8_weight
+                # Match key against recovery config rules
+                recovery_method = "none"
+                lora_rank = 64
+                for rule in recovery_config:
+                    element_pattern = rule.get("element", "").lower()
+                    method = rule.get("method", "none")
+                    if element_pattern == "all" or element_pattern in lower_key:
+                        recovery_method = method
+                        if method == "lora":
+                            lora_rank = rule.get("rank", 64)
+                        break
+                if recovery_method == "lora" and weight.ndim == 2 and min(weight.shape) > lora_rank:
+                    try:
+                        U, V = low_rank_decomposition(weight, rank=lora_rank)
+                        if U is not None and V is not None:
+                            recovery_weights[f"lora_A.{key}"] = U.to(torch.float16)
+                            recovery_weights[f"lora_B.{key}"] = V.to(torch.float16)
+                            stats["processed_layers"] += 1
+                            stats["recovery_type_counts"]["lora"] += 1
+                    except Exception:
+                        stats["skipped_layers"].append(f"{key}: lora failed")
+                elif recovery_method == "diff":
+                    try:
+                        corr = extract_correction_factors(weight, fp8_weight)
+                        if corr is not None:
+                            recovery_weights[f"diff.{key}"] = corr
+                            stats["processed_layers"] += 1
+                            stats["recovery_type_counts"]["diff"] += 1
+                    except Exception:
+                        stats["skipped_layers"].append(f"{key}: diff failed")
+                else:
+                    stats["skipped_layers"].append(f"{key}: {recovery_method}")
             else:
                 sd_fp8[key] = weight
                 stats["skipped_layers"].append(f"{key}: non-float dtype")
         base_name = os.path.splitext(os.path.basename(safetensors_path))[0]
         fp8_path = os.path.join(output_dir, f"{base_name}-fp8-{fp8_format}.safetensors")
+        recovery_path = os.path.join(output_dir, f"{base_name}-recovery.safetensors")
         save_file(sd_fp8, fp8_path, metadata={"format": "pt", "fp8_format": fp8_format, **metadata})
+        if recovery_weights:
+            save_file(recovery_weights, recovery_path, metadata={
                 "format": "pt",
+                "fp8_format": fp8_format,
+                "recovery_config": json.dumps(recovery_config),
+                "stats": json.dumps(stats)
+            })
+        progress(0.9, desc="Saved FP8 and recovery files.")
+        progress(1.0, desc="✅ FP8 + recovery extraction complete!")
+        stats_msg = f"FP8 ({fp8_format}) and recovery saved.\n"
+        stats_msg += f"- Total layers: {stats['total_layers']}\n"
+        stats_msg += f"- Processed: {stats['processed_layers']} ({stats['recovery_type_counts']['lora']} LoRA + {stats['recovery_type_counts']['diff']} Diff)\n"
+        if stats["processed_layers"] == 0:
+            stats_msg += "\n⚠️ No recovery weights generated. Check your rules and rank settings."
         return True, stats_msg, stats
     except Exception as e:
+        return False, str(e), None
+def generate_config_from_rules(rules_input):
+    """Parse multi-line rule input into config."""
+    config = []
+    for line in rules_input.strip().split('\n'):
+        line = line.strip()
+        if not line or line.startswith('#'):
+            continue
+        parts = [p.strip() for p in line.split(',')]
+        if len(parts) >= 2:
+            element = parts[0]
+            method = parts[1].lower()
+            rank = 64
+            if method == "lora" and len(parts) >= 3:
+                try:
+                    rank = int(parts[2])
+                except ValueError:
+                    pass
+            config.append({"element": element, "method": method, "rank": rank})
+    return config
 def parse_hf_url(url):
     url = url.strip().rstrip("/")
     repo_url,
     safetensors_filename,
     fp8_format,
+    recovery_rules,
     target_type,
     new_repo_id,
     hf_token,
         return None, "❌ Hugging Face token required for source.", ""
     if target_type == "huggingface" and not hf_token:
         return None, "❌ Hugging Face token required for target.", ""
+    recovery_config = generate_config_from_rules(recovery_rules)
+    if not recovery_config:
+        recovery_config = [{"element": "all", "method": "none"}]
     temp_dir = None
     output_dir = tempfile.mkdtemp()
             source_type, repo_url, safetensors_filename, hf_token, progress
         )
+        progress(0.25, desc="Converting to FP8 with recovery...")
+        success, msg, stats = convert_safetensors_to_fp8_with_recovery(
+            safetensors_path, output_dir, fp8_format, recovery_config, progress
         )
         if not success:
         base_name = os.path.splitext(safetensors_filename)[0]
         fp8_filename = f"{base_name}-fp8-{fp8_format}.safetensors"
+        recovery_filename = f"{base_name}-recovery.safetensors"
         readme = f"""---
 library_name: diffusers
 - fp8
 - safetensors
 - precision-recovery
+- mixed-method
 - converted-by-gradio
 ---
+# FP8 Model with Custom Precision Recovery
 - **Source**: `{repo_url}`
 - **File**: `{safetensors_filename}`
 - **FP8 Format**: `{fp8_format.upper()}`
+- **Recovery File**: `{recovery_filename}` (contains both LoRA and Difference weights)
+## Recovery Rules Used
+```
+{recovery_rules}
+```
+## Usage
 ```python
 from safetensors.torch import load_file
 import torch
 fp8_state = load_file("{fp8_filename}")
+recovery_state = load_file("{recovery_filename}")
 reconstructed = {{}}
 for key in fp8_state:
+    fp8_weight = fp8_state[key].to(torch.float32)
+    # Apply LoRA if present
+    if f"lora_A.{{key}}" in recovery_state and f"lora_B.{{key}}" in recovery_state:
+        A = recovery_state[f"lora_A.{{key}}"].to(torch.float32)
+        B = recovery_state[f"lora_B.{{key}}"].to(torch.float32)
+        lora_weight = B @ A
+        fp8_weight = fp8_weight + lora_weight
+    # Apply Difference if present
+    if f"diff.{{key}}" in recovery_state:
+        diff = recovery_state[f"diff.{{key}}"].to(torch.float32)
+        fp8_weight = fp8_weight + diff
+    reconstructed[key] = fp8_weight
 ```
 """
         with open(os.path.join(output_dir, "README.md"), "w") as f:
             f.write(readme)
             )
         progress(1.0, desc="✅ Done!")
         result_html = f"""
 ✅ Success!
 Model uploaded to: <a href="{repo_url_final}" target="_blank">{new_repo_id}</a>
+Includes FP8 + custom recovery weights.
 """
+        return gr.HTML(result_html), "✅ FP8 + recovery upload successful!", msg
     except Exception as e:
+        return None, f"❌ Error: {str(e)}", ""
     finally:
         if temp_dir:
             shutil.rmtree(temp_dir, ignore_errors=True)
         shutil.rmtree(output_dir, ignore_errors=True)
+with gr.Blocks(title="FP8 + Custom Recovery Extractor") as demo:
+    gr.Markdown("# 🔄 FP8 Quantizer with Per-Layer Recovery Control")
+    gr.Markdown("Specify **exact recovery method per layer/tensor** using pattern matching. Supports LoRA and Difference methods simultaneously.")
     with gr.Row():
         with gr.Column():
             repo_url = gr.Textbox(label="Repo URL or ID", placeholder="https://huggingface.co/... or modelscope-id")
             safetensors_filename = gr.Textbox(label="Filename", placeholder="model.safetensors")
+            with gr.Accordion("FP8 Settings", open=True):
                 fp8_format = gr.Radio(["e4m3fn", "e5m2"], value="e5m2", label="FP8 Format")
+            with gr.Accordion("Recovery Rules (Layer/Tensor Level)", open=True):
+                gr.Markdown("""
+                Define recovery rules **one per line** in format:
+                `layer_pattern, method [, rank]`
+                - `layer_pattern`: substring to match in weight key (case-insensitive)
+                - `method`: `lora` or `diff` or `none`
+                - `rank`: LoRA rank (only for `lora` method)
+                **Rules are applied in order** – first match wins.
+                """)
+                recovery_rules = gr.Textbox(
+                    value="vae, diff\nencoder, diff\ndecoder, diff\ntext, lora, 64\nattn, lora, 128\nall, none",
+                    lines=8,
+                    label="Recovery Rules"
                 )
             with gr.Accordion("Authentication", open=False):
                 hf_token = gr.Textbox(label="Hugging Face Token", type="password")
+                modelscope_token = gr.Textbox(label="ModelScope Token (optional)", type="password", visible=MODELScope_AVAILABLE)
         with gr.Column():
             target_type = gr.Radio(["huggingface", "modelscope"], value="huggingface", label="Target")
+            new_repo_id = gr.Textbox(label="New Repo ID", placeholder="user/model-fp8")
             private_repo = gr.Checkbox(label="Private Repository (HF only)", value=False)
             status_output = gr.Markdown()
             repo_url,
             safetensors_filename,
             fp8_format,
+            recovery_rules,
             target_type,
             new_repo_id,
             hf_token,
     gr.Examples(
         examples=[
+            [
+                "huggingface",
+                "https://huggingface.co/stabilityai/sdxl-vae",
+                "diffusion_pytorch_model.safetensors",
+                "e5m2",
+                "vae, diff\nencoder, diff\ndecoder, diff\nall, none",
+                "huggingface"
+            ],
+            [
+                "huggingface",
+                "https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/text_encoder",
+                "model.safetensors",
+                "e5m2",
+                "text, lora, 64\nemb, lora, 64\nall, none",
+                "huggingface"
+            ]
         ],
+        inputs=[source_type, repo_url, safetensors_filename, fp8_format, recovery_rules, target_type],
         label="Example Conversions"
     )
     gr.Markdown("""
+    ## 💡 Recovery Strategy Guide
+    ### **Difference Method (Recommended for VAE/Convs)**
+    - Use for: `vae`, `encoder`, `decoder`, `conv` layers
+    - Captures exact quantization error
+    - Works with 4D tensors that LoRA cannot handle
+    ### **LoRA Method (Recommended for Attention/Linear)**
+    - Use for: `text`, `attn`, `mlp`, `transformer` layers
+    - Use rank 32-128 depending on layer importance
+    - Only works on 2D tensors
+    ### **Rule Ordering Tips**
+    - Put specific patterns first (`vae.encoder`) before general ones (`vae`)
+    - End with `all, none` to set default behavior
+    - Layer names are **case-insensitive**
+    > This implementation restores the successful VAE difference method while adding full per-layer control.
     """)
 demo.launch()