Update app.py
app.py CHANGED
@@ -2,12 +2,11 @@ import os
 import gc
 import torch
 import shutil
+import uuid
 import gradio as gr
 from huggingface_hub import HfApi, hf_hub_download
 from safetensors.torch import load_file, save_file
 
-TEMP_DIR = "temp_processing_dir"
-
 def convert_and_upload(token, source_repo, target_repo, precision, target_components):
     if not token:
         yield "❌ Error: Please provide a valid Hugging Face Write Token."
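Note: the next hunk branches on target_dtype, which is assigned outside the changed lines. A minimal sketch of how the UI precision choice presumably maps to a torch dtype (the mapping, the dict name, and the helper below are assumptions, not part of this commit):

    import torch

    # Hypothetical mapping from the UI precision string to a torch dtype.
    # FP8 assumes a PyTorch build that ships torch.float8_e4m3fn (2.1 or newer).
    PRECISION_TO_DTYPE = {
        "FP8": torch.float8_e4m3fn,
        "FP16": torch.float16,
        "BF16": torch.bfloat16,
    }

    def resolve_dtype(precision: str):
        # Unknown choices return None, which the loop below treats as "do not cast".
        return PRECISION_TO_DTYPE.get(precision)

    print(resolve_dtype("BF16"))  # torch.bfloat16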
@@ -45,53 +44,55 @@ def convert_and_upload(token, source_repo, target_repo, precision, target_components):
         yield f"❌ Error fetching files: {str(e)}"
         return
 
-
+    # Create a unique cache directory for this specific run to prevent collisions
+    cache_dir = f"./hf_cache_{uuid.uuid4().hex[:8]}"
+
+    success_count = 0
+    error_count = 0
 
     for file in files:
-        # AUTO-DELETE/SKIP LOGIC: Detect large .safetensors files at the root level
+        # AUTO-DELETE/SKIP LOGIC: Detect large .safetensors files at the root level
         is_root_safetensor = "/" not in file and file.endswith(".safetensors")
 
         if is_root_safetensor:
             yield f"🗑️ Auto-skipping massive root model: {file}..."
             try:
-                # If pushing to an existing repo, explicitly delete the large root file if it exists there
                 api.delete_file(path_in_repo=file, repo_id=target_repo, token=token, commit_message=f"Auto-deleted massive root file {file}")
                 yield f"✅ Ensured {file} is removed from target repository."
             except Exception:
-                pass
+                pass
             continue
 
         yield f"⏳ Processing {file}..."
 
         try:
-
+            os.makedirs(cache_dir, exist_ok=True)
+
+            # CRITICAL FIX: Added token=token here so gated FLUX models don't block the download
             local_path = hf_hub_download(
                 repo_id=source_repo,
                 filename=file,
-
-
+                cache_dir=cache_dir,
+                token=token
             )
 
-            # Check if this file belongs to one of the user-selected components (e.g., text_encoder, transformer)
             in_target_component = any(f"{comp}/" in file for comp in target_components)
 
-            # Intercept and quantize only if it's a safetensors file in a selected folder
             if file.endswith(".safetensors") and in_target_component:
-                yield f"🔧 Quantizing {file} to {precision}..."
+                yield f"🔧 Quantizing {file} to {precision} (This will take a few minutes)..."
 
                 tensors = load_file(local_path)
 
-                # Cast floating point tensors to the selected precision
                 if target_dtype:
                     keys = list(tensors.keys())
                     for k in keys:
                         if tensors[k].is_floating_point():
                             tensors[k] = tensors[k].to(target_dtype)
 
-                converted_path =
+                converted_path = "converted.safetensors"
                 save_file(tensors, converted_path)
 
-                # Aggressive memory flush
+                # Aggressive memory flush
                 del tensors
                 gc.collect()
 
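To make the skip/quantize branching concrete, here is how the two checks classify a few repo paths; the file names are made up for illustration and are not taken from the model repos:

    target_components = ["transformer", "text_encoder"]  # example selection

    files = [
        "flux2-klein.safetensors",                          # root-level: auto-skipped and deleted from the target
        "transformer/diffusion_pytorch_model.safetensors",  # selected component: quantized
        "vae/diffusion_pytorch_model.safetensors",          # unselected component: copied through unchanged
    ]
    for file in files:
        is_root_safetensor = "/" not in file and file.endswith(".safetensors")
        in_target_component = any(f"{comp}/" in file for comp in target_components)
        print(file, is_root_safetensor, in_target_component)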
@@ -114,19 +115,23 @@ def convert_and_upload(token, source_repo, target_repo, precision, target_components):
                 commit_message=f"Copy {file} from original repo"
             )
 
-
-
-
+            success_count += 1
+
+            # EXTREME DISK CLEANUP: Nuke the cache directory after every file to prevent the 50GB Space Crash
+            if os.path.exists(cache_dir):
+                shutil.rmtree(cache_dir)
 
             gc.collect()
 
         except Exception as e:
+            error_count += 1
             yield f"⚠️ Error processing {file}: {str(e)}\nSkipping to next file..."
 
-
-
+    # Final cleanup sweep
+    if os.path.exists(cache_dir):
+        shutil.rmtree(cache_dir)
 
-    yield f"✅
+    yield f"✅ Finished! Successfully processed {success_count} files. Errors encountered: {error_count}."
 
 # Dynamic UI Update for Target Repo Name
 def update_target_repo(username, source, precision):
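Only the tail of the upload call (the commit_message argument and its closing parenthesis) falls inside this hunk. For context, a huggingface_hub upload of a processed shard typically looks roughly like the sketch below; every value here is a placeholder rather than the app's actual call:

    from huggingface_hub import HfApi

    api = HfApi()
    api.upload_file(
        path_or_fileobj="converted.safetensors",             # local shard written by save_file
        path_in_repo="transformer/model-00001.safetensors",  # placeholder repo-relative path
        repo_id="your-username/FLUX.2-klein-9B-fp8",         # placeholder target repo
        token="hf_...",                                      # write token
        commit_message="Add quantized shard",
    )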
@@ -139,8 +144,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🚀 FLUX.2-klein Dedicated Quantizer")
     gr.Markdown(
         "Convert sharded **FLUX.2-klein** models (4B and 9B) to lower precisions (FP8, FP16, BF16).\n\n"
-        "**Auto-Delete
-        "
+        "**Auto-Delete & Disk Protection:** This tool actively purges Hugging Face's download cache after every single shard. "
+        "This ensures the 9B model won't crash the free Space by filling up the 50GB hard drive limit."
     )
 
     with gr.Row():
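The new description justifies the per-shard cache purge with the Space's 50GB disk limit. If one wanted to log the headroom that the cleanup protects, a standard-library check (purely illustrative, not part of app.py) would be:

    import shutil

    total, used, free = shutil.disk_usage("/")
    print(f"{used / 1e9:.1f} GB used of {total / 1e9:.1f} GB ({free / 1e9:.1f} GB free)")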
@@ -154,7 +159,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 label="Your Hugging Face Username",
                 placeholder="e.g., rootlocalghost"
             )
-            # Locked down to only FLUX.2-klein models
             source_repo = gr.Dropdown(
                 choices=[
                     "black-forest-labs/FLUX.2-klein-9B",
@@ -192,7 +196,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         max_lines=25
     )
 
-    # Automatically update the target repo name when inputs change
     inputs_to_watch = [hf_username, source_repo, precision]
     for inp in inputs_to_watch:
         inp.change(
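The body of update_target_repo and the arguments passed to inp.change( sit outside the changed lines. A plausible shape for such a callback, offered only as a sketch of the naming convention the UI implies, is:

    def update_target_repo(username, source, precision):
        # Hypothetical body: build "<username>/<model>-<precision>" for the target repo textbox.
        model_name = source.split("/")[-1]  # e.g. "FLUX.2-klein-9B"
        return f"{username}/{model_name}-{precision.lower()}"

    print(update_target_repo("rootlocalghost", "black-forest-labs/FLUX.2-klein-9B", "FP8"))
    # rootlocalghost/FLUX.2-klein-9B-fp8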