Spaces:

rootlocalghost
/

Z-Image-Model-Quantizer

Running

App Files Files Community

rootlocalghost committed on 12 days ago

Commit

281d59c

verified ·

1 Parent(s): 98c9be7

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -40

app.py CHANGED Viewed

@@ -6,78 +6,84 @@ import gradio as gr
 from huggingface_hub import HfApi, hf_hub_download
 from safetensors.torch import load_file, save_file
-SOURCE_REPO = "Tongyi-MAI/Z-Image-Turbo"
-TARGET_REPO = "rootlocalghost/Z-Image-Turbo-FP8"
 TEMP_DIR = "temp_processing_dir"
-def convert_and_upload(token):
     if not token:
         yield "❌ Error: Please provide a valid Hugging Face Write Token."
         return
     api = HfApi(token=token)
-    yield f"🔄 Connecting to Hugging Face and verifying target repo: {TARGET_REPO}..."
-    # Ensure the target repo exists, create it if it doesn't
     try:
-        api.create_repo(repo_id=TARGET_REPO, exist_ok=True, private=False)
     except Exception as e:
         yield f"❌ Error checking/creating repo: {str(e)}\nMake sure your token has 'Write' permissions."
         return
-    yield "📋 Fetching file list from the source repository..."
     try:
-        files = api.list_repo_files(SOURCE_REPO)
     except Exception as e:
         yield f"❌ Error fetching files: {str(e)}"
         return
-    # Create a temporary directory for safe local processing
     os.makedirs(TEMP_DIR, exist_ok=True)
     for file in files:
         yield f"⏳ Processing {file}..."
         try:
-            # Download file locally without using the central symlink cache
-            # This is critical to prevent the 50GB Space disk from filling up
             local_path = hf_hub_download(
-                repo_id=SOURCE_REPO,
                 filename=file,
                 local_dir=TEMP_DIR,
                 local_dir_use_symlinks=False
             )
-            # Check if it's a safetensor file inside the target directories
             if file.endswith(".safetensors") and ("text_encoder/" in file or "transformer/" in file):
-                yield f"🧠 Quantizing {file} to FP8 (This may take a minute)..."
-                # Load tensors into RAM
                 tensors = load_file(local_path)
-                # Cast all floating point tensors to FP8
-                keys = list(tensors.keys())
-                for k in keys:
-                    if tensors[k].is_floating_point():
-                        tensors[k] = tensors[k].to(torch.float8_e4m3fn)
-                # Save the quantized tensors to a new temp file
                 converted_path = os.path.join(TEMP_DIR, "converted.safetensors")
                 save_file(tensors, converted_path)
-                # Wipe the tensors from RAM immediately to stay under the 16GB limit
                 del tensors
                 gc.collect()
-                yield f"☁️ Uploading FP8 version of {file}..."
                 api.upload_file(
                     path_or_fileobj=converted_path,
                     path_in_repo=file,
-                    repo_id=TARGET_REPO,
-                    commit_message=f"Upload FP8 quantized {file}"
                 )
-                # Clean up the converted file
                 os.remove(converted_path)
             else:
@@ -85,40 +91,55 @@ def convert_and_upload(token):
                 api.upload_file(
                     path_or_fileobj=local_path,
                     path_in_repo=file,
-                    repo_id=TARGET_REPO,
                     commit_message=f"Copy {file} from original repo"
                 )
-            # Delete the downloaded original file to free up disk space
             if os.path.exists(local_path):
                 os.remove(local_path)
-            # Final sweep of memory before the next file
             gc.collect()
         except Exception as e:
             yield f"⚠️ Error processing {file}: {str(e)}\nSkipping to next file..."
-    # Clean up the processing directory
     if os.path.exists(TEMP_DIR):
         shutil.rmtree(TEMP_DIR)
-    yield "✅ All files processed and successfully uploaded to your repository!"
-# Build the Gradio Web Interface
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🚀 Z-Image-Turbo FP8 Quantizer & Uploader")
     gr.Markdown(
-        f"This tool sequentially downloads files from `{SOURCE_REPO}`, quantizes the **text_encoder** and **transformer** "
-        f"`.safetensors` files to FP8 (`float8_e4m3fn`), and uploads everything to `{TARGET_REPO}`.\n\n"
-        "**Note:** Because we are using a free Space (2 vCPUs, 16GB RAM), this script is designed to process one file at a time "
-        "and aggressively clear memory/disk caches. It will take some time, but it won't crash."
     )
     with gr.Row():
         with gr.Column(scale=2):
             hf_token = gr.Textbox(
-                label="Hugging Face Token (Needs Write Access)",
                 type="password",
                 placeholder="hf_..."
             )
@@ -127,14 +148,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         with gr.Column(scale=3):
             output_log = gr.Textbox(
                 label="Operation Logs",
-                lines=15,
                 interactive=False,
                 max_lines=20
             )
     start_btn.click(
         fn=convert_and_upload,
-        inputs=[hf_token],
         outputs=[output_log]
     )

 from huggingface_hub import HfApi, hf_hub_download
 from safetensors.torch import load_file, save_file
 TEMP_DIR = "temp_processing_dir"
def convert_and_upload(token, source_repo, target_repo, precision):
    """Copy a model repo to ``target_repo``, casting selected weights on the way.

    Every file listed in ``source_repo`` is downloaded into ``TEMP_DIR`` one at
    a time. ``.safetensors`` files whose path contains ``text_encoder/`` or
    ``transformer/`` have their floating-point tensors cast to the dtype
    selected by ``precision`` and are re-saved before upload; every other file
    is uploaded unchanged. Local copies are deleted after each file, whether
    or not it succeeded, so disk usage stays bounded to one file at a time.

    Args:
        token: Hugging Face access token used for all Hub calls.
        source_repo: Repo id to read files from.
        target_repo: Repo id to upload to; created if it does not exist.
        precision: ``"FP8"``, ``"FP16"`` or ``"BF16"``. Any other value leaves
            tensor dtypes untouched (files are still re-saved and uploaded).

    Yields:
        Human-readable status strings, one per step, ending with either a
        success message or a summary of the files that failed.
    """
    if not token:
        yield "❌ Error: Please provide a valid Hugging Face Write Token."
        return

    # Normalise once so the validated value is the one actually sent to the
    # Hub (the UI textbox may contain stray whitespace, or be None).
    target_repo = (target_repo or "").strip()
    if not target_repo:
        yield "❌ Error: Please specify a Target Repository."
        return

    # Validate the source before creating anything on the Hub.
    source_repo = (source_repo or "").strip()
    if not source_repo:
        yield "❌ Error: Please specify a Source Repository."
        return

    # Map precision string to PyTorch dtype; unknown labels map to None,
    # which means "do not cast".
    dtype_by_precision = {
        "FP8": torch.float8_e4m3fn,
        "FP16": torch.float16,
        "BF16": torch.bfloat16,
    }
    target_dtype = dtype_by_precision.get(precision)

    api = HfApi(token=token)

    yield f"🔄 Connecting to Hugging Face and verifying target repo: {target_repo}..."
    try:
        api.create_repo(repo_id=target_repo, exist_ok=True, private=False)
    except Exception as e:
        yield f"❌ Error checking/creating repo: {str(e)}\nMake sure your token has 'Write' permissions."
        return

    yield f"📋 Fetching file list from {source_repo}..."
    try:
        files = api.list_repo_files(source_repo)
    except Exception as e:
        yield f"❌ Error fetching files: {str(e)}"
        return

    converted_path = os.path.join(TEMP_DIR, "converted.safetensors")
    failed = []

    os.makedirs(TEMP_DIR, exist_ok=True)
    try:
        for file in files:
            yield f"⏳ Processing {file}..."
            local_path = None
            try:
                # Download file locally, bypassing symlink cache to save space
                local_path = hf_hub_download(
                    repo_id=source_repo,
                    filename=file,
                    local_dir=TEMP_DIR,
                    local_dir_use_symlinks=False
                )

                # Check if it's a target safetensor file
                if file.endswith(".safetensors") and ("text_encoder/" in file or "transformer/" in file):
                    yield f"🧠 Quantizing {file} to {precision}..."
                    tensors = load_file(local_path)

                    # Cast floating point tensors to the selected precision;
                    # integer/bool tensors are left as they are.
                    if target_dtype is not None:
                        for name in list(tensors):
                            if tensors[name].is_floating_point():
                                tensors[name] = tensors[name].to(target_dtype)

                    # load_file returns tensors only, so the re-saved file would
                    # otherwise have no metadata header at all.
                    # NOTE(review): some loaders are reported to expect
                    # format="pt" in the header - confirm against the consumers
                    # of the target repo.
                    save_file(tensors, converted_path, metadata={"format": "pt"})

                    # Wipe tensors from RAM before the upload starts
                    del tensors
                    gc.collect()

                    yield f"☁️ Uploading {precision} version of {file}..."
                    api.upload_file(
                        path_or_fileobj=converted_path,
                        path_in_repo=file,
                        repo_id=target_repo,
                        commit_message=f"Upload {precision} quantized {file}"
                    )
                else:
                    api.upload_file(
                        path_or_fileobj=local_path,
                        path_in_repo=file,
                        repo_id=target_repo,
                        commit_message=f"Copy {file} from original repo"
                    )
            except Exception as e:
                failed.append(file)
                yield f"⚠️ Error processing {file}: {str(e)}\nSkipping to next file..."
            finally:
                # Remove local copies on success AND on failure; a failed
                # upload must not leave a multi-GB file behind for the rest
                # of the run.
                for leftover in (converted_path, local_path):
                    if leftover and os.path.exists(leftover):
                        os.remove(leftover)
                gc.collect()
    finally:
        # Also runs when the generator is closed early (e.g. the client
        # disconnects), so the working directory never outlives the request.
        shutil.rmtree(TEMP_DIR, ignore_errors=True)

    if failed:
        yield (
            f"⚠️ Finished with {len(failed)} error(s); these files were not "
            f"uploaded to {target_repo}: {', '.join(failed)}"
        )
    else:
        yield f"✅ All files processed and successfully uploaded to {target_repo}!"
# Dynamic UI Update for Target Repo Name
def update_target_repo(source, precision, namespace="rootlocalghost"):
    """Build the suggested target repo id for a source repo and precision.

    Args:
        source: Source repo id; a value containing ``"Turbo"`` selects the
            ``Z-Image-Turbo`` name, anything else (including ``None``) selects
            ``Z-Image-Base``.
        precision: Precision label appended to the repo name, e.g. ``"FP8"``.
        namespace: Hub user or organisation that prefixes the repo id.

    Returns:
        A string of the form ``"<namespace>/<model name>-<precision>"``.
    """
    # `source` can be None when the dropdown has no selection; treat that the
    # same as a non-Turbo source instead of raising TypeError on `in`.
    model_name = "Z-Image-Turbo" if "Turbo" in (source or "") else "Z-Image-Base"
    return f"{namespace}/{model_name}-{precision}"
+# Build the Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🚀 Z-Image Quantizer & Uploader")
     gr.Markdown(
+        "Select your source model and desired precision. The tool will sequentially download, quantize the "
+        "**text_encoder** and **transformer** files, and upload everything to your target repository while keeping memory usage under 16GB."
     )
     with gr.Row():
         with gr.Column(scale=2):
+            source_repo = gr.Dropdown(
+                choices=["Tongyi-MAI/Z-Image", "Tongyi-MAI/Z-Image-Turbo"],
+                value="Tongyi-MAI/Z-Image-Turbo",
+                label="Source Repository"
+            )
+            precision = gr.Dropdown(
+                choices=["FP8", "FP16", "BF16"],
+                value="FP8",
+                label="Quantization Precision"
+            )
+            target_repo = gr.Textbox(
+                label="Target Repository",
+                value="rootlocalghost/Z-Image-Turbo-FP8"
+            )
             hf_token = gr.Textbox(
+                label="Hugging Face Token (Write Access)",
                 type="password",
                 placeholder="hf_..."
             )
         with gr.Column(scale=3):
             output_log = gr.Textbox(
                 label="Operation Logs",
+                lines=17,
                 interactive=False,
                 max_lines=20
             )
+    # Automatically update the target repo name when inputs change
+    source_repo.change(fn=update_target_repo, inputs=[source_repo, precision], outputs=[target_repo])
+    precision.change(fn=update_target_repo, inputs=[source_repo, precision], outputs=[target_repo])
     start_btn.click(
         fn=convert_and_upload,
+        inputs=[hf_token, source_repo, target_repo, precision],
         outputs=[output_log]
     )