rootlocalghost committed on
Commit
7be3d75
·
verified ·
1 Parent(s): 1c3237a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -121
app.py CHANGED
@@ -1,141 +1,189 @@
 
 
1
  import os
2
- import gc
3
- import torch
4
  import shutil
5
- import gradio as gr
6
- from huggingface_hub import HfApi, hf_hub_download
7
- from safetensors.torch import load_file, save_file
8
-
9
# Repos involved in the conversion and a scratch directory for local work.
SOURCE_REPO = "Tongyi-MAI/Z-Image-Turbo"
TARGET_REPO = "rootlocalghost/Z-Image-Turbo-FP8"
TEMP_DIR = "temp_processing_dir"


def convert_and_upload(token):
    """Clone SOURCE_REPO into TARGET_REPO, quantizing selected weights to FP8.

    Generator that yields human-readable progress strings for the Gradio log
    box. Files under ``text_encoder/`` or ``transformer/`` ending in
    ``.safetensors`` have their floating-point tensors cast to
    ``torch.float8_e4m3fn`` before upload; every other file is copied as-is.
    Files are processed one at a time and deleted immediately afterwards to
    fit the Space's disk and RAM limits.

    Args:
        token: Hugging Face access token with write permission on TARGET_REPO.

    Yields:
        Status messages (str). Fatal failures yield an error message and
        return early; per-file failures yield a warning and continue.
    """
    if not token:
        yield "❌ Error: Please provide a valid Hugging Face Write Token."
        return

    api = HfApi(token=token)
    yield f"🔄 Connecting to Hugging Face and verifying target repo: {TARGET_REPO}..."

    # Ensure the target repo exists, create it if it doesn't.
    try:
        api.create_repo(repo_id=TARGET_REPO, exist_ok=True, private=False)
    except Exception as e:
        yield f"❌ Error checking/creating repo: {str(e)}\nMake sure your token has 'Write' permissions."
        return

    yield "📋 Fetching file list from the source repository..."
    try:
        files = api.list_repo_files(SOURCE_REPO)
    except Exception as e:
        yield f"❌ Error fetching files: {str(e)}"
        return

    # Create a temporary directory for safe local processing.
    os.makedirs(TEMP_DIR, exist_ok=True)

    for file in files:
        # Directory placeholders cannot be downloaded as files — skip them.
        if file.endswith("/"):
            continue
        yield f"⏳ Processing {file}..."
        # Pre-bind so the cleanup below never hits an unbound name when the
        # download itself raises (which would mask the real error).
        local_path = None
        try:
            # Download into a plain local dir (no symlinked central cache) so
            # the 50GB Space disk does not fill with duplicate blobs.
            local_path = hf_hub_download(
                repo_id=SOURCE_REPO,
                filename=file,
                local_dir=TEMP_DIR,
                # NOTE(review): deprecated/ignored in recent huggingface_hub —
                # with local_dir set, real files are written either way.
                local_dir_use_symlinks=False
            )

            # Quantize only the weight shards inside the targeted subfolders.
            if file.endswith(".safetensors") and ("text_encoder/" in file or "transformer/" in file):
                yield f"🧠 Quantizing {file} to FP8 (This may take a minute)..."

                # Load tensors into RAM.
                tensors = load_file(local_path)

                # Cast all floating-point tensors to FP8; iterate over a
                # snapshot of the keys since values are replaced in place.
                for k in list(tensors.keys()):
                    if tensors[k].is_floating_point():
                        tensors[k] = tensors[k].to(torch.float8_e4m3fn)

                # Save the quantized tensors to a new temp file.
                converted_path = os.path.join(TEMP_DIR, "converted.safetensors")
                save_file(tensors, converted_path)

                # Wipe the tensors from RAM immediately to stay under 16GB.
                del tensors
                gc.collect()

                yield f"☁️ Uploading FP8 version of {file}..."
                api.upload_file(
                    path_or_fileobj=converted_path,
                    path_in_repo=file,
                    repo_id=TARGET_REPO,
                    commit_message=f"Upload FP8 quantized {file}"
                )

                # Clean up the converted file.
                os.remove(converted_path)

            else:
                yield f"☁️ Copying {file} as-is..."
                api.upload_file(
                    path_or_fileobj=local_path,
                    path_in_repo=file,
                    repo_id=TARGET_REPO,
                    commit_message=f"Copy {file} from original repo"
                )

            # Delete the downloaded original file to free up disk space.
            if local_path and os.path.exists(local_path):
                os.remove(local_path)

            # Final sweep of memory before the next file.
            gc.collect()

        except Exception as e:
            yield f"⚠️ Error processing {file}: {str(e)}\nSkipping to next file..."

    # Clean up the processing directory.
    if os.path.exists(TEMP_DIR):
        shutil.rmtree(TEMP_DIR)

    yield "✅ All files processed and successfully uploaded to your repository!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
# Build the Gradio Web Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 Z-Image-Turbo FP8 Quantizer & Uploader")
    gr.Markdown(
        f"This tool sequentially downloads files from `{SOURCE_REPO}`, quantizes the **text_encoder** and **transformer** "
        f"`.safetensors` files to FP8 (`float8_e4m3fn`), and uploads everything to `{TARGET_REPO}`.\n\n"
        "**Note:** Because we are using a free Space (2 vCPUs, 16GB RAM), this script is designed to process one file at a time "
        "and aggressively clear memory/disk caches. It will take some time, but it won't crash."
    )

    with gr.Row():
        # Left column: credentials and the trigger button.
        with gr.Column(scale=2):
            token_box = gr.Textbox(
                label="Hugging Face Token (Needs Write Access)",
                type="password",
                placeholder="hf_...",
            )
            run_button = gr.Button("Start Quantization & Upload", variant="primary")

        # Right column: streamed progress output.
        with gr.Column(scale=3):
            log_box = gr.Textbox(
                label="Operation Logs",
                lines=15,
                interactive=False,
                max_lines=20,
            )

    # convert_and_upload is a generator, so the log box updates live.
    run_button.click(fn=convert_and_upload, inputs=[token_box], outputs=[log_box])
140
 
141
  if __name__ == "__main__":
 
1
+ import gradio as gr
2
+ from huggingface_hub import HfApi, hf_hub_download, BucketFile, BucketFolder
3
  import os
4
+ import re
5
+ import time
6
  import shutil
7
+ import tempfile
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ RATE_LIMIT_PATTERN = re.compile(r"Retry after\s*(\d+)\s*seconds", re.IGNORECASE)
 
10
 
11
+ def _is_rate_limit_error(exc: Exception) -> bool:
12
+ message = str(exc).lower()
13
+ return "429" in message or "too many requests" in message or "rate limit" in message
 
 
 
14
 
15
+ def _get_retry_after(exc: Exception, default: int = 2) -> int:
16
+ match = RATE_LIMIT_PATTERN.search(str(exc))
17
+ if match:
18
+ return int(match.group(1))
19
+ return default
 
 
 
 
 
 
 
20
 
21
+ def _retry_api_call(fn, *args, retries: int = 3, **kwargs):
22
+ delay = 2
23
+ for attempt in range(1, retries + 1):
24
  try:
25
+ return fn(*args, **kwargs)
26
+ except Exception as exc:
27
+ if not _is_rate_limit_error(exc) or attempt == retries:
28
+ raise
29
+ wait = _get_retry_after(exc, delay)
30
+ time.sleep(wait)
31
+ delay = min(delay * 2, 60)
32
+
33
+ def _format_bucket_uri(bucket_id: str) -> str:
34
+ bucket_id = bucket_id.strip()
35
+ if bucket_id.startswith("hf://buckets/"):
36
+ return bucket_id
37
+ if bucket_id.startswith("buckets/"):
38
+ return f"hf://{bucket_id}"
39
+ return f"hf://buckets/{bucket_id}"
40
+
41
def _stream_clone_repo(source_repo, target_repo, repo_type, api, hf_token):
    """Copy every file from *source_repo* into *target_repo*, one at a time.

    Each file is downloaded into a temporary directory, re-uploaded to the
    target as a fresh commit (so no "duplicated from" provenance attaches),
    and deleted locally before the next file is fetched — keeping disk usage
    at a single file's worth.

    Args:
        source_repo: Repo id to read from.
        target_repo: Repo id to write to (must already exist).
        repo_type: "model", "dataset" or "space".
        api: Authenticated ``HfApi`` client used for listing and uploading.
        hf_token: Token forwarded to the download/upload calls.

    Raises:
        ValueError: If the source repo lists no files, or a download did not
            produce a file on disk.
    """
    file_paths = api.list_repo_files(
        repo_id=source_repo,
        repo_type=repo_type,
        token=hf_token,
    )
    if not file_paths:
        raise ValueError("source repo is empty or could not be listed")

    with tempfile.TemporaryDirectory(prefix="hf_file_") as root_dir:
        for file_path in file_paths:
            # Skip directory placeholders — only real files can be downloaded.
            if file_path.endswith("/"):
                continue
            # Pre-bind so the cleanup in `finally` cannot hit an unbound name
            # when hf_hub_download itself raises (the original masked the real
            # error with an UnboundLocalError here).
            downloaded_path = None
            try:
                downloaded_path = hf_hub_download(
                    repo_id=source_repo,
                    filename=file_path,
                    repo_type=repo_type,
                    local_dir=root_dir,
                    # NOTE(review): deprecated/ignored in recent
                    # huggingface_hub — with local_dir set, real files are
                    # written either way.
                    local_dir_use_symlinks=False,
                    token=hf_token,
                )
                if not os.path.isfile(downloaded_path):
                    raise ValueError(f"Downloaded file not found: {downloaded_path}")

                api.upload_file(
                    path_or_fileobj=downloaded_path,
                    path_in_repo=file_path,
                    repo_id=target_repo,
                    repo_type=repo_type,
                    commit_message=f"clone {file_path}",
                    token=hf_token,
                )
            finally:
                # Free disk space immediately so large repos fit in the Space.
                if downloaded_path and os.path.exists(downloaded_path):
                    os.remove(downloaded_path)
76
+
77
+ def _upload_local_source(source_path, target_repo, repo_type, api):
78
+ if not os.path.isdir(source_path):
79
+ raise ValueError("Local source path must be an existing directory.")
80
+ api.upload_large_folder(
81
+ repo_id=target_repo,
82
+ folder_path=source_path,
83
+ repo_type=repo_type,
84
+ num_workers=1,
85
+ print_report=False,
86
+ )
87
 
88
def _stream_clone_bucket(source_repo, target_repo, repo_type, api, hf_token):
    """Clone the contents of an HF bucket into a regular repo, file by file.

    Each bucket file is downloaded into a temporary staging directory,
    uploaded to *target_repo* under the same relative path, and removed
    locally before the next file is fetched. Folder entries (and anything
    that is neither file nor folder) are skipped.
    """
    bucket_uri = _format_bucket_uri(source_repo)
    # The tree-listing API expects the id without the "hf://" scheme prefix.
    bucket_id = bucket_uri[len("hf://"):]
    entries = api.list_bucket_tree(bucket_id=bucket_id, recursive=True, token=hf_token)
    with tempfile.TemporaryDirectory(prefix="hf_file_") as staging_dir:
        for entry in entries:
            if isinstance(entry, BucketFolder):
                continue
            if not isinstance(entry, BucketFile):
                continue
            staged_path = os.path.join(staging_dir, entry.path)
            # Recreate the bucket's directory layout locally.
            os.makedirs(os.path.dirname(staged_path), exist_ok=True)
            try:
                api.download_bucket_files(
                    bucket_id=bucket_id,
                    files=[(entry.path, staged_path)],
                    token=hf_token,
                )
                api.upload_file(
                    path_or_fileobj=staged_path,
                    path_in_repo=entry.path,
                    repo_id=target_repo,
                    repo_type=repo_type,
                    commit_message=f"clone {entry.path}",
                    token=hf_token,
                )
            finally:
                # Delete the staged copy to keep disk usage at one file.
                if os.path.exists(staged_path):
                    os.remove(staged_path)
116
+
117
def stealth_clone_hf_repo(hf_token_ui, source_repo, source_type, target_repo, repo_type):
    """Entry point wired to the Gradio button: clone source into target.

    Dispatches on *source_type* ("bucket", "local", anything else is treated
    as a regular repo) and returns a single status string for the UI; all
    exceptions are caught and rendered as an "error: ..." message rather
    than propagated into Gradio.

    Args:
        hf_token_ui: Token pasted in the UI; may be empty or None, in which
            case the HF_TOKEN environment variable (Space secret) is used.
        source_repo: Repo id, bucket id, or local directory path.
        source_type: "repo", "bucket" or "local".
        target_repo: Destination repo id.
        repo_type: "model", "dataset" or "space".

    Returns:
        A human-readable success or error message (str).
    """
    # Prioritize the token pasted by the user; fall back to Space secrets if
    # empty. Guard against None — Gradio passes None for a cleared textbox,
    # which made the original unconditional .strip() raise AttributeError.
    ui_token = (hf_token_ui or "").strip()
    hf_token = ui_token or os.environ.get("HF_TOKEN")

    if not hf_token:
        return "error: Please provide a valid Hugging Face Write Token."

    api = HfApi(token=hf_token)
    try:
        # Repo creation is the call most likely to be rate-limited under
        # repeated use, so wrap it in the backoff helper.
        _retry_api_call(
            api.create_repo,
            repo_id=target_repo,
            repo_type=repo_type,
            exist_ok=True,
        )

        if source_type == "bucket":
            _stream_clone_bucket(source_repo, target_repo, repo_type, api, hf_token)
        elif source_type == "local":
            _upload_local_source(source_repo, target_repo, repo_type, api)
        else:
            _stream_clone_repo(source_repo, target_repo, repo_type, api, hf_token)

        return f"success! cleanly cloned {source_repo} to {target_repo} with no tags."
    except Exception as e:
        return f"error: {type(e).__name__}: {str(e)}"
143
 
 
144
# Assemble the Gradio front-end; `demo` is launched by the __main__ guard.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🥷 Hugging Face Stealth Cloner")
    gr.Markdown(
        "Clone repositories, datasets, or HF buckets cleanly **without** the 'duplicated from' tag showing up on the new repository.\n\n"
        "**Note:** To use this tool, you must provide your own Hugging Face token with **Write** permissions so it can push files to your account."
    )

    # Row 1: credentials.
    with gr.Row():
        token_box = gr.Textbox(
            label="Hugging Face Token (Write Access)",
            type="password",
            placeholder="hf_...",
        )

    # Row 2: what to clone and how to interpret the source field.
    with gr.Row():
        source_box = gr.Textbox(
            label="Source Repo, Bucket ID, or Local Path",
            placeholder="e.g. source-user/source-model, username/my-bucket",
        )
        source_kind = gr.Radio(
            choices=["repo", "bucket", "local"],
            value="repo",
            label="Source Type",
        )

    # Row 3: where to clone to.
    with gr.Row():
        target_box = gr.Textbox(
            label="Target Repo ID",
            placeholder="e.g. your-username/cloned-model",
        )
        target_kind = gr.Radio(
            choices=["model", "dataset", "space"],
            value="model",
            label="Target Repository Type",
        )

    run_button = gr.Button("Stealth Clone Repo", variant="primary")
    status_box = gr.Textbox(label="Status", lines=2)

    run_button.click(
        fn=stealth_clone_hf_repo,
        inputs=[token_box, source_box, source_kind, target_box, target_kind],
        outputs=status_box,
    )
188
 
189
  if __name__ == "__main__":