rootlocalghost committed on
Commit
7be3d75
·
verified ·
1 Parent(s): 1c3237a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -121
app.py CHANGED
@@ -1,141 +1,189 @@
 
 
1
  import os
2
- import gc
3
- import torch
4
  import shutil
5
- import gradio as gr
6
- from huggingface_hub import HfApi, hf_hub_download
7
- from safetensors.torch import load_file, save_file
8
-
9
# Repos involved in the conversion and a scratch directory for local work.
SOURCE_REPO = "Tongyi-MAI/Z-Image-Turbo"
TARGET_REPO = "rootlocalghost/Z-Image-Turbo-FP8"
TEMP_DIR = "temp_processing_dir"


def convert_and_upload(token):
    """Clone SOURCE_REPO into TARGET_REPO, quantizing selected weights to FP8.

    Generator that yields human-readable progress strings for the Gradio log
    box. Files under ``text_encoder/`` or ``transformer/`` ending in
    ``.safetensors`` have their floating-point tensors cast to
    ``torch.float8_e4m3fn`` before upload; every other file is copied as-is.
    Files are processed one at a time and deleted immediately afterwards to
    fit the Space's disk and RAM limits.

    Args:
        token: Hugging Face access token with write permission on TARGET_REPO.

    Yields:
        Status messages (str). Fatal failures yield an error message and
        return early; per-file failures yield a warning and continue.
    """
    if not token:
        yield "❌ Error: Please provide a valid Hugging Face Write Token."
        return

    api = HfApi(token=token)
    yield f"🔄 Connecting to Hugging Face and verifying target repo: {TARGET_REPO}..."

    # Ensure the target repo exists, create it if it doesn't.
    try:
        api.create_repo(repo_id=TARGET_REPO, exist_ok=True, private=False)
    except Exception as e:
        yield f"❌ Error checking/creating repo: {str(e)}\nMake sure your token has 'Write' permissions."
        return

    yield "📋 Fetching file list from the source repository..."
    try:
        files = api.list_repo_files(SOURCE_REPO)
    except Exception as e:
        yield f"❌ Error fetching files: {str(e)}"
        return

    # Create a temporary directory for safe local processing.
    os.makedirs(TEMP_DIR, exist_ok=True)

    for file in files:
        # Directory placeholders cannot be downloaded as files — skip them.
        if file.endswith("/"):
            continue
        yield f"⏳ Processing {file}..."
        # Pre-bind so the cleanup below never hits an unbound name when the
        # download itself raises (which would mask the real error).
        local_path = None
        try:
            # Download into a plain local dir (no symlinked central cache) so
            # the 50GB Space disk does not fill with duplicate blobs.
            local_path = hf_hub_download(
                repo_id=SOURCE_REPO,
                filename=file,
                local_dir=TEMP_DIR,
                # NOTE(review): deprecated/ignored in recent huggingface_hub —
                # with local_dir set, real files are written either way.
                local_dir_use_symlinks=False
            )

            # Quantize only the weight shards inside the targeted subfolders.
            if file.endswith(".safetensors") and ("text_encoder/" in file or "transformer/" in file):
                yield f"🧠 Quantizing {file} to FP8 (This may take a minute)..."

                # Load tensors into RAM.
                tensors = load_file(local_path)

                # Cast all floating-point tensors to FP8; iterate over a
                # snapshot of the keys since values are replaced in place.
                for k in list(tensors.keys()):
                    if tensors[k].is_floating_point():
                        tensors[k] = tensors[k].to(torch.float8_e4m3fn)

                # Save the quantized tensors to a new temp file.
                converted_path = os.path.join(TEMP_DIR, "converted.safetensors")
                save_file(tensors, converted_path)

                # Wipe the tensors from RAM immediately to stay under 16GB.
                del tensors
                gc.collect()

                yield f"☁️ Uploading FP8 version of {file}..."
                api.upload_file(
                    path_or_fileobj=converted_path,
                    path_in_repo=file,
                    repo_id=TARGET_REPO,
                    commit_message=f"Upload FP8 quantized {file}"
                )

                # Clean up the converted file.
                os.remove(converted_path)

            else:
                yield f"☁️ Copying {file} as-is..."
                api.upload_file(
                    path_or_fileobj=local_path,
                    path_in_repo=file,
                    repo_id=TARGET_REPO,
                    commit_message=f"Copy {file} from original repo"
                )

            # Delete the downloaded original file to free up disk space.
            if local_path and os.path.exists(local_path):
                os.remove(local_path)

            # Final sweep of memory before the next file.
            gc.collect()

        except Exception as e:
            yield f"⚠️ Error processing {file}: {str(e)}\nSkipping to next file..."

    # Clean up the processing directory.
    if os.path.exists(TEMP_DIR):
        shutil.rmtree(TEMP_DIR)

    yield "✅ All files processed and successfully uploaded to your repository!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
# Build the Gradio Web Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 Z-Image-Turbo FP8 Quantizer & Uploader")
    gr.Markdown(
        f"This tool sequentially downloads files from `{SOURCE_REPO}`, quantizes the **text_encoder** and **transformer** "
        f"`.safetensors` files to FP8 (`float8_e4m3fn`), and uploads everything to `{TARGET_REPO}`.\n\n"
        "**Note:** Because we are using a free Space (2 vCPUs, 16GB RAM), this script is designed to process one file at a time "
        "and aggressively clear memory/disk caches. It will take some time, but it won't crash."
    )

    with gr.Row():
        # Left column: credentials and the trigger button.
        with gr.Column(scale=2):
            token_box = gr.Textbox(
                label="Hugging Face Token (Needs Write Access)",
                type="password",
                placeholder="hf_...",
            )
            run_button = gr.Button("Start Quantization & Upload", variant="primary")

        # Right column: streamed progress output.
        with gr.Column(scale=3):
            log_box = gr.Textbox(
                label="Operation Logs",
                lines=15,
                interactive=False,
                max_lines=20,
            )

    # convert_and_upload is a generator, so the log box updates live.
    run_button.click(fn=convert_and_upload, inputs=[token_box], outputs=[log_box])
140
 
141
  if __name__ == "__main__":
 
1
+ import gradio as gr
2
+ from huggingface_hub import HfApi, hf_hub_download, BucketFile, BucketFolder
3
  import os
4
+ import re
5
+ import time
6
  import shutil
7
+ import tempfile
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ RATE_LIMIT_PATTERN = re.compile(r"Retry after\s*(\d+)\s*seconds", re.IGNORECASE)
 
10
 
11
+ def _is_rate_limit_error(exc: Exception) -> bool:
12
+ message = str(exc).lower()
13
+ return "429" in message or "too many requests" in message or "rate limit" in message
 
 
 
14
 
15
+ def _get_retry_after(exc: Exception, default: int = 2) -> int:
16
+ match = RATE_LIMIT_PATTERN.search(str(exc))
17
+ if match:
18
+ return int(match.group(1))
19
+ return default
 
 
 
 
 
 
 
20
 
21
+ def _retry_api_call(fn, *args, retries: int = 3, **kwargs):
22
+ delay = 2
23
+ for attempt in range(1, retries + 1):
24
  try:
25
+ return fn(*args, **kwargs)
26
+ except Exception as exc:
27
+ if not _is_rate_limit_error(exc) or attempt == retries:
28
+ raise
29
+ wait = _get_retry_after(exc, delay)
30
+ time.sleep(wait)
31
+ delay = min(delay * 2, 60)
32
+
33
+ def _format_bucket_uri(bucket_id: str) -> str:
34
+ bucket_id = bucket_id.strip()
35
+ if bucket_id.startswith("hf://buckets/"):
36
+ return bucket_id
37
+ if bucket_id.startswith("buckets/"):
38
+ return f"hf://{bucket_id}"
39
+ return f"hf://buckets/{bucket_id}"
40
+
41
def _stream_clone_repo(source_repo, target_repo, repo_type, api, hf_token):
    """Copy every file from *source_repo* into *target_repo*, one at a time.

    Each file is downloaded into a temporary directory, re-uploaded to the
    target as a fresh commit (so no "duplicated from" provenance attaches),
    and deleted locally before the next file is fetched — keeping disk usage
    at a single file's worth.

    Args:
        source_repo: Repo id to read from.
        target_repo: Repo id to write to (must already exist).
        repo_type: "model", "dataset" or "space".
        api: Authenticated ``HfApi`` client used for listing and uploading.
        hf_token: Token forwarded to the download/upload calls.

    Raises:
        ValueError: If the source repo lists no files, or a download did not
            produce a file on disk.
    """
    file_paths = api.list_repo_files(
        repo_id=source_repo,
        repo_type=repo_type,
        token=hf_token,
    )
    if not file_paths:
        raise ValueError("source repo is empty or could not be listed")

    with tempfile.TemporaryDirectory(prefix="hf_file_") as root_dir:
        for file_path in file_paths:
            # Skip directory placeholders — only real files can be downloaded.
            if file_path.endswith("/"):
                continue
            # Pre-bind so the cleanup in `finally` cannot hit an unbound name
            # when hf_hub_download itself raises (the original masked the real
            # error with an UnboundLocalError here).
            downloaded_path = None
            try:
                downloaded_path = hf_hub_download(
                    repo_id=source_repo,
                    filename=file_path,
                    repo_type=repo_type,
                    local_dir=root_dir,
                    # NOTE(review): deprecated/ignored in recent
                    # huggingface_hub — with local_dir set, real files are
                    # written either way.
                    local_dir_use_symlinks=False,
                    token=hf_token,
                )
                if not os.path.isfile(downloaded_path):
                    raise ValueError(f"Downloaded file not found: {downloaded_path}")

                api.upload_file(
                    path_or_fileobj=downloaded_path,
                    path_in_repo=file_path,
                    repo_id=target_repo,
                    repo_type=repo_type,
                    commit_message=f"clone {file_path}",
                    token=hf_token,
                )
            finally:
                # Free disk space immediately so large repos fit in the Space.
                if downloaded_path and os.path.exists(downloaded_path):
                    os.remove(downloaded_path)
76
+
77
+ def _upload_local_source(source_path, target_repo, repo_type, api):
78
+ if not os.path.isdir(source_path):
79
+ raise ValueError("Local source path must be an existing directory.")
80
+ api.upload_large_folder(
81
+ repo_id=target_repo,
82
+ folder_path=source_path,
83
+ repo_type=repo_type,
84
+ num_workers=1,
85
+ print_report=False,
86
+ )
87
 
88
def _stream_clone_bucket(source_repo, target_repo, repo_type, api, hf_token):
    """Clone the contents of an HF bucket into a regular repo, file by file.

    Each bucket file is downloaded into a temporary staging directory,
    uploaded to *target_repo* under the same relative path, and removed
    locally before the next file is fetched. Folder entries (and anything
    that is neither file nor folder) are skipped.
    """
    bucket_uri = _format_bucket_uri(source_repo)
    # The tree-listing API expects the id without the "hf://" scheme prefix.
    bucket_id = bucket_uri[len("hf://"):]
    entries = api.list_bucket_tree(bucket_id=bucket_id, recursive=True, token=hf_token)
    with tempfile.TemporaryDirectory(prefix="hf_file_") as staging_dir:
        for entry in entries:
            if isinstance(entry, BucketFolder):
                continue
            if not isinstance(entry, BucketFile):
                continue
            staged_path = os.path.join(staging_dir, entry.path)
            # Recreate the bucket's directory layout locally.
            os.makedirs(os.path.dirname(staged_path), exist_ok=True)
            try:
                api.download_bucket_files(
                    bucket_id=bucket_id,
                    files=[(entry.path, staged_path)],
                    token=hf_token,
                )
                api.upload_file(
                    path_or_fileobj=staged_path,
                    path_in_repo=entry.path,
                    repo_id=target_repo,
                    repo_type=repo_type,
                    commit_message=f"clone {entry.path}",
                    token=hf_token,
                )
            finally:
                # Delete the staged copy to keep disk usage at one file.
                if os.path.exists(staged_path):
                    os.remove(staged_path)
116
+
117
def stealth_clone_hf_repo(hf_token_ui, source_repo, source_type, target_repo, repo_type):
    """Entry point wired to the Gradio button: clone source into target.

    Dispatches on *source_type* ("bucket", "local", anything else is treated
    as a regular repo) and returns a single status string for the UI; all
    exceptions are caught and rendered as an "error: ..." message rather
    than propagated into Gradio.

    Args:
        hf_token_ui: Token pasted in the UI; may be empty or None, in which
            case the HF_TOKEN environment variable (Space secret) is used.
        source_repo: Repo id, bucket id, or local directory path.
        source_type: "repo", "bucket" or "local".
        target_repo: Destination repo id.
        repo_type: "model", "dataset" or "space".

    Returns:
        A human-readable success or error message (str).
    """
    # Prioritize the token pasted by the user; fall back to Space secrets if
    # empty. Guard against None — Gradio passes None for a cleared textbox,
    # which made the original unconditional .strip() raise AttributeError.
    ui_token = (hf_token_ui or "").strip()
    hf_token = ui_token or os.environ.get("HF_TOKEN")

    if not hf_token:
        return "error: Please provide a valid Hugging Face Write Token."

    api = HfApi(token=hf_token)
    try:
        # Repo creation is the call most likely to be rate-limited under
        # repeated use, so wrap it in the backoff helper.
        _retry_api_call(
            api.create_repo,
            repo_id=target_repo,
            repo_type=repo_type,
            exist_ok=True,
        )

        if source_type == "bucket":
            _stream_clone_bucket(source_repo, target_repo, repo_type, api, hf_token)
        elif source_type == "local":
            _upload_local_source(source_repo, target_repo, repo_type, api)
        else:
            _stream_clone_repo(source_repo, target_repo, repo_type, api, hf_token)

        return f"success! cleanly cloned {source_repo} to {target_repo} with no tags."
    except Exception as e:
        return f"error: {type(e).__name__}: {str(e)}"
143
 
 
144
# Assemble the Gradio front-end; `demo` is launched by the __main__ guard.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🥷 Hugging Face Stealth Cloner")
    gr.Markdown(
        "Clone repositories, datasets, or HF buckets cleanly **without** the 'duplicated from' tag showing up on the new repository.\n\n"
        "**Note:** To use this tool, you must provide your own Hugging Face token with **Write** permissions so it can push files to your account."
    )

    # Row 1: credentials.
    with gr.Row():
        token_box = gr.Textbox(
            label="Hugging Face Token (Write Access)",
            type="password",
            placeholder="hf_...",
        )

    # Row 2: what to clone and how to interpret the source field.
    with gr.Row():
        source_box = gr.Textbox(
            label="Source Repo, Bucket ID, or Local Path",
            placeholder="e.g. source-user/source-model, username/my-bucket",
        )
        source_kind = gr.Radio(
            choices=["repo", "bucket", "local"],
            value="repo",
            label="Source Type",
        )

    # Row 3: where to clone to.
    with gr.Row():
        target_box = gr.Textbox(
            label="Target Repo ID",
            placeholder="e.g. your-username/cloned-model",
        )
        target_kind = gr.Radio(
            choices=["model", "dataset", "space"],
            value="model",
            label="Target Repository Type",
        )

    run_button = gr.Button("Stealth Clone Repo", variant="primary")
    status_box = gr.Textbox(label="Status", lines=2)

    run_button.click(
        fn=stealth_clone_hf_repo,
        inputs=[token_box, source_box, source_kind, target_box, target_kind],
        outputs=status_box,
    )
188
 
189
  if __name__ == "__main__":