Spaces:
Running
Running
Commit ·
4006728
1
Parent(s): 6173f83
Add file count, total size, and per-file size validation
Browse files
Max 50 files, 50MB total, 10MB per file. Clear error messages
for each validation failure. Warnings returned for skipped
files when some files are still valid.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
app.py
CHANGED
|
@@ -186,21 +186,57 @@ def upload_trace(result: dict) -> None:
|
|
| 186 |
|
| 187 |
|
| 188 |
ALLOWED_EXTENSIONS = {".txt", ".csv", ".md", ".json", ".pdf"}
|
| 189 |
-
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
|
|
|
|
|
|
|
| 190 |
|
| 191 |
|
| 192 |
-
def
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
for file_path in file_paths:
|
| 195 |
src = Path(file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
if src.suffix.lower() not in ALLOWED_EXTENSIONS:
|
| 197 |
-
|
|
|
|
| 198 |
continue
|
| 199 |
-
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
| 201 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
(workspace / src.name).write_bytes(src.read_bytes())
|
| 203 |
-
|
|
|
|
| 204 |
|
| 205 |
|
| 206 |
def format_stats(trace) -> str:
|
|
@@ -368,14 +404,22 @@ def build_app() -> gr.Blocks:
|
|
| 368 |
token = data.get("token", "")
|
| 369 |
file_paths = data.get("paths", [])
|
| 370 |
except (json.JSONDecodeError, AttributeError):
|
| 371 |
-
return json.dumps({"error": "Invalid
|
| 372 |
if token and not _has_valid_token(token):
|
| 373 |
return json.dumps({"error": "Invalid access token."})
|
| 374 |
if not file_paths:
|
| 375 |
-
return json.dumps({"error": "No files
|
| 376 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
session_id = _create_session(str(workspace))
|
| 378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
upload_btn = gr.Button(visible=False)
|
| 381 |
upload_btn.click(api_upload, inputs=upload_input, outputs=upload_output, api_name="upload")
|
|
|
|
| 186 |
|
| 187 |
|
| 188 |
ALLOWED_EXTENSIONS = {".txt", ".csv", ".md", ".json", ".pdf"}
|
| 189 |
+
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB per file
|
| 190 |
+
MAX_TOTAL_SIZE = 50 * 1024 * 1024 # 50 MB total
|
| 191 |
+
MAX_FILE_COUNT = 50
|
| 192 |
|
| 193 |
|
| 194 |
+
def validate_and_save_files(file_paths: list[str]) -> tuple[Path | None, list[str]]:
|
| 195 |
+
"""Validate and save uploaded files. Returns (workspace, errors)."""
|
| 196 |
+
errors = []
|
| 197 |
+
|
| 198 |
+
if len(file_paths) > MAX_FILE_COUNT:
|
| 199 |
+
errors.append(f"Too many files ({len(file_paths)}). Maximum is {MAX_FILE_COUNT}.")
|
| 200 |
+
return None, errors
|
| 201 |
+
|
| 202 |
+
valid_files = []
|
| 203 |
+
total_size = 0
|
| 204 |
+
|
| 205 |
for file_path in file_paths:
|
| 206 |
src = Path(file_path)
|
| 207 |
+
|
| 208 |
+
if not src.is_file():
|
| 209 |
+
continue
|
| 210 |
+
|
| 211 |
if src.suffix.lower() not in ALLOWED_EXTENSIONS:
|
| 212 |
+
allowed = ", ".join(sorted(ALLOWED_EXTENSIONS))
|
| 213 |
+
errors.append(f"'{src.name}' has an unsupported file type. Allowed: {allowed}")
|
| 214 |
continue
|
| 215 |
+
|
| 216 |
+
size = src.stat().st_size
|
| 217 |
+
if size > MAX_FILE_SIZE:
|
| 218 |
+
limit_mb = MAX_FILE_SIZE // (1024 * 1024)
|
| 219 |
+
errors.append(f"'{src.name}' is too large ({size // (1024 * 1024)}MB). Maximum is {limit_mb}MB per file.")
|
| 220 |
continue
|
| 221 |
+
|
| 222 |
+
total_size += size
|
| 223 |
+
if total_size > MAX_TOTAL_SIZE:
|
| 224 |
+
limit_mb = MAX_TOTAL_SIZE // (1024 * 1024)
|
| 225 |
+
errors.append(f"Total upload size exceeds {limit_mb}MB. Remove some files and try again.")
|
| 226 |
+
return None, errors
|
| 227 |
+
|
| 228 |
+
valid_files.append(src)
|
| 229 |
+
|
| 230 |
+
if not valid_files:
|
| 231 |
+
if not errors:
|
| 232 |
+
errors.append("No valid files to upload.")
|
| 233 |
+
return None, errors
|
| 234 |
+
|
| 235 |
+
workspace = Path(tempfile.mkdtemp(prefix="lh-workspace-"))
|
| 236 |
+
for src in valid_files:
|
| 237 |
(workspace / src.name).write_bytes(src.read_bytes())
|
| 238 |
+
|
| 239 |
+
return workspace, errors
|
| 240 |
|
| 241 |
|
| 242 |
def format_stats(trace) -> str:
|
|
|
|
| 404 |
token = data.get("token", "")
|
| 405 |
file_paths = data.get("paths", [])
|
| 406 |
except (json.JSONDecodeError, AttributeError):
|
| 407 |
+
return json.dumps({"error": "Invalid upload request."})
|
| 408 |
if token and not _has_valid_token(token):
|
| 409 |
return json.dumps({"error": "Invalid access token."})
|
| 410 |
if not file_paths:
|
| 411 |
+
return json.dumps({"error": "No files selected. Please upload at least one document."})
|
| 412 |
+
|
| 413 |
+
workspace, errors = validate_and_save_files(file_paths)
|
| 414 |
+
if not workspace:
|
| 415 |
+
return json.dumps({"error": " ".join(errors)})
|
| 416 |
+
|
| 417 |
session_id = _create_session(str(workspace))
|
| 418 |
+
saved = sum(1 for f in workspace.iterdir() if f.is_file())
|
| 419 |
+
result = {"session_id": session_id, "file_count": saved}
|
| 420 |
+
if errors:
|
| 421 |
+
result["warnings"] = errors
|
| 422 |
+
return json.dumps(result)
|
| 423 |
|
| 424 |
upload_btn = gr.Button(visible=False)
|
| 425 |
upload_btn.click(api_upload, inputs=upload_input, outputs=upload_output, api_name="upload")
|