chuckfinca Claude Opus 4.6 (1M context) committed on
Commit
4006728
·
1 Parent(s): 6173f83

Add file count, total size, and per-file size validation

Browse files

Max 50 files, 50MB total, 10MB per file. Clear error messages
for each validation failure. Warnings returned for skipped
files when some files are still valid.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +55 -11
app.py CHANGED
@@ -186,21 +186,57 @@ def upload_trace(result: dict) -> None:
186
 
187
 
188
  ALLOWED_EXTENSIONS = {".txt", ".csv", ".md", ".json", ".pdf"}
189
- MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
 
 
190
 
191
 
192
def save_uploaded_files(file_paths: list[str]) -> Path:
    """Copy acceptable uploads into a freshly created temporary workspace.

    A file is skipped, with a warning printed to stdout, when its extension
    is not in ALLOWED_EXTENSIONS or when its size exceeds MAX_FILE_SIZE.
    Every other file is copied into the workspace under its base name.

    Returns:
        The workspace directory. It is created before any file is checked,
        so it is returned even when every file was skipped.
    """
    destination = Path(tempfile.mkdtemp(prefix="lh-workspace-"))

    for raw_path in file_paths:
        src = Path(raw_path)
        # The extension is tested first so stat() only runs on files that
        # have already passed the type filter.
        if src.suffix.lower() not in ALLOWED_EXTENSIONS:
            print(f"WARNING: skipping file with disallowed extension: {src.name}")
        elif src.stat().st_size > MAX_FILE_SIZE:
            print(f"WARNING: skipping file exceeding size limit: {src.name}")
        else:
            payload = src.read_bytes()
            (destination / src.name).write_bytes(payload)

    return destination
 
204
 
205
 
206
  def format_stats(trace) -> str:
@@ -368,14 +404,22 @@ def build_app() -> gr.Blocks:
368
  token = data.get("token", "")
369
  file_paths = data.get("paths", [])
370
  except (json.JSONDecodeError, AttributeError):
371
- return json.dumps({"error": "Invalid payload."})
372
  if token and not _has_valid_token(token):
373
  return json.dumps({"error": "Invalid access token."})
374
  if not file_paths:
375
- return json.dumps({"error": "No files provided."})
376
- workspace = save_uploaded_files(file_paths)
 
 
 
 
377
  session_id = _create_session(str(workspace))
378
- return json.dumps({"session_id": session_id, "file_count": len(file_paths)})
 
 
 
 
379
 
380
  upload_btn = gr.Button(visible=False)
381
  upload_btn.click(api_upload, inputs=upload_input, outputs=upload_output, api_name="upload")
 
186
 
187
 
188
  ALLOWED_EXTENSIONS = {".txt", ".csv", ".md", ".json", ".pdf"}
189
+ MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB per file
190
+ MAX_TOTAL_SIZE = 50 * 1024 * 1024 # 50 MB total
191
+ MAX_FILE_COUNT = 50
192
 
193
 
194
+ def validate_and_save_files(file_paths: list[str]) -> tuple[Path | None, list[str]]:
195
+ """Validate and save uploaded files. Returns (workspace, errors)."""
196
+ errors = []
197
+
198
+ if len(file_paths) > MAX_FILE_COUNT:
199
+ errors.append(f"Too many files ({len(file_paths)}). Maximum is {MAX_FILE_COUNT}.")
200
+ return None, errors
201
+
202
+ valid_files = []
203
+ total_size = 0
204
+
205
  for file_path in file_paths:
206
  src = Path(file_path)
207
+
208
+ if not src.is_file():
209
+ continue
210
+
211
  if src.suffix.lower() not in ALLOWED_EXTENSIONS:
212
+ allowed = ", ".join(sorted(ALLOWED_EXTENSIONS))
213
+ errors.append(f"'{src.name}' has an unsupported file type. Allowed: {allowed}")
214
  continue
215
+
216
+ size = src.stat().st_size
217
+ if size > MAX_FILE_SIZE:
218
+ limit_mb = MAX_FILE_SIZE // (1024 * 1024)
219
+ errors.append(f"'{src.name}' is too large ({size // (1024 * 1024)}MB). Maximum is {limit_mb}MB per file.")
220
  continue
221
+
222
+ total_size += size
223
+ if total_size > MAX_TOTAL_SIZE:
224
+ limit_mb = MAX_TOTAL_SIZE // (1024 * 1024)
225
+ errors.append(f"Total upload size exceeds {limit_mb}MB. Remove some files and try again.")
226
+ return None, errors
227
+
228
+ valid_files.append(src)
229
+
230
+ if not valid_files:
231
+ if not errors:
232
+ errors.append("No valid files to upload.")
233
+ return None, errors
234
+
235
+ workspace = Path(tempfile.mkdtemp(prefix="lh-workspace-"))
236
+ for src in valid_files:
237
  (workspace / src.name).write_bytes(src.read_bytes())
238
+
239
+ return workspace, errors
240
 
241
 
242
  def format_stats(trace) -> str:
 
404
  token = data.get("token", "")
405
  file_paths = data.get("paths", [])
406
  except (json.JSONDecodeError, AttributeError):
407
+ return json.dumps({"error": "Invalid upload request."})
408
  if token and not _has_valid_token(token):
409
  return json.dumps({"error": "Invalid access token."})
410
  if not file_paths:
411
+ return json.dumps({"error": "No files selected. Please upload at least one document."})
412
+
413
+ workspace, errors = validate_and_save_files(file_paths)
414
+ if not workspace:
415
+ return json.dumps({"error": " ".join(errors)})
416
+
417
  session_id = _create_session(str(workspace))
418
+ saved = sum(1 for f in workspace.iterdir() if f.is_file())
419
+ result = {"session_id": session_id, "file_count": saved}
420
+ if errors:
421
+ result["warnings"] = errors
422
+ return json.dumps(result)
423
 
424
  upload_btn = gr.Button(visible=False)
425
  upload_btn.click(api_upload, inputs=upload_input, outputs=upload_output, api_name="upload")