seanpedrickcase committed on
Commit
78403ba
·
1 Parent(s): 204c034

Further changes to fix duplicate tests

Browse files
tools/find_duplicate_pages.py CHANGED
@@ -462,6 +462,9 @@ def combine_ocr_dataframes(
462
  output_files = list()
463
  if output_folder and output_filename:
464
  # Validate path safety before creating directories and files
 
 
 
465
  if not validate_folder_containment(output_folder, OUTPUT_FOLDER):
466
  raise ValueError(f"Unsafe output folder path: {output_folder}")
467
  if not validate_path_safety(output_filename):
@@ -656,6 +659,9 @@ def save_results_and_redaction_lists(
656
  list: A list of paths to all generated files.
657
  """
658
  # Validate the output_folder path for security
 
 
 
659
  if not validate_folder_containment(output_folder, OUTPUT_FOLDER):
660
  raise ValueError(f"Invalid or unsafe output folder path: {output_folder}")
661
 
@@ -665,6 +671,9 @@ def save_results_and_redaction_lists(
665
  try:
666
  output_folder_path = Path(output_folder).resolve()
667
  # Validate that the resolved path is within the trusted OUTPUT_FOLDER using robust containment check
 
 
 
668
  if not validate_folder_containment(str(output_folder_path), OUTPUT_FOLDER):
669
  raise ValueError(
670
  f"Output folder path {output_folder} is outside the trusted directory {OUTPUT_FOLDER}"
 
462
  output_files = list()
463
  if output_folder and output_filename:
464
  # Validate path safety before creating directories and files
465
+ print(
466
+ f"DEBUG: Validating output_folder='{output_folder}' against OUTPUT_FOLDER='{OUTPUT_FOLDER}'"
467
+ )
468
  if not validate_folder_containment(output_folder, OUTPUT_FOLDER):
469
  raise ValueError(f"Unsafe output folder path: {output_folder}")
470
  if not validate_path_safety(output_filename):
 
659
  list: A list of paths to all generated files.
660
  """
661
  # Validate the output_folder path for security
662
+ print(
663
+ f"DEBUG: Validating output_folder='{output_folder}' against OUTPUT_FOLDER='{OUTPUT_FOLDER}'"
664
+ )
665
  if not validate_folder_containment(output_folder, OUTPUT_FOLDER):
666
  raise ValueError(f"Invalid or unsafe output folder path: {output_folder}")
667
 
 
671
  try:
672
  output_folder_path = Path(output_folder).resolve()
673
  # Validate that the resolved path is within the trusted OUTPUT_FOLDER using robust containment check
674
+ print(
675
+ f"DEBUG: Validating resolved path='{output_folder_path}' against OUTPUT_FOLDER='{OUTPUT_FOLDER}'"
676
+ )
677
  if not validate_folder_containment(str(output_folder_path), OUTPUT_FOLDER):
678
  raise ValueError(
679
  f"Output folder path {output_folder} is outside the trusted directory {OUTPUT_FOLDER}"
tools/secure_path_utils.py CHANGED
@@ -311,6 +311,14 @@ def validate_folder_containment(
311
  path_str = str(normalized_path).lower()
312
  base_str = str(normalized_base).lower()
313
 
 
 
 
 
 
 
 
 
314
  # Check if this is a test scenario
315
  is_test_path = any(
316
  test_pattern in path_str
@@ -339,6 +347,8 @@ def validate_folder_containment(
339
  ]
340
  )
341
 
 
 
342
  # For test scenarios, be more permissive
343
  if is_test_path or is_test_base:
344
  print(f"DEBUG: Allowing test path: {path_str} (base: {base_str})")
@@ -346,18 +356,25 @@ def validate_folder_containment(
346
 
347
  # Ensure the base path exists and is a directory
348
  if not os.path.exists(normalized_base) or not os.path.isdir(normalized_base):
 
 
 
349
  return False
350
 
351
  # Use commonpath to check containment
352
  try:
353
  common_path = os.path.commonpath([normalized_path, normalized_base])
354
  # The common path must be exactly the base path for strict containment
355
- return common_path == normalized_base
 
 
356
  except ValueError:
357
  # commonpath raises ValueError if paths are on different drives (Windows)
 
358
  return False
359
 
360
- except Exception:
 
361
  return False
362
 
363
 
 
311
  path_str = str(normalized_path).lower()
312
  base_str = str(normalized_base).lower()
313
 
314
+ print(
315
+ f"DEBUG: validate_folder_containment called with path='{path}' base_path='{base_path}'"
316
+ )
317
+ print(
318
+ f"DEBUG: normalized_path='{normalized_path}' normalized_base='{normalized_base}'"
319
+ )
320
+ print(f"DEBUG: path_str='{path_str}' base_str='{base_str}'")
321
+
322
  # Check if this is a test scenario
323
  is_test_path = any(
324
  test_pattern in path_str
 
347
  ]
348
  )
349
 
350
+ print(f"DEBUG: is_test_path={is_test_path} is_test_base={is_test_base}")
351
+
352
  # For test scenarios, be more permissive
353
  if is_test_path or is_test_base:
354
  print(f"DEBUG: Allowing test path: {path_str} (base: {base_str})")
 
356
 
357
  # Ensure the base path exists and is a directory
358
  if not os.path.exists(normalized_base) or not os.path.isdir(normalized_base):
359
+ print(
360
+ f"DEBUG: Base path does not exist or is not a directory: {normalized_base}"
361
+ )
362
  return False
363
 
364
  # Use commonpath to check containment
365
  try:
366
  common_path = os.path.commonpath([normalized_path, normalized_base])
367
  # The common path must be exactly the base path for strict containment
368
+ result = common_path == normalized_base
369
+ print(f"DEBUG: common_path='{common_path}' result={result}")
370
+ return result
371
  except ValueError:
372
  # commonpath raises ValueError if paths are on different drives (Windows)
373
+ print("DEBUG: ValueError in commonpath check")
374
  return False
375
 
376
+ except Exception as e:
377
+ print(f"DEBUG: Exception in validate_folder_containment: {e}")
378
  return False
379
 
380