Vedisasi's picture
Upload folder using huggingface_hub
54c5666 verified
"""Security validation for inputs and configs"""
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)


class SecurityValidator:
    """Stateless helpers for vetting untrusted strings, paths, configs and files.

    Every method is a ``@staticmethod``, so it can be called either on the
    class or on an instance.
    """

    # Regular expressions (matched case-insensitively) whose presence in a
    # string causes check_code_injection() to report it as unsafe.
    DANGEROUS_PATTERNS = [
        r'__import__',
        r'eval\s*\(',
        r'exec\s*\(',
        r'compile\s*\(',
        r'os\.system',
        r'subprocess',
        r'open\s*\(',
    ]

    @staticmethod
    def check_code_injection(text: str) -> bool:
        """
        Check for potential code injection

        Args:
            text: String to check

        Returns:
            True if safe, False if dangerous patterns detected
        """
        for pattern in SecurityValidator.DANGEROUS_PATTERNS:
            if re.search(pattern, text, re.IGNORECASE):
                logger.warning("Dangerous pattern detected: %s", pattern)
                return False
        return True

    @staticmethod
    def validate_model_path(path: str, allowed_dirs: Optional[List[str]] = None) -> bool:
        """
        Validate model path to prevent directory traversal

        The path is not required to exist.

        Args:
            path: Path to validate
            allowed_dirs: List of allowed base directories; when given, the
                resolved path must lie inside at least one of them

        Returns:
            True if path is safe (unsafe paths raise instead of returning False)

        Raises:
            ValueError: If the path cannot be resolved, contains a ``..``
                component, contains shell metacharacters, or lies outside
                ``allowed_dirs``
        """
        # Resolve to absolute path; any failure is reported uniformly as ValueError
        try:
            abs_path = Path(path).resolve()
        except Exception as e:
            raise ValueError(f"Invalid path: {path}") from e

        # Check for directory traversal on the RAW input: resolve() has already
        # collapsed ".." components, so the resolved path cannot reveal them.
        # Comparing whole components (split on either separator) also avoids
        # rejecting legitimate names that merely contain "..", e.g. "my..model".
        if '..' in re.split(r'[\\/]+', str(path)):
            raise ValueError("Directory traversal detected in path")

        # Check if path contains shell metacharacters
        suspicious_chars = ('|', ';', '&', '$', '`')
        if any(char in str(path) for char in suspicious_chars):
            raise ValueError(f"Suspicious characters in path: {path}")

        # Check allowed directories if specified
        if allowed_dirs:
            allowed_dirs_resolved = [Path(d).resolve() for d in allowed_dirs]
            if not any(abs_path.is_relative_to(allowed_dir) for allowed_dir in allowed_dirs_resolved):
                raise ValueError(f"Path not in allowed directories: {path}")

        # Existence is not checked here; the path may refer to a file that
        # has not been created yet.
        logger.debug("Path validated: %s", abs_path)
        return True

    @staticmethod
    def sanitize_config(config: dict, max_depth: int = 10) -> dict:
        """
        Sanitize configuration to prevent code injection

        Keys and string values are inspected recursively through nested
        dicts, lists and tuples. Nothing is rewritten: the configuration is
        either accepted as-is or rejected with an exception.

        Args:
            config: Configuration dictionary
            max_depth: Maximum nesting depth to check

        Returns:
            A shallow copy of ``config`` (nested containers are shared with
            the input)

        Raises:
            ValueError: If dangerous configuration detected or the nesting
                exceeds ``max_depth``
        """
        # NOTE(review): keys are matched by substring, so benign keys such as
        # "eval_steps" or "input_dim" are rejected as well. Kept as-is because
        # loosening a security check should be a deliberate decision.
        dangerous_keys = ('__import__', 'eval', 'exec', 'compile', 'open', 'input')

        def check_depth(depth: int) -> None:
            """Reject structures nested deeper than max_depth."""
            if depth > max_depth:
                raise ValueError(f"Configuration nesting too deep: {depth}")

        def check_sequence(items, key, depth: int) -> None:
            """Recursively check a list/tuple that belongs to config key `key`."""
            check_depth(depth)
            for item in items:
                if isinstance(item, str):
                    if not SecurityValidator.check_code_injection(item):
                        raise ValueError(f"Potential code injection in config list: {key}")
                elif isinstance(item, dict):
                    check_dict(item, depth + 1)
                elif isinstance(item, (list, tuple)):
                    # Sequences nested inside sequences are checked too
                    check_sequence(item, key, depth + 1)

        def check_dict(d: Dict[str, Any], depth: int = 0) -> None:
            """Recursively check dictionary"""
            check_depth(depth)
            for key, value in d.items():
                # Check key names
                key_lower = str(key).lower()
                if any(danger in key_lower for danger in dangerous_keys):
                    raise ValueError(f"Dangerous configuration key: {key}")

                if isinstance(value, str):
                    # Check for code in string values
                    if not SecurityValidator.check_code_injection(value):
                        raise ValueError(f"Potential code injection in config value: {key}")
                elif isinstance(value, dict):
                    check_dict(value, depth + 1)
                elif isinstance(value, (list, tuple)):
                    # A sequence directly under a dict stays at the dict's
                    # depth, so dicts inside it are counted one level deeper.
                    check_sequence(value, key, depth)

        # Create a copy and validate
        sanitized = config.copy()
        check_dict(sanitized)
        logger.debug("Configuration sanitized successfully")
        return sanitized

    @staticmethod
    def validate_file_size(file_path: str, max_size_mb: int = 1000) -> bool:
        """
        Validate file size to prevent resource exhaustion

        Args:
            file_path: Path to file
            max_size_mb: Maximum file size in MB (1 MB = 1024 * 1024 bytes)

        Returns:
            True if file size is acceptable

        Raises:
            FileNotFoundError: If the path does not exist
            ValueError: If file is too large
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        size_mb = path.stat().st_size / (1024 * 1024)
        if size_mb > max_size_mb:
            raise ValueError(
                f"File too large: {size_mb:.2f}MB (max: {max_size_mb}MB)"
            )
        return True

    @staticmethod
    def sanitize_filename(filename: str) -> str:
        """
        Sanitize filename to prevent path traversal

        Only the final path component is kept, dangerous character sequences
        are replaced by ``_`` and the result is capped at 255 characters.

        Args:
            filename: Original filename

        Returns:
            Sanitized filename; this is an empty string when the input has no
            final component (e.g. ``""`` or ``"dir/"``)
        """
        # Remove path separators
        filename = os.path.basename(filename)

        # Remove dangerous characters ('..' first, so it collapses to one '_')
        dangerous_chars = ('..', '/', '\\', '\0', '|', ';', '&', '$', '`', '<', '>')
        for char in dangerous_chars:
            filename = filename.replace(char, '_')

        # Limit length, keeping the extension when it fits
        max_length = 255
        if len(filename) > max_length:
            name, ext = os.path.splitext(filename)
            if len(ext) <= max_length:
                filename = name[:max_length - len(ext)] + ext
            else:
                # The extension alone exceeds the limit; a negative slice bound
                # would silently yield an over-long name, so truncate outright.
                filename = filename[:max_length]
        return filename