"""
Validation utilities for security and input validation.
"""
import os | |
import re | |
from typing import Optional | |
from urllib.parse import urlparse | |
class ValidationError(Exception):
    """Raised when a value fails one of the validation checks."""
class Validators:
    """Collection of validation functions for security and input validation.

    Every method is a ``staticmethod``: none of them reads instance state, so
    they can be called either on the class (``Validators.validate_filename``)
    or on an instance of it.
    """

    # Regex patterns for validation - allow numbers, letters, hyphens, underscores, dots
    HUGGINGFACE_MODEL_PATTERN = re.compile(r'^[a-zA-Z0-9_\-\.]+/[a-zA-Z0-9_\-\.]+$')
    SAFE_FILENAME_PATTERN = re.compile(r'^[a-zA-Z0-9_\-\.]+$')

    @staticmethod
    def validate_model_path(model_path: str) -> bool:
        """
        Validate that a custom model path is safe and follows expected patterns.

        Args:
            model_path: The model path to validate (surrounding whitespace is
                ignored for the purpose of the checks)

        Returns:
            bool: Always True; an invalid path raises instead of returning False

        Raises:
            ValidationError: If the model path is invalid
        """
        if not model_path or not isinstance(model_path, str):
            raise ValidationError("Model path cannot be empty")

        # Trim whitespace
        model_path = model_path.strip()

        # Check for dangerous characters (excluding single forward slash for HuggingFace format)
        dangerous_chars = ('..', '\\', '|', ';', '&', '$', '`', '<', '>')
        if any(char in model_path for char in dangerous_chars):
            raise ValidationError("Model path contains invalid characters")

        # Check for multiple slashes or leading/trailing slashes
        if '//' in model_path or model_path.startswith('/') or model_path.endswith('/'):
            raise ValidationError("Model path contains invalid characters")

        # Check if it looks like a HuggingFace model path (user/model format)
        if not Validators.HUGGINGFACE_MODEL_PATTERN.match(model_path):
            raise ValidationError("Model path must follow the format 'organization/model-name'")

        # Check length limits
        if len(model_path) > 200:
            raise ValidationError("Model path is too long")

        return True

    @staticmethod
    def validate_filename(filename: str) -> bool:
        """
        Validate that a filename is safe for upload.

        Args:
            filename: The filename to validate

        Returns:
            bool: Always True; an invalid filename raises instead of returning False

        Raises:
            ValidationError: If the filename is invalid
        """
        if not filename or not isinstance(filename, str):
            raise ValidationError("Filename cannot be empty")

        # Check for dangerous characters and patterns
        dangerous_patterns = ('..', '/', '\\', '|', ';', '&', '$', '`', '<', '>')
        if any(pattern in filename for pattern in dangerous_patterns):
            raise ValidationError("Filename contains invalid characters")

        # NUL and other ASCII control characters are never legitimate in an
        # uploaded filename (a NUL byte makes os-level path calls fail).
        if any(ord(ch) < 32 or ord(ch) == 127 for ch in filename):
            raise ValidationError("Filename contains invalid characters")

        # Check if filename starts with a dot (hidden files)
        if filename.startswith('.'):
            raise ValidationError("Hidden files are not allowed")

        # Check length
        if len(filename) > 255:
            raise ValidationError("Filename is too long")

        return True

    @staticmethod
    def validate_file_extension(filename: str, allowed_extensions: set) -> bool:
        """
        Validate that a file has an allowed extension (case-insensitive).

        Args:
            filename: The filename to check
            allowed_extensions: Set of allowed extensions (e.g., {'.txt', '.py'})

        Returns:
            bool: Always True; a disallowed extension raises instead of returning False

        Raises:
            ValidationError: If the filename is empty or the extension is not allowed
        """
        if not filename:
            raise ValidationError("Filename cannot be empty")

        _, ext = os.path.splitext(filename.lower())

        # The filename is lower-cased above, so the allowed set must be
        # lower-cased too or an entry such as '.TXT' could never match.
        normalized = {allowed.lower() for allowed in allowed_extensions}
        if ext not in normalized:
            allowed_list = ', '.join(sorted(normalized))
            raise ValidationError(f"File type '{ext}' not allowed. Allowed types: {allowed_list}")

        return True

    @staticmethod
    def validate_file_size(file_size: int, max_size: int) -> bool:
        """
        Validate that a file size is within limits.

        Args:
            file_size: Size of the file in bytes
            max_size: Maximum allowed size in bytes

        Returns:
            bool: Always True; an oversized file raises instead of returning False

        Raises:
            ValidationError: If the file is too large
        """
        if file_size > max_size:
            max_mb = max_size / (1024 * 1024)
            current_mb = file_size / (1024 * 1024)
            raise ValidationError(f"File too large: {current_mb:.1f}MB (max: {max_mb:.1f}MB)")

        return True

    @staticmethod
    def validate_text_input(text: str, max_length: int = 1000000) -> bool:
        """
        Validate text input for processing.

        Args:
            text: The text to validate
            max_length: Maximum allowed length in characters

        Returns:
            bool: Always True; invalid text raises instead of returning False

        Raises:
            ValidationError: If the text is not a string or is too long
        """
        if not isinstance(text, str):
            raise ValidationError("Text input must be a string")

        if len(text) > max_length:
            raise ValidationError(f"Text too long: {len(text)} characters (max: {max_length})")

        return True

    @staticmethod
    def sanitize_model_path(model_path: str) -> str:
        """
        Sanitize a model path by removing potentially dangerous elements.

        Surrounding whitespace, every '/' and '\\' separator, and every '..'
        sequence are removed. Note that this also removes the single slash of
        an 'organization/model-name' path.

        Args:
            model_path: The model path to sanitize

        Returns:
            str: Sanitized model path ("" for empty input)
        """
        if not model_path:
            return ""

        # Remove whitespace
        sanitized = model_path.strip()

        # Remove separators BEFORE the traversal token: stripping '..' first
        # lets inputs such as './.' collapse into '..' once the slash goes.
        sanitized = sanitized.replace('/', '')
        sanitized = sanitized.replace('\\', '')
        sanitized = sanitized.replace('..', '')

        return sanitized

    @staticmethod
    def is_safe_path(path: str, base_path: str) -> bool:
        """
        Check if a path is safe and within the expected base directory.

        The comparison is made on whole path components, so a sibling such as
        '/base_evil' is not treated as being inside '/base'.

        Args:
            path: The path to check
            base_path: The base directory that the path should be within

        Returns:
            bool: True if the path is the base directory or lies beneath it,
                False otherwise (including when either argument cannot be
                interpreted as a path)
        """
        try:
            # Resolve both paths to absolute, normalized paths.
            # NOTE(review): abspath does not resolve symlinks; switch to
            # os.path.realpath if symlink escapes must also be rejected.
            abs_path = os.path.normcase(os.path.abspath(path))
            abs_base = os.path.normcase(os.path.abspath(base_path))

            # A plain string-prefix test would accept '/base_evil' for base
            # '/base'; commonpath compares component by component.
            return os.path.commonpath([abs_path, abs_base]) == abs_base
        except (OSError, TypeError, ValueError):
            return False
# Module-level Validators instance, created once at import time for callers
# that prefer importing a ready-made object over the class itself.
validators: Validators = Validators()