Spaces:
Running
Running
"""
Unit tests for the Validators utility.
"""
import pytest | |
from app.utils.validators import Validators, ValidationError | |
class TestValidators:
    """Test cases for the Validators utility.

    Every test runs against a fresh ``Validators`` instance created in
    ``setup_method``. Tables of inputs are expressed with
    ``pytest.mark.parametrize`` so each input is reported as its own test
    case instead of the first failure hiding the remaining inputs.
    """

    def setup_method(self):
        """Create a fresh Validators instance before each test."""
        self.validators = Validators()

    # ------------------------------------------------------------------
    # validate_filename
    # ------------------------------------------------------------------

    @pytest.mark.parametrize("filename", [
        'test.txt',
        'document.md',
        'script_file.py',
        'My Document.txt',
        'file-name.json',
        'data123.csv',
    ])
    def test_validate_filename_valid(self, filename):
        """Valid filenames must pass validation without raising."""
        self.validators.validate_filename(filename)

    @pytest.mark.parametrize("filename", [
        pytest.param('', id="empty"),
        pytest.param(None, id="none"),
        pytest.param('../../../etc/passwd', id="path-traversal"),
        pytest.param('file\\with\\backslashes.txt', id="backslashes"),
        pytest.param('file\x00.txt', id="null-byte"),
        pytest.param('file\x01\x02.txt', id="control-characters"),
        # Reserved device names on Windows
        pytest.param('CON.txt', id="reserved-CON"),
        pytest.param('PRN.txt', id="reserved-PRN"),
        pytest.param('AUX.txt', id="reserved-AUX"),
    ])
    def test_validate_filename_invalid(self, filename):
        """Invalid filenames must raise ValidationError."""
        with pytest.raises(ValidationError):
            self.validators.validate_filename(filename)

    # ------------------------------------------------------------------
    # validate_file_extension
    # ------------------------------------------------------------------

    @pytest.mark.parametrize("filename", [
        'test.txt',
        'document.md',
        'script.py',
        'data.json',
        # Upper-case extensions are expected to match case-insensitively
        'FILE.TXT',
        'Document.MD',
    ])
    def test_validate_file_extension_valid(self, filename):
        """Filenames whose extension is in the allow-list must not raise."""
        allowed_extensions = {'.txt', '.md', '.py', '.js', '.json'}
        self.validators.validate_file_extension(filename, allowed_extensions)

    @pytest.mark.parametrize("filename", [
        pytest.param('virus.exe', id="exe"),
        pytest.param('archive.zip', id="zip"),
        pytest.param('image.jpg', id="jpg"),
        pytest.param('filename', id="no-extension"),
        pytest.param('', id="empty"),
    ])
    def test_validate_file_extension_invalid(self, filename):
        """Filenames outside the allow-list must raise ValidationError."""
        allowed_extensions = {'.txt', '.md', '.py'}
        with pytest.raises(ValidationError):
            self.validators.validate_file_extension(filename, allowed_extensions)

    # ------------------------------------------------------------------
    # validate_model_path
    # ------------------------------------------------------------------

    @pytest.mark.parametrize("path", [
        'microsoft/DialoGPT-medium',
        'google/bert-base-uncased',
        'meta-llama/Llama-2-7b-hf',
        'mistralai/Mistral-7B-Instruct-v0.1',
        'Qwen/Qwen2.5-72B-Instruct',
        'THUDM/chatglm-6b',
        'deepseek-ai/deepseek-coder-6.7b-base',
        'unsloth/llama-2-7b-bnb-4bit',
        'google-bert/bert-base-uncased',
        'bartar/SPLM-2',  # User's specific case
    ])
    def test_validate_model_path_valid(self, path):
        """Well-formed HuggingFace model paths must not raise."""
        self.validators.validate_model_path(path)

    # NOTE(review): most of these inputs use the owner 'user', which is
    # presumably not a trusted prefix, so they may be rejected by the prefix
    # check rather than the format check - confirm against Validators.
    @pytest.mark.parametrize("path", [
        pytest.param('', id="empty"),
        pytest.param('invalid-path', id="no-slash"),
        pytest.param('user/', id="empty-model-name"),
        pytest.param('/model-name', id="empty-user"),
        pytest.param('user//model', id="double-slash"),
        pytest.param('user/model/extra', id="too-many-parts"),
        pytest.param('user name/model', id="space-in-user"),
        # The original author was unsure whether this one should be valid.
        pytest.param('user/model name', id="space-in-model"),
        pytest.param('user@domain/model', id="invalid-characters"),
        pytest.param('../malicious/path', id="path-traversal"),
        pytest.param('user\\model', id="backslash"),
    ])
    def test_validate_model_path_invalid_format(self, path):
        """Malformed model paths must raise ValidationError."""
        with pytest.raises(ValidationError):
            self.validators.validate_model_path(path)

    @pytest.mark.parametrize("path", [
        'random-user/some-model',
        'untrusted/malicious-model',
        'hacker/backdoor-model',
        'suspicious/model',
    ])
    def test_validate_model_path_untrusted_prefix(self, path):
        """Model paths with an untrusted owner prefix must raise."""
        with pytest.raises(ValidationError):
            self.validators.validate_model_path(path)

    def test_validate_model_path_edge_cases(self):
        """Cover None, over-long paths and punctuation in model names."""
        # None input
        with pytest.raises(ValidationError):
            self.validators.validate_model_path(None)

        # Very long path
        long_path = 'microsoft/' + 'a' * 1000
        with pytest.raises(ValidationError):
            self.validators.validate_model_path(long_path)

        # Dashes, underscores and dots are accepted under a trusted prefix
        for path in (
            'microsoft/model-with-dashes',
            'microsoft/model_with_underscores',
            'microsoft/model.with.dots',
        ):
            self.validators.validate_model_path(path)

    # ------------------------------------------------------------------
    # validate_text_input
    # ------------------------------------------------------------------

    @pytest.mark.parametrize("text", [
        pytest.param('Hello world!', id="plain"),
        pytest.param('A' * 1000, id="long-but-reasonable"),
        pytest.param('Text with\nnewlines\nand\ttabs', id="newlines-and-tabs"),
        pytest.param('Unicode: 你好世界 🌍', id="unicode"),
        pytest.param('', id="empty"),  # Empty text is treated as valid here
    ])
    def test_validate_text_input_valid(self, text):
        """Ordinary text inputs must not raise."""
        self.validators.validate_text_input(text)

    def test_validate_text_input_invalid(self):
        """None and oversized text must raise ValidationError."""
        # None input
        with pytest.raises(ValidationError):
            self.validators.validate_text_input(None)

        # Extremely long text (assumes the validator enforces a size limit)
        very_long_text = 'A' * (10 * 1024 * 1024)  # 10 MiB of text
        with pytest.raises(ValidationError):
            self.validators.validate_text_input(very_long_text)

    def test_validate_text_input_malicious_content(self):
        """Null bytes must be rejected; other control characters may be."""
        # Null bytes
        with pytest.raises(ValidationError):
            self.validators.validate_text_input('text\x00with\x00nulls')

        # Control characters: both acceptance and rejection are tolerated on
        # purpose. This only checks that nothing other than ValidationError
        # escapes the validator.
        try:
            self.validators.validate_text_input('text\x01with\x02controls')
        except ValidationError:
            pass

    # ------------------------------------------------------------------
    # Cross-cutting behaviour
    # ------------------------------------------------------------------

    def test_validation_error_messages(self):
        """ValidationError messages must mention what was rejected.

        ``pytest.raises`` is used instead of ``try`` / ``assert False`` so
        that a missing exception still fails when asserts are stripped
        (``python -O``).
        """
        # Filename validation error message
        with pytest.raises(ValidationError) as exc_info:
            self.validators.validate_filename('../../../etc/passwd')
        message = str(exc_info.value).lower()
        assert 'filename' in message or 'path' in message

        # File extension error message
        with pytest.raises(ValidationError) as exc_info:
            self.validators.validate_file_extension('virus.exe', {'.txt'})
        message = str(exc_info.value).lower()
        assert 'extension' in message or 'allowed' in message

        # Model path error message
        with pytest.raises(ValidationError) as exc_info:
            self.validators.validate_model_path('invalid-path')
        message = str(exc_info.value).lower()
        assert 'model' in message or 'path' in message

    @pytest.mark.parametrize("prefix", [
        'microsoft/',
        'google/',
        'meta-llama/',
        'mistralai/',
        'openai-community/',
        'Qwen/',
        'THUDM/',
        'deepseek-ai/',
        'unsloth/',
        'google-bert/',
    ])
    def test_allowed_model_prefixes_coverage(self, prefix):
        """Every commonly used trusted prefix must be accepted."""
        test_path = prefix + 'test-model'
        try:
            self.validators.validate_model_path(test_path)
        except ValidationError:
            pytest.fail(f"Trusted prefix {prefix} should be allowed")

    def test_case_sensitivity(self):
        """Check how letter case is treated by the validators."""
        # File extensions should be case insensitive
        allowed_extensions = {'.txt', '.md'}
        self.validators.validate_file_extension('FILE.TXT', allowed_extensions)
        self.validators.validate_file_extension('Document.MD', allowed_extensions)

        # A differently-cased trusted owner ('Microsoft' vs 'microsoft') is
        # expected to be accepted.
        # NOTE(review): an earlier comment here claimed prefixes are case
        # sensitive, which contradicts this assertion - confirm the intended
        # behaviour against Validators.validate_model_path.
        self.validators.validate_model_path('Microsoft/model')  # Capital M

        # But random capitalization in untrusted prefixes should still fail
        with pytest.raises(ValidationError):
            self.validators.validate_model_path('RANDOM/model')