# tokenizers/tests/conftest.py
# bartar's picture
# Upload 26 files
# d66ab65 verified
"""
pytest configuration file
"""
import pytest
import os
import tempfile
from unittest.mock import Mock, patch
from flask import Flask
# Add the parent directory to Python path so we can import the app
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app import create_app
from config import TestingConfig
@pytest.fixture
def app():
    """Yield the application built by ``create_app(TestingConfig())``.

    While the fixture is active, ``UPLOAD_FOLDER`` points at a fresh
    temporary directory and ``TESTING`` is set to ``True``.  The directory
    is removed when the ``TemporaryDirectory`` context exits at teardown.
    """
    flask_app = create_app(TestingConfig())

    # Keep uploads made during a test inside a throwaway directory.
    with tempfile.TemporaryDirectory() as upload_dir:
        flask_app.config['TESTING'] = True
        flask_app.config['UPLOAD_FOLDER'] = upload_dir
        yield flask_app
@pytest.fixture
def client(app):
    """Return the test client obtained from ``app.test_client()``."""
    test_client = app.test_client()
    return test_client
@pytest.fixture
def mock_tokenizer():
    """Return a ``Mock`` configured to look like a tokenizer.

    The mock exposes ``vocab_size``, ``model_max_length``, the four special
    token attributes, and a ``tokenize`` method that always returns
    ``['Hello', 'world', '!']``.
    """
    # Special tokens exposed as plain attributes on the mock.
    special_tokens = {
        'pad_token': '<pad>',
        'eos_token': '</s>',
        'unk_token': '<unk>',
        'bos_token': '<s>',
    }
    fake = Mock(vocab_size=50257, model_max_length=1024, **special_tokens)
    fake.tokenize.return_value = ['Hello', 'world', '!']

    # Rename the mock's class so its __name__ reads 'MockTokenizer'.
    fake.__class__.__name__ = 'MockTokenizer'
    return fake
@pytest.fixture
def sample_text():
    """Return a short two-sentence string used as tokenizer input in tests."""
    text = "Hello world! This is a test."
    return text
@pytest.fixture
def sample_tokens():
    """Return a fresh list of eight token strings for use in tests.

    All tokens after 'Hello', except the punctuation tokens '!' and '.',
    carry a leading space.
    """
    tokens = [
        'Hello',
        ' world',
        '!',
        ' This',
        ' is',
        ' a',
        ' test',
        '.',
    ]
    return tokens
@pytest.fixture
def temp_file():
    """Yield the path of a small temporary ``.txt`` file, then delete it.

    The file is created with ``delete=False`` so it survives the closing of
    the ``with`` block and can be reopened by path.  It contains the text
    ``"Hello world! This is a test file."``.  Once the consumer of the
    fixture is done, the file is removed if it still exists.
    """
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write("Hello world! This is a test file.")
        temp_path = f.name

    yield temp_path

    # Cleanup: attempt the removal directly rather than checking for
    # existence first, so a file that disappears in between (e.g. deleted by
    # the test itself) cannot cause a spurious error.
    try:
        os.unlink(temp_path)
    except FileNotFoundError:
        pass