Spaces:
Sleeping
Sleeping
Michael Hu
committed on
Commit
·
237cb26
1
Parent(s):
1e2d288
feat: replace legacy TTS providers with Chatterbox as the single, default provider
Browse files
- DEVELOPER_GUIDE.md +1 -1
- README.md +4 -7
- app.py +1 -1
- config.example.json +1 -1
- src/application/dtos/processing_request_dto.py +1 -1
- src/application/services/configuration_service.py +2 -2
- src/domain/interfaces/audio_processing.py +1 -1
- src/domain/interfaces/speech_synthesis.py +1 -1
- src/infrastructure/config/app_config.py +1 -1
- src/infrastructure/tts/__init__.py +1 -21
- src/infrastructure/tts/dummy_provider.py +0 -139
- src/infrastructure/tts/provider_factory.py +8 -20
- tests/integration/test_audio_processing_pipeline.py +7 -7
- tests/integration/test_file_handling.py +15 -15
- tests/integration/test_performance_and_errors.py +6 -6
- tests/integration/test_provider_integration.py +9 -9
- tests/unit/application/dtos/test_processing_request_dto.py +23 -23
- tests/unit/application/services/test_audio_processing_service.py +5 -5
- tests/unit/application/services/test_configuration_service.py +3 -3
- tests/unit/domain/interfaces/test_speech_synthesis.py +9 -9
- tests/unit/infrastructure/factories/test_tts_provider_factory.py +13 -26
DEVELOPER_GUIDE.md
CHANGED
|
@@ -187,7 +187,7 @@ class AppConfig:
|
|
| 187 |
# ... existing configuration ...
|
| 188 |
|
| 189 |
# TTS Provider Configuration
|
| 190 |
-
TTS_PROVIDERS = os.getenv('TTS_PROVIDERS', '
|
| 191 |
|
| 192 |
# Provider-specific settings
|
| 193 |
MY_TTS_API_KEY = os.getenv('MY_TTS_API_KEY')
|
|
|
|
| 187 |
# ... existing configuration ...
|
| 188 |
|
| 189 |
# TTS Provider Configuration
|
| 190 |
+
TTS_PROVIDERS = os.getenv('TTS_PROVIDERS', 'chatterbox,my_tts').split(',')
|
| 191 |
|
| 192 |
# Provider-specific settings
|
| 193 |
MY_TTS_API_KEY = os.getenv('MY_TTS_API_KEY')
|
README.md
CHANGED
|
@@ -95,10 +95,7 @@ graph TD
|
|
| 95 |
- **NLLB** - Meta's No Language Left Behind model
|
| 96 |
|
| 97 |
### Text-to-Speech (TTS)
|
| 98 |
-
- **
|
| 99 |
-
- **Dia** - Fast neural TTS
|
| 100 |
-
- **CosyVoice2** - Advanced voice synthesis
|
| 101 |
-
- **Dummy** - Test provider for development
|
| 102 |
|
| 103 |
## 📖 Usage
|
| 104 |
|
|
@@ -135,7 +132,7 @@ request = ProcessingRequestDto(
|
|
| 135 |
audio=audio_upload,
|
| 136 |
asr_model="whisper-small",
|
| 137 |
target_language="zh",
|
| 138 |
-
voice="
|
| 139 |
speed=1.0
|
| 140 |
)
|
| 141 |
|
|
@@ -179,7 +176,7 @@ Create a `.env` file or set environment variables:
|
|
| 179 |
|
| 180 |
```bash
|
| 181 |
# Provider preferences (comma-separated, in order of preference)
|
| 182 |
-
TTS_PROVIDERS=
|
| 183 |
STT_PROVIDERS=whisper,parakeet
|
| 184 |
TRANSLATION_PROVIDERS=nllb
|
| 185 |
|
|
@@ -201,7 +198,7 @@ The system automatically detects available providers and falls back gracefully:
|
|
| 201 |
from src.infrastructure.config.dependency_container import DependencyContainer
|
| 202 |
|
| 203 |
container = DependencyContainer()
|
| 204 |
-
container.configure_tts_providers(['
|
| 205 |
```
|
| 206 |
|
| 207 |
## 🏗️ Architecture Benefits
|
|
|
|
| 95 |
- **NLLB** - Meta's No Language Left Behind model
|
| 96 |
|
| 97 |
### Text-to-Speech (TTS)
|
| 98 |
+
- **Chatterbox** - High-quality neural TTS provider
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
## 📖 Usage
|
| 101 |
|
|
|
|
| 132 |
audio=audio_upload,
|
| 133 |
asr_model="whisper-small",
|
| 134 |
target_language="zh",
|
| 135 |
+
voice="chatterbox",
|
| 136 |
speed=1.0
|
| 137 |
)
|
| 138 |
|
|
|
|
| 176 |
|
| 177 |
```bash
|
| 178 |
# Provider preferences (comma-separated, in order of preference)
|
| 179 |
+
TTS_PROVIDERS=chatterbox
|
| 180 |
STT_PROVIDERS=whisper,parakeet
|
| 181 |
TRANSLATION_PROVIDERS=nllb
|
| 182 |
|
|
|
|
| 198 |
from src.infrastructure.config.dependency_container import DependencyContainer
|
| 199 |
|
| 200 |
container = DependencyContainer()
|
| 201 |
+
container.configure_tts_providers(['chatterbox']) # Preferred order
|
| 202 |
```
|
| 203 |
|
| 204 |
## 🏗️ Architecture Benefits
|
app.py
CHANGED
|
@@ -110,7 +110,7 @@ def get_supported_configurations() -> dict:
|
|
| 110 |
# Return fallback configurations
|
| 111 |
return {
|
| 112 |
'asr_models': ['whisper-small', 'parakeet'],
|
| 113 |
-
'voices': ['
|
| 114 |
'languages': ['en', 'zh', 'es', 'fr', 'de'],
|
| 115 |
'audio_formats': ['wav', 'mp3'],
|
| 116 |
'max_file_size_mb': 100,
|
|
|
|
| 110 |
# Return fallback configurations
|
| 111 |
return {
|
| 112 |
'asr_models': ['whisper-small', 'parakeet'],
|
| 113 |
+
'voices': ['chatterbox'],
|
| 114 |
'languages': ['en', 'zh', 'es', 'fr', 'de'],
|
| 115 |
'audio_formats': ['wav', 'mp3'],
|
| 116 |
'max_file_size_mb': 100,
|
config.example.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"tts": {
|
| 3 |
-
"preferred_providers": ["
|
| 4 |
"default_voice": "default",
|
| 5 |
"default_speed": 1.0,
|
| 6 |
"default_language": "en",
|
|
|
|
| 1 |
{
|
| 2 |
"tts": {
|
| 3 |
+
"preferred_providers": ["chatterbox"],
|
| 4 |
"default_voice": "default",
|
| 5 |
"default_speed": 1.0,
|
| 6 |
"default_language": "en",
|
src/application/dtos/processing_request_dto.py
CHANGED
|
@@ -57,7 +57,7 @@ class ProcessingRequestDto:
|
|
| 57 |
raise ValueError("Voice cannot be empty")
|
| 58 |
|
| 59 |
# Validate voice options
|
| 60 |
-
supported_voices = ['
|
| 61 |
if self.voice not in supported_voices:
|
| 62 |
raise ValueError(f"Unsupported voice: {self.voice}. Supported: {supported_voices}")
|
| 63 |
|
|
|
|
| 57 |
raise ValueError("Voice cannot be empty")
|
| 58 |
|
| 59 |
# Validate voice options
|
| 60 |
+
supported_voices = ['chatterbox']
|
| 61 |
if self.voice not in supported_voices:
|
| 62 |
raise ValueError(f"Unsupported voice: {self.voice}. Supported: {supported_voices}")
|
| 63 |
|
src/application/services/configuration_service.py
CHANGED
|
@@ -294,7 +294,7 @@ class ConfigurationApplicationService:
|
|
| 294 |
Raises:
|
| 295 |
ConfigurationException: If validation fails
|
| 296 |
"""
|
| 297 |
-
valid_providers = ['chatterbox'
|
| 298 |
valid_languages = ['en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh']
|
| 299 |
|
| 300 |
for key, value in updates.items():
|
|
@@ -515,7 +515,7 @@ class ConfigurationApplicationService:
|
|
| 515 |
|
| 516 |
# Check TTS providers
|
| 517 |
tts_factory = self._container.resolve(type(self._container._get_tts_factory()))
|
| 518 |
-
for provider in ['chatterbox'
|
| 519 |
try:
|
| 520 |
tts_factory.create_provider(provider)
|
| 521 |
availability['tts'][provider] = True
|
|
|
|
| 294 |
Raises:
|
| 295 |
ConfigurationException: If validation fails
|
| 296 |
"""
|
| 297 |
+
valid_providers = ['chatterbox']
|
| 298 |
valid_languages = ['en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh']
|
| 299 |
|
| 300 |
for key, value in updates.items():
|
|
|
|
| 515 |
|
| 516 |
# Check TTS providers
|
| 517 |
tts_factory = self._container.resolve(type(self._container._get_tts_factory()))
|
| 518 |
+
for provider in ['chatterbox']:
|
| 519 |
try:
|
| 520 |
tts_factory.create_provider(provider)
|
| 521 |
availability['tts'][provider] = True
|
src/domain/interfaces/audio_processing.py
CHANGED
|
@@ -111,7 +111,7 @@ class IAudioProcessingService(ABC):
|
|
| 111 |
|
| 112 |
# Configure voice settings
|
| 113 |
voice_settings = VoiceSettings(
|
| 114 |
-
voice_id="
|
| 115 |
speed=1.0,
|
| 116 |
language="zh"
|
| 117 |
)
|
|
|
|
| 111 |
|
| 112 |
# Configure voice settings
|
| 113 |
voice_settings = VoiceSettings(
|
| 114 |
+
voice_id="chatterbox",
|
| 115 |
speed=1.0,
|
| 116 |
language="zh"
|
| 117 |
)
|
src/domain/interfaces/speech_synthesis.py
CHANGED
|
@@ -98,7 +98,7 @@ class ISpeechSynthesisService(ABC):
|
|
| 98 |
|
| 99 |
# Configure voice settings
|
| 100 |
voice_settings = VoiceSettings(
|
| 101 |
-
voice_id="
|
| 102 |
speed=1.0,
|
| 103 |
pitch=0.0,
|
| 104 |
volume=1.0
|
|
|
|
| 98 |
|
| 99 |
# Configure voice settings
|
| 100 |
voice_settings = VoiceSettings(
|
| 101 |
+
voice_id="chatterbox",
|
| 102 |
speed=1.0,
|
| 103 |
pitch=0.0,
|
| 104 |
volume=1.0
|
src/infrastructure/config/app_config.py
CHANGED
|
@@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
|
|
| 12 |
@dataclass
|
| 13 |
class TTSConfig:
|
| 14 |
"""Configuration for TTS providers."""
|
| 15 |
-
preferred_providers: List[str] = field(default_factory=lambda: ['chatterbox'
|
| 16 |
default_voice: str = 'default'
|
| 17 |
default_speed: float = 1.0
|
| 18 |
default_language: str = 'en'
|
|
|
|
| 12 |
@dataclass
|
| 13 |
class TTSConfig:
|
| 14 |
"""Configuration for TTS providers."""
|
| 15 |
+
preferred_providers: List[str] = field(default_factory=lambda: ['chatterbox'])
|
| 16 |
default_voice: str = 'default'
|
| 17 |
default_speed: float = 1.0
|
| 18 |
default_language: str = 'en'
|
src/infrastructure/tts/__init__.py
CHANGED
|
@@ -1,24 +1,8 @@
|
|
| 1 |
"""TTS provider implementations."""
|
| 2 |
|
| 3 |
from .provider_factory import TTSProviderFactory
|
| 4 |
-
from .dummy_provider import DummyTTSProvider
|
| 5 |
-
|
| 6 |
-
# Try to import optional providers
|
| 7 |
-
try:
|
| 8 |
-
from .kokoro_provider import KokoroTTSProvider
|
| 9 |
-
except ImportError:
|
| 10 |
-
KokoroTTSProvider = None
|
| 11 |
-
|
| 12 |
-
try:
|
| 13 |
-
from .dia_provider import DiaTTSProvider
|
| 14 |
-
except ImportError:
|
| 15 |
-
DiaTTSProvider = None
|
| 16 |
-
|
| 17 |
-
try:
|
| 18 |
-
from .cosyvoice2_provider import CosyVoice2TTSProvider
|
| 19 |
-
except ImportError:
|
| 20 |
-
CosyVoice2TTSProvider = None
|
| 21 |
|
|
|
|
| 22 |
try:
|
| 23 |
from .chatterbox_provider import ChatterboxTTSProvider
|
| 24 |
except ImportError:
|
|
@@ -26,9 +10,5 @@ except ImportError:
|
|
| 26 |
|
| 27 |
__all__ = [
|
| 28 |
'TTSProviderFactory',
|
| 29 |
-
'DummyTTSProvider',
|
| 30 |
-
'KokoroTTSProvider',
|
| 31 |
-
'DiaTTSProvider',
|
| 32 |
-
'CosyVoice2TTSProvider',
|
| 33 |
'ChatterboxTTSProvider'
|
| 34 |
]
|
|
|
|
| 1 |
"""TTS provider implementations."""
|
| 2 |
|
| 3 |
from .provider_factory import TTSProviderFactory
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
# Try to import chatterbox provider
|
| 6 |
try:
|
| 7 |
from .chatterbox_provider import ChatterboxTTSProvider
|
| 8 |
except ImportError:
|
|
|
|
| 10 |
|
| 11 |
__all__ = [
|
| 12 |
'TTSProviderFactory',
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
'ChatterboxTTSProvider'
|
| 14 |
]
|
src/infrastructure/tts/dummy_provider.py
DELETED
|
@@ -1,139 +0,0 @@
|
|
| 1 |
-
"""Dummy TTS provider implementation for testing and fallback."""
|
| 2 |
-
|
| 3 |
-
import logging
|
| 4 |
-
import numpy as np
|
| 5 |
-
import soundfile as sf
|
| 6 |
-
import io
|
| 7 |
-
from typing import Iterator, TYPE_CHECKING
|
| 8 |
-
|
| 9 |
-
if TYPE_CHECKING:
|
| 10 |
-
from ...domain.models.speech_synthesis_request import SpeechSynthesisRequest
|
| 11 |
-
|
| 12 |
-
from ..base.tts_provider_base import TTSProviderBase
|
| 13 |
-
from ...domain.exceptions import SpeechSynthesisException
|
| 14 |
-
|
| 15 |
-
logger = logging.getLogger(__name__)
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class DummyTTSProvider(TTSProviderBase):
|
| 19 |
-
"""Dummy TTS provider that generates sine wave audio for testing."""
|
| 20 |
-
|
| 21 |
-
def __init__(self):
|
| 22 |
-
"""Initialize the Dummy TTS provider."""
|
| 23 |
-
super().__init__(
|
| 24 |
-
provider_name="Dummy",
|
| 25 |
-
supported_languages=['en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh']
|
| 26 |
-
)
|
| 27 |
-
|
| 28 |
-
def is_available(self) -> bool:
|
| 29 |
-
"""Dummy TTS is always available."""
|
| 30 |
-
return True
|
| 31 |
-
|
| 32 |
-
def get_available_voices(self) -> list[str]:
|
| 33 |
-
"""Get available voices for Dummy TTS."""
|
| 34 |
-
return ['default', 'male', 'female', 'robot']
|
| 35 |
-
|
| 36 |
-
def _generate_audio(self, request: 'SpeechSynthesisRequest') -> tuple[bytes, int]:
|
| 37 |
-
"""Generate dummy sine wave audio."""
|
| 38 |
-
try:
|
| 39 |
-
# Extract parameters from request
|
| 40 |
-
text = request.text_content.text
|
| 41 |
-
speed = request.voice_settings.speed
|
| 42 |
-
|
| 43 |
-
# Generate a simple sine wave based on text length and speed
|
| 44 |
-
sample_rate = 24000
|
| 45 |
-
# Rough approximation of speech duration adjusted by speed
|
| 46 |
-
duration = min(len(text) / (20 * speed), 10)
|
| 47 |
-
|
| 48 |
-
# Create time array
|
| 49 |
-
t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
|
| 50 |
-
|
| 51 |
-
# Generate sine wave (440 Hz base frequency)
|
| 52 |
-
frequency = 440
|
| 53 |
-
audio = 0.5 * np.sin(2 * np.pi * frequency * t)
|
| 54 |
-
|
| 55 |
-
# Add some variation based on voice setting
|
| 56 |
-
voice = request.voice_settings.voice_id
|
| 57 |
-
if voice == 'male':
|
| 58 |
-
# Lower frequency for male voice
|
| 59 |
-
audio = 0.5 * np.sin(2 * np.pi * 220 * t)
|
| 60 |
-
elif voice == 'female':
|
| 61 |
-
# Higher frequency for female voice
|
| 62 |
-
audio = 0.5 * np.sin(2 * np.pi * 660 * t)
|
| 63 |
-
elif voice == 'robot':
|
| 64 |
-
# Square wave for robot voice
|
| 65 |
-
audio = 0.5 * np.sign(np.sin(2 * np.pi * 440 * t))
|
| 66 |
-
|
| 67 |
-
# Convert to bytes
|
| 68 |
-
audio_bytes = self._numpy_to_bytes(audio, sample_rate)
|
| 69 |
-
|
| 70 |
-
logger.info(f"Generated dummy audio: duration={duration:.2f}s, voice={voice}")
|
| 71 |
-
return audio_bytes, sample_rate
|
| 72 |
-
|
| 73 |
-
except Exception as e:
|
| 74 |
-
self._handle_provider_error(e, "dummy audio generation")
|
| 75 |
-
|
| 76 |
-
def _generate_audio_stream(self, request: 'SpeechSynthesisRequest') -> Iterator[tuple[bytes, int, bool]]:
|
| 77 |
-
"""Generate dummy sine wave audio stream."""
|
| 78 |
-
try:
|
| 79 |
-
# Extract parameters from request
|
| 80 |
-
text = request.text_content.text
|
| 81 |
-
speed = request.voice_settings.speed
|
| 82 |
-
|
| 83 |
-
# Generate audio in chunks
|
| 84 |
-
sample_rate = 24000
|
| 85 |
-
chunk_duration = 1.0 # 1 second chunks
|
| 86 |
-
total_duration = min(len(text) / (20 * speed), 10)
|
| 87 |
-
|
| 88 |
-
chunks_count = int(np.ceil(total_duration / chunk_duration))
|
| 89 |
-
|
| 90 |
-
for chunk_idx in range(chunks_count):
|
| 91 |
-
start_time = chunk_idx * chunk_duration
|
| 92 |
-
end_time = min((chunk_idx + 1) * chunk_duration, total_duration)
|
| 93 |
-
actual_duration = end_time - start_time
|
| 94 |
-
|
| 95 |
-
if actual_duration <= 0:
|
| 96 |
-
break
|
| 97 |
-
|
| 98 |
-
# Create time array for this chunk
|
| 99 |
-
t = np.linspace(0, actual_duration, int(sample_rate * actual_duration), endpoint=False)
|
| 100 |
-
|
| 101 |
-
# Generate sine wave
|
| 102 |
-
frequency = 440
|
| 103 |
-
audio = 0.5 * np.sin(2 * np.pi * frequency * t)
|
| 104 |
-
|
| 105 |
-
# Apply voice variations
|
| 106 |
-
voice = request.voice_settings.voice_id
|
| 107 |
-
if voice == 'male':
|
| 108 |
-
audio = 0.5 * np.sin(2 * np.pi * 220 * t)
|
| 109 |
-
elif voice == 'female':
|
| 110 |
-
audio = 0.5 * np.sin(2 * np.pi * 660 * t)
|
| 111 |
-
elif voice == 'robot':
|
| 112 |
-
audio = 0.5 * np.sign(np.sin(2 * np.pi * 440 * t))
|
| 113 |
-
|
| 114 |
-
# Convert to bytes
|
| 115 |
-
audio_bytes = self._numpy_to_bytes(audio, sample_rate)
|
| 116 |
-
|
| 117 |
-
# Check if this is the final chunk
|
| 118 |
-
is_final = (chunk_idx == chunks_count - 1)
|
| 119 |
-
|
| 120 |
-
yield audio_bytes, sample_rate, is_final
|
| 121 |
-
|
| 122 |
-
except Exception as e:
|
| 123 |
-
self._handle_provider_error(e, "dummy streaming audio generation")
|
| 124 |
-
|
| 125 |
-
def _numpy_to_bytes(self, audio_array: np.ndarray, sample_rate: int) -> bytes:
|
| 126 |
-
"""Convert numpy audio array to bytes."""
|
| 127 |
-
try:
|
| 128 |
-
# Create an in-memory buffer
|
| 129 |
-
buffer = io.BytesIO()
|
| 130 |
-
|
| 131 |
-
# Write audio data to buffer as WAV
|
| 132 |
-
sf.write(buffer, audio_array, sample_rate, format='WAV')
|
| 133 |
-
|
| 134 |
-
# Get bytes from buffer
|
| 135 |
-
buffer.seek(0)
|
| 136 |
-
return buffer.read()
|
| 137 |
-
|
| 138 |
-
except Exception as e:
|
| 139 |
-
raise SpeechSynthesisException(f"Failed to convert audio to bytes: {str(e)}") from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/infrastructure/tts/provider_factory.py
CHANGED
|
@@ -18,20 +18,17 @@ class TTSProviderFactory:
|
|
| 18 |
self._register_default_providers()
|
| 19 |
|
| 20 |
def _register_default_providers(self):
|
| 21 |
-
"""Register
|
| 22 |
# Import providers dynamically to avoid import errors if dependencies are missing
|
| 23 |
|
| 24 |
-
# Always register dummy provider as fallback
|
| 25 |
-
from .dummy_provider import DummyTTSProvider
|
| 26 |
-
self._providers['dummy'] = DummyTTSProvider
|
| 27 |
-
|
| 28 |
# Register only Chatterbox provider
|
| 29 |
try:
|
| 30 |
from .chatterbox_provider import ChatterboxTTSProvider
|
| 31 |
self._providers['chatterbox'] = ChatterboxTTSProvider
|
| 32 |
logger.info("Registered Chatterbox TTS provider")
|
| 33 |
except ImportError as e:
|
| 34 |
-
logger.
|
|
|
|
| 35 |
|
| 36 |
def get_available_providers(self) -> List[str]:
|
| 37 |
"""Get list of available TTS providers."""
|
|
@@ -44,10 +41,7 @@ class TTSProviderFactory:
|
|
| 44 |
# Create instance if not cached
|
| 45 |
if name not in self._provider_instances:
|
| 46 |
logger.info(f"Creating instance for {name} provider")
|
| 47 |
-
|
| 48 |
-
self._provider_instances[name] = provider_class()
|
| 49 |
-
else:
|
| 50 |
-
self._provider_instances[name] = provider_class()
|
| 51 |
|
| 52 |
# Check if provider is available
|
| 53 |
logger.info(f"Checking availability for {name}")
|
|
@@ -94,11 +88,8 @@ class TTSProviderFactory:
|
|
| 94 |
provider_class = self._providers[provider_name]
|
| 95 |
|
| 96 |
# Create instance with appropriate parameters
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
provider = provider_class(lang_code=lang_code)
|
| 100 |
-
else:
|
| 101 |
-
provider = provider_class(**kwargs)
|
| 102 |
|
| 103 |
# Verify the provider is available
|
| 104 |
if not provider.is_available():
|
|
@@ -126,7 +117,7 @@ class TTSProviderFactory:
|
|
| 126 |
SpeechSynthesisException: If no providers are available
|
| 127 |
"""
|
| 128 |
if preferred_providers is None:
|
| 129 |
-
preferred_providers = ['chatterbox'
|
| 130 |
|
| 131 |
logger.info(f"🔄 Getting TTS provider with fallback, preferred order: {preferred_providers}")
|
| 132 |
available_providers = self.get_available_providers()
|
|
@@ -174,10 +165,7 @@ class TTSProviderFactory:
|
|
| 174 |
# Create instance if not cached
|
| 175 |
if provider_name not in self._provider_instances:
|
| 176 |
provider_class = self._providers[provider_name]
|
| 177 |
-
|
| 178 |
-
self._provider_instances[provider_name] = provider_class()
|
| 179 |
-
else:
|
| 180 |
-
self._provider_instances[provider_name] = provider_class()
|
| 181 |
|
| 182 |
provider = self._provider_instances[provider_name]
|
| 183 |
|
|
|
|
| 18 |
self._register_default_providers()
|
| 19 |
|
| 20 |
def _register_default_providers(self):
|
| 21 |
+
"""Register available TTS providers."""
|
| 22 |
# Import providers dynamically to avoid import errors if dependencies are missing
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
# Register only Chatterbox provider
|
| 25 |
try:
|
| 26 |
from .chatterbox_provider import ChatterboxTTSProvider
|
| 27 |
self._providers['chatterbox'] = ChatterboxTTSProvider
|
| 28 |
logger.info("Registered Chatterbox TTS provider")
|
| 29 |
except ImportError as e:
|
| 30 |
+
logger.warning(f"Chatterbox TTS provider not available: {e}")
|
| 31 |
+
raise SpeechSynthesisException("No TTS providers available - Chatterbox is required") from e
|
| 32 |
|
| 33 |
def get_available_providers(self) -> List[str]:
|
| 34 |
"""Get list of available TTS providers."""
|
|
|
|
| 41 |
# Create instance if not cached
|
| 42 |
if name not in self._provider_instances:
|
| 43 |
logger.info(f"Creating instance for {name} provider")
|
| 44 |
+
self._provider_instances[name] = provider_class()
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
# Check if provider is available
|
| 47 |
logger.info(f"Checking availability for {name}")
|
|
|
|
| 88 |
provider_class = self._providers[provider_name]
|
| 89 |
|
| 90 |
# Create instance with appropriate parameters
|
| 91 |
+
lang_code = kwargs.get('lang_code', 'en')
|
| 92 |
+
provider = provider_class(lang_code=lang_code)
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
# Verify the provider is available
|
| 95 |
if not provider.is_available():
|
|
|
|
| 117 |
SpeechSynthesisException: If no providers are available
|
| 118 |
"""
|
| 119 |
if preferred_providers is None:
|
| 120 |
+
preferred_providers = ['chatterbox']
|
| 121 |
|
| 122 |
logger.info(f"🔄 Getting TTS provider with fallback, preferred order: {preferred_providers}")
|
| 123 |
available_providers = self.get_available_providers()
|
|
|
|
| 165 |
# Create instance if not cached
|
| 166 |
if provider_name not in self._provider_instances:
|
| 167 |
provider_class = self._providers[provider_name]
|
| 168 |
+
self._provider_instances[provider_name] = provider_class()
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
provider = self._provider_instances[provider_name]
|
| 171 |
|
tests/integration/test_audio_processing_pipeline.py
CHANGED
|
@@ -61,7 +61,7 @@ class TestAudioProcessingPipeline:
|
|
| 61 |
|
| 62 |
# TTS configuration
|
| 63 |
config.get_tts_config.return_value = {
|
| 64 |
-
'preferred_providers': ['
|
| 65 |
}
|
| 66 |
|
| 67 |
return config
|
|
@@ -124,7 +124,7 @@ class TestAudioProcessingPipeline:
|
|
| 124 |
asr_model="whisper-small",
|
| 125 |
target_language="es",
|
| 126 |
source_language="en",
|
| 127 |
-
voice="
|
| 128 |
speed=1.0,
|
| 129 |
requires_translation=True
|
| 130 |
)
|
|
@@ -152,7 +152,7 @@ class TestAudioProcessingPipeline:
|
|
| 152 |
asr_model="whisper-small",
|
| 153 |
target_language="en",
|
| 154 |
source_language="en",
|
| 155 |
-
voice="
|
| 156 |
speed=1.0,
|
| 157 |
requires_translation=False
|
| 158 |
)
|
|
@@ -171,7 +171,7 @@ class TestAudioProcessingPipeline:
|
|
| 171 |
asr_model="whisper-medium",
|
| 172 |
target_language="fr",
|
| 173 |
source_language="en",
|
| 174 |
-
voice="
|
| 175 |
speed=1.5,
|
| 176 |
requires_translation=True
|
| 177 |
)
|
|
@@ -179,7 +179,7 @@ class TestAudioProcessingPipeline:
|
|
| 179 |
result = audio_service.process_audio_pipeline(request)
|
| 180 |
|
| 181 |
assert result.success is True
|
| 182 |
-
assert result.metadata['voice'] == "
|
| 183 |
assert result.metadata['speed'] == 1.5
|
| 184 |
assert result.metadata['asr_model'] == "whisper-medium"
|
| 185 |
|
|
@@ -208,7 +208,7 @@ class TestAudioProcessingPipeline:
|
|
| 208 |
audio=large_audio,
|
| 209 |
asr_model="whisper-small",
|
| 210 |
target_language="es",
|
| 211 |
-
voice="
|
| 212 |
speed=1.0,
|
| 213 |
requires_translation=True
|
| 214 |
)
|
|
@@ -296,7 +296,7 @@ class TestAudioProcessingPipeline:
|
|
| 296 |
audio=sample_audio_upload,
|
| 297 |
asr_model="whisper-small",
|
| 298 |
target_language="es",
|
| 299 |
-
voice="
|
| 300 |
speed=1.0,
|
| 301 |
requires_translation=True
|
| 302 |
)
|
|
|
|
| 61 |
|
| 62 |
# TTS configuration
|
| 63 |
config.get_tts_config.return_value = {
|
| 64 |
+
'preferred_providers': ['chatterbox']
|
| 65 |
}
|
| 66 |
|
| 67 |
return config
|
|
|
|
| 124 |
asr_model="whisper-small",
|
| 125 |
target_language="es",
|
| 126 |
source_language="en",
|
| 127 |
+
voice="chatterbox",
|
| 128 |
speed=1.0,
|
| 129 |
requires_translation=True
|
| 130 |
)
|
|
|
|
| 152 |
asr_model="whisper-small",
|
| 153 |
target_language="en",
|
| 154 |
source_language="en",
|
| 155 |
+
voice="chatterbox",
|
| 156 |
speed=1.0,
|
| 157 |
requires_translation=False
|
| 158 |
)
|
|
|
|
| 171 |
asr_model="whisper-medium",
|
| 172 |
target_language="fr",
|
| 173 |
source_language="en",
|
| 174 |
+
voice="chatterbox",
|
| 175 |
speed=1.5,
|
| 176 |
requires_translation=True
|
| 177 |
)
|
|
|
|
| 179 |
result = audio_service.process_audio_pipeline(request)
|
| 180 |
|
| 181 |
assert result.success is True
|
| 182 |
+
assert result.metadata['voice'] == "chatterbox"
|
| 183 |
assert result.metadata['speed'] == 1.5
|
| 184 |
assert result.metadata['asr_model'] == "whisper-medium"
|
| 185 |
|
|
|
|
| 208 |
audio=large_audio,
|
| 209 |
asr_model="whisper-small",
|
| 210 |
target_language="es",
|
| 211 |
+
voice="chatterbox",
|
| 212 |
speed=1.0,
|
| 213 |
requires_translation=True
|
| 214 |
)
|
|
|
|
| 296 |
audio=sample_audio_upload,
|
| 297 |
asr_model="whisper-small",
|
| 298 |
target_language="es",
|
| 299 |
+
voice="chatterbox",
|
| 300 |
speed=1.0,
|
| 301 |
requires_translation=True
|
| 302 |
)
|
tests/integration/test_file_handling.py
CHANGED
|
@@ -57,7 +57,7 @@ class TestFileHandling:
|
|
| 57 |
|
| 58 |
# TTS configuration
|
| 59 |
config.get_tts_config.return_value = {
|
| 60 |
-
'preferred_providers': ['
|
| 61 |
}
|
| 62 |
|
| 63 |
return config
|
|
@@ -139,7 +139,7 @@ class TestFileHandling:
|
|
| 139 |
audio=audio_upload,
|
| 140 |
asr_model="whisper-small",
|
| 141 |
target_language="es",
|
| 142 |
-
voice="
|
| 143 |
speed=1.0,
|
| 144 |
requires_translation=True
|
| 145 |
)
|
|
@@ -170,7 +170,7 @@ class TestFileHandling:
|
|
| 170 |
audio=audio_upload,
|
| 171 |
asr_model="whisper-small",
|
| 172 |
target_language="en",
|
| 173 |
-
voice="
|
| 174 |
speed=1.0,
|
| 175 |
requires_translation=False
|
| 176 |
)
|
|
@@ -194,7 +194,7 @@ class TestFileHandling:
|
|
| 194 |
audio=audio_upload,
|
| 195 |
asr_model="whisper-small",
|
| 196 |
target_language="es",
|
| 197 |
-
voice="
|
| 198 |
speed=1.0,
|
| 199 |
requires_translation=True
|
| 200 |
)
|
|
@@ -226,7 +226,7 @@ class TestFileHandling:
|
|
| 226 |
audio=audio_upload,
|
| 227 |
asr_model="whisper-small",
|
| 228 |
target_language="es",
|
| 229 |
-
voice="
|
| 230 |
speed=1.0,
|
| 231 |
requires_translation=True
|
| 232 |
)
|
|
@@ -262,7 +262,7 @@ class TestFileHandling:
|
|
| 262 |
audio=audio_upload,
|
| 263 |
asr_model="whisper-small",
|
| 264 |
target_language="es",
|
| 265 |
-
voice="
|
| 266 |
speed=1.0,
|
| 267 |
requires_translation=True
|
| 268 |
)
|
|
@@ -294,7 +294,7 @@ class TestFileHandling:
|
|
| 294 |
audio=audio_upload,
|
| 295 |
asr_model="whisper-small",
|
| 296 |
target_language="es",
|
| 297 |
-
voice="
|
| 298 |
speed=1.0,
|
| 299 |
requires_translation=True
|
| 300 |
)
|
|
@@ -325,7 +325,7 @@ class TestFileHandling:
|
|
| 325 |
audio=audio_upload,
|
| 326 |
asr_model="whisper-small",
|
| 327 |
target_language="es",
|
| 328 |
-
voice="
|
| 329 |
speed=1.0,
|
| 330 |
requires_translation=True
|
| 331 |
)
|
|
@@ -373,7 +373,7 @@ class TestFileHandling:
|
|
| 373 |
audio=audio_upload,
|
| 374 |
asr_model="whisper-small",
|
| 375 |
target_language="es",
|
| 376 |
-
voice="
|
| 377 |
speed=1.0,
|
| 378 |
requires_translation=True
|
| 379 |
)
|
|
@@ -405,7 +405,7 @@ class TestFileHandling:
|
|
| 405 |
audio=audio_upload,
|
| 406 |
asr_model="whisper-small",
|
| 407 |
target_language="es",
|
| 408 |
-
voice="
|
| 409 |
speed=1.0,
|
| 410 |
requires_translation=True
|
| 411 |
)
|
|
@@ -435,7 +435,7 @@ class TestFileHandling:
|
|
| 435 |
audio=audio_upload,
|
| 436 |
asr_model="whisper-small",
|
| 437 |
target_language="es",
|
| 438 |
-
voice="
|
| 439 |
speed=1.0,
|
| 440 |
requires_translation=True
|
| 441 |
)
|
|
@@ -474,7 +474,7 @@ class TestFileHandling:
|
|
| 474 |
audio=audio_upload,
|
| 475 |
asr_model="whisper-small",
|
| 476 |
target_language="es",
|
| 477 |
-
voice="
|
| 478 |
speed=1.0,
|
| 479 |
requires_translation=True
|
| 480 |
)
|
|
@@ -501,7 +501,7 @@ class TestFileHandling:
|
|
| 501 |
audio=audio_upload,
|
| 502 |
asr_model="whisper-small",
|
| 503 |
target_language="es",
|
| 504 |
-
voice="
|
| 505 |
speed=1.0,
|
| 506 |
requires_translation=True
|
| 507 |
)
|
|
@@ -533,7 +533,7 @@ class TestFileHandling:
|
|
| 533 |
audio=audio_upload,
|
| 534 |
asr_model="whisper-small",
|
| 535 |
target_language="es",
|
| 536 |
-
voice="
|
| 537 |
speed=1.0,
|
| 538 |
requires_translation=True
|
| 539 |
)
|
|
@@ -561,7 +561,7 @@ class TestFileHandling:
|
|
| 561 |
audio=audio_upload,
|
| 562 |
asr_model="whisper-small",
|
| 563 |
target_language="es",
|
| 564 |
-
voice="
|
| 565 |
speed=1.0,
|
| 566 |
requires_translation=True
|
| 567 |
)
|
|
|
|
| 57 |
|
| 58 |
# TTS configuration
|
| 59 |
config.get_tts_config.return_value = {
|
| 60 |
+
'preferred_providers': ['chatterbox']
|
| 61 |
}
|
| 62 |
|
| 63 |
return config
|
|
|
|
| 139 |
audio=audio_upload,
|
| 140 |
asr_model="whisper-small",
|
| 141 |
target_language="es",
|
| 142 |
+
voice="chatterbox",
|
| 143 |
speed=1.0,
|
| 144 |
requires_translation=True
|
| 145 |
)
|
|
|
|
| 170 |
audio=audio_upload,
|
| 171 |
asr_model="whisper-small",
|
| 172 |
target_language="en",
|
| 173 |
+
voice="chatterbox",
|
| 174 |
speed=1.0,
|
| 175 |
requires_translation=False
|
| 176 |
)
|
|
|
|
| 194 |
audio=audio_upload,
|
| 195 |
asr_model="whisper-small",
|
| 196 |
target_language="es",
|
| 197 |
+
voice="chatterbox",
|
| 198 |
speed=1.0,
|
| 199 |
requires_translation=True
|
| 200 |
)
|
|
|
|
| 226 |
audio=audio_upload,
|
| 227 |
asr_model="whisper-small",
|
| 228 |
target_language="es",
|
| 229 |
+
voice="chatterbox",
|
| 230 |
speed=1.0,
|
| 231 |
requires_translation=True
|
| 232 |
)
|
|
|
|
| 262 |
audio=audio_upload,
|
| 263 |
asr_model="whisper-small",
|
| 264 |
target_language="es",
|
| 265 |
+
voice="chatterbox",
|
| 266 |
speed=1.0,
|
| 267 |
requires_translation=True
|
| 268 |
)
|
|
|
|
| 294 |
audio=audio_upload,
|
| 295 |
asr_model="whisper-small",
|
| 296 |
target_language="es",
|
| 297 |
+
voice="chatterbox",
|
| 298 |
speed=1.0,
|
| 299 |
requires_translation=True
|
| 300 |
)
|
|
|
|
| 325 |
audio=audio_upload,
|
| 326 |
asr_model="whisper-small",
|
| 327 |
target_language="es",
|
| 328 |
+
voice="chatterbox",
|
| 329 |
speed=1.0,
|
| 330 |
requires_translation=True
|
| 331 |
)
|
|
|
|
| 373 |
audio=audio_upload,
|
| 374 |
asr_model="whisper-small",
|
| 375 |
target_language="es",
|
| 376 |
+
voice="chatterbox",
|
| 377 |
speed=1.0,
|
| 378 |
requires_translation=True
|
| 379 |
)
|
|
|
|
| 405 |
audio=audio_upload,
|
| 406 |
asr_model="whisper-small",
|
| 407 |
target_language="es",
|
| 408 |
+
voice="chatterbox",
|
| 409 |
speed=1.0,
|
| 410 |
requires_translation=True
|
| 411 |
)
|
|
|
|
| 435 |
audio=audio_upload,
|
| 436 |
asr_model="whisper-small",
|
| 437 |
target_language="es",
|
| 438 |
+
voice="chatterbox",
|
| 439 |
speed=1.0,
|
| 440 |
requires_translation=True
|
| 441 |
)
|
|
|
|
| 474 |
audio=audio_upload,
|
| 475 |
asr_model="whisper-small",
|
| 476 |
target_language="es",
|
| 477 |
+
voice="chatterbox",
|
| 478 |
speed=1.0,
|
| 479 |
requires_translation=True
|
| 480 |
)
|
|
|
|
| 501 |
audio=audio_upload,
|
| 502 |
asr_model="whisper-small",
|
| 503 |
target_language="es",
|
| 504 |
+
voice="chatterbox",
|
| 505 |
speed=1.0,
|
| 506 |
requires_translation=True
|
| 507 |
)
|
|
|
|
| 533 |
audio=audio_upload,
|
| 534 |
asr_model="whisper-small",
|
| 535 |
target_language="es",
|
| 536 |
+
voice="chatterbox",
|
| 537 |
speed=1.0,
|
| 538 |
requires_translation=True
|
| 539 |
)
|
|
|
|
| 561 |
audio=audio_upload,
|
| 562 |
asr_model="whisper-small",
|
| 563 |
target_language="es",
|
| 564 |
+
voice="chatterbox",
|
| 565 |
speed=1.0,
|
| 566 |
requires_translation=True
|
| 567 |
)
|
tests/integration/test_performance_and_errors.py
CHANGED
|
@@ -61,7 +61,7 @@ class TestPerformanceAndErrors:
|
|
| 61 |
|
| 62 |
# TTS configuration
|
| 63 |
config.get_tts_config.return_value = {
|
| 64 |
-
'preferred_providers': ['
|
| 65 |
'provider_timeout': 30.0,
|
| 66 |
'max_retries': 3
|
| 67 |
}
|
|
@@ -133,7 +133,7 @@ class TestPerformanceAndErrors:
|
|
| 133 |
audio=audio_upload,
|
| 134 |
asr_model="whisper-small",
|
| 135 |
target_language="es",
|
| 136 |
-
voice="
|
| 137 |
speed=1.0,
|
| 138 |
requires_translation=True
|
| 139 |
)
|
|
@@ -243,7 +243,7 @@ class TestPerformanceAndErrors:
|
|
| 243 |
audio=audio_upload,
|
| 244 |
asr_model="whisper-small",
|
| 245 |
target_language="es",
|
| 246 |
-
voice="
|
| 247 |
speed=1.0,
|
| 248 |
requires_translation=True
|
| 249 |
)
|
|
@@ -359,7 +359,7 @@ class TestPerformanceAndErrors:
|
|
| 359 |
audio=invalid_audio,
|
| 360 |
asr_model="whisper-small",
|
| 361 |
target_language="es",
|
| 362 |
-
voice="
|
| 363 |
speed=1.0,
|
| 364 |
requires_translation=True
|
| 365 |
)
|
|
@@ -389,7 +389,7 @@ class TestPerformanceAndErrors:
|
|
| 389 |
audio=oversized_audio,
|
| 390 |
asr_model="whisper-small",
|
| 391 |
target_language="es",
|
| 392 |
-
voice="
|
| 393 |
speed=1.0,
|
| 394 |
requires_translation=True
|
| 395 |
)
|
|
@@ -413,7 +413,7 @@ class TestPerformanceAndErrors:
|
|
| 413 |
audio=corrupted_audio,
|
| 414 |
asr_model="whisper-small",
|
| 415 |
target_language="es",
|
| 416 |
-
voice="
|
| 417 |
speed=1.0,
|
| 418 |
requires_translation=True
|
| 419 |
)
|
|
|
|
| 61 |
|
| 62 |
# TTS configuration
|
| 63 |
config.get_tts_config.return_value = {
|
| 64 |
+
'preferred_providers': ['chatterbox'],
|
| 65 |
'provider_timeout': 30.0,
|
| 66 |
'max_retries': 3
|
| 67 |
}
|
|
|
|
| 133 |
audio=audio_upload,
|
| 134 |
asr_model="whisper-small",
|
| 135 |
target_language="es",
|
| 136 |
+
voice="chatterbox",
|
| 137 |
speed=1.0,
|
| 138 |
requires_translation=True
|
| 139 |
)
|
|
|
|
| 243 |
audio=audio_upload,
|
| 244 |
asr_model="whisper-small",
|
| 245 |
target_language="es",
|
| 246 |
+
voice="chatterbox",
|
| 247 |
speed=1.0,
|
| 248 |
requires_translation=True
|
| 249 |
)
|
|
|
|
| 359 |
audio=invalid_audio,
|
| 360 |
asr_model="whisper-small",
|
| 361 |
target_language="es",
|
| 362 |
+
voice="chatterbox",
|
| 363 |
speed=1.0,
|
| 364 |
requires_translation=True
|
| 365 |
)
|
|
|
|
| 389 |
audio=oversized_audio,
|
| 390 |
asr_model="whisper-small",
|
| 391 |
target_language="es",
|
| 392 |
+
voice="chatterbox",
|
| 393 |
speed=1.0,
|
| 394 |
requires_translation=True
|
| 395 |
)
|
|
|
|
| 413 |
audio=corrupted_audio,
|
| 414 |
asr_model="whisper-small",
|
| 415 |
target_language="es",
|
| 416 |
+
voice="chatterbox",
|
| 417 |
speed=1.0,
|
| 418 |
requires_translation=True
|
| 419 |
)
|
tests/integration/test_provider_integration.py
CHANGED
|
@@ -31,7 +31,7 @@ class TestProviderIntegration:
|
|
| 31 |
config = Mock(spec=AppConfig)
|
| 32 |
|
| 33 |
# TTS configuration
|
| 34 |
-
config.tts.preferred_providers = ['
|
| 35 |
config.tts.fallback_enabled = True
|
| 36 |
config.tts.provider_timeout = 30.0
|
| 37 |
|
|
@@ -85,7 +85,7 @@ class TestProviderIntegration:
|
|
| 85 |
)
|
| 86 |
|
| 87 |
# Test each TTS provider
|
| 88 |
-
providers_to_test = ['
|
| 89 |
|
| 90 |
for provider_name in providers_to_test:
|
| 91 |
with patch(f'src.infrastructure.tts.{provider_name}_provider') as mock_provider_module:
|
|
@@ -247,7 +247,7 @@ class TestProviderIntegration:
|
|
| 247 |
def test_provider_configuration_loading(self, dependency_container, mock_config):
|
| 248 |
"""Test provider configuration loading and validation."""
|
| 249 |
# Test TTS configuration
|
| 250 |
-
tts_provider = dependency_container.get_tts_provider('
|
| 251 |
assert tts_provider is not None
|
| 252 |
|
| 253 |
# Test STT configuration
|
|
@@ -310,7 +310,7 @@ class TestProviderIntegration:
|
|
| 310 |
|
| 311 |
# Measure performance
|
| 312 |
start_time = time.time()
|
| 313 |
-
provider = dependency_container.get_tts_provider('
|
| 314 |
result = provider.synthesize(synthesis_request)
|
| 315 |
end_time = time.time()
|
| 316 |
|
|
@@ -322,7 +322,7 @@ class TestProviderIntegration:
|
|
| 322 |
def test_provider_resource_cleanup(self, dependency_container):
|
| 323 |
"""Test provider resource cleanup."""
|
| 324 |
# Get multiple providers
|
| 325 |
-
tts_provider = dependency_container.get_tts_provider('
|
| 326 |
stt_provider = dependency_container.get_stt_provider('whisper-small')
|
| 327 |
translation_provider = dependency_container.get_translation_provider()
|
| 328 |
|
|
@@ -356,7 +356,7 @@ class TestProviderIntegration:
|
|
| 356 |
|
| 357 |
def synthesize_audio():
|
| 358 |
try:
|
| 359 |
-
provider = dependency_container.get_tts_provider('
|
| 360 |
with patch.object(provider, 'synthesize') as mock_synthesize:
|
| 361 |
mock_synthesize.return_value = AudioContent(
|
| 362 |
data=b"concurrent_audio_data",
|
|
@@ -396,15 +396,15 @@ class TestProviderIntegration:
|
|
| 396 |
"""Test dynamic provider configuration updates."""
|
| 397 |
# Initial configuration
|
| 398 |
initial_providers = mock_config.tts.preferred_providers
|
| 399 |
-
assert '
|
| 400 |
|
| 401 |
# Update configuration
|
| 402 |
-
mock_config.tts.preferred_providers = ['
|
| 403 |
|
| 404 |
# Verify configuration update affects provider selection
|
| 405 |
# (This would require actual implementation of dynamic config updates)
|
| 406 |
updated_providers = mock_config.tts.preferred_providers
|
| 407 |
-
assert '
|
| 408 |
assert 'dummy' in updated_providers
|
| 409 |
|
| 410 |
def test_provider_health_checking(self, dependency_container):
|
|
|
|
| 31 |
config = Mock(spec=AppConfig)
|
| 32 |
|
| 33 |
# TTS configuration
|
| 34 |
+
config.tts.preferred_providers = ['chatterbox']
|
| 35 |
config.tts.fallback_enabled = True
|
| 36 |
config.tts.provider_timeout = 30.0
|
| 37 |
|
|
|
|
| 85 |
)
|
| 86 |
|
| 87 |
# Test each TTS provider
|
| 88 |
+
providers_to_test = ['chatterbox']
|
| 89 |
|
| 90 |
for provider_name in providers_to_test:
|
| 91 |
with patch(f'src.infrastructure.tts.{provider_name}_provider') as mock_provider_module:
|
|
|
|
| 247 |
def test_provider_configuration_loading(self, dependency_container, mock_config):
|
| 248 |
"""Test provider configuration loading and validation."""
|
| 249 |
# Test TTS configuration
|
| 250 |
+
tts_provider = dependency_container.get_tts_provider('chatterbox')
|
| 251 |
assert tts_provider is not None
|
| 252 |
|
| 253 |
# Test STT configuration
|
|
|
|
| 310 |
|
| 311 |
# Measure performance
|
| 312 |
start_time = time.time()
|
| 313 |
+
provider = dependency_container.get_tts_provider('chatterbox')
|
| 314 |
result = provider.synthesize(synthesis_request)
|
| 315 |
end_time = time.time()
|
| 316 |
|
|
|
|
| 322 |
def test_provider_resource_cleanup(self, dependency_container):
|
| 323 |
"""Test provider resource cleanup."""
|
| 324 |
# Get multiple providers
|
| 325 |
+
tts_provider = dependency_container.get_tts_provider('chatterbox')
|
| 326 |
stt_provider = dependency_container.get_stt_provider('whisper-small')
|
| 327 |
translation_provider = dependency_container.get_translation_provider()
|
| 328 |
|
|
|
|
| 356 |
|
| 357 |
def synthesize_audio():
|
| 358 |
try:
|
| 359 |
+
provider = dependency_container.get_tts_provider('chatterbox')
|
| 360 |
with patch.object(provider, 'synthesize') as mock_synthesize:
|
| 361 |
mock_synthesize.return_value = AudioContent(
|
| 362 |
data=b"concurrent_audio_data",
|
|
|
|
| 396 |
"""Test dynamic provider configuration updates."""
|
| 397 |
# Initial configuration
|
| 398 |
initial_providers = mock_config.tts.preferred_providers
|
| 399 |
+
assert 'chatterbox' in initial_providers
|
| 400 |
|
| 401 |
# Update configuration
|
| 402 |
+
mock_config.tts.preferred_providers = ['chatterbox']
|
| 403 |
|
| 404 |
# Verify configuration update affects provider selection
|
| 405 |
# (This would require actual implementation of dynamic config updates)
|
| 406 |
updated_providers = mock_config.tts.preferred_providers
|
| 407 |
+
assert 'chatterbox' in updated_providers
|
| 408 |
assert 'dummy' in updated_providers
|
| 409 |
|
| 410 |
def test_provider_health_checking(self, dependency_container):
|
tests/unit/application/dtos/test_processing_request_dto.py
CHANGED
|
@@ -24,7 +24,7 @@ class TestProcessingRequestDto:
|
|
| 24 |
audio=sample_audio_upload,
|
| 25 |
asr_model="whisper-small",
|
| 26 |
target_language="es",
|
| 27 |
-
voice="
|
| 28 |
speed=1.0,
|
| 29 |
source_language="en"
|
| 30 |
)
|
|
@@ -43,7 +43,7 @@ class TestProcessingRequestDto:
|
|
| 43 |
audio=sample_audio_upload,
|
| 44 |
asr_model="whisper-medium",
|
| 45 |
target_language="fr",
|
| 46 |
-
voice="
|
| 47 |
)
|
| 48 |
|
| 49 |
assert dto.speed == 1.0 # Default speed
|
|
@@ -61,7 +61,7 @@ class TestProcessingRequestDto:
|
|
| 61 |
audio=sample_audio_upload,
|
| 62 |
asr_model="whisper-large",
|
| 63 |
target_language="de",
|
| 64 |
-
voice="
|
| 65 |
additional_params=additional_params
|
| 66 |
)
|
| 67 |
|
|
@@ -74,7 +74,7 @@ class TestProcessingRequestDto:
|
|
| 74 |
audio="invalid_audio", # Not AudioUploadDto
|
| 75 |
asr_model="whisper-small",
|
| 76 |
target_language="es",
|
| 77 |
-
voice="
|
| 78 |
)
|
| 79 |
|
| 80 |
def test_empty_asr_model_validation(self, sample_audio_upload):
|
|
@@ -84,7 +84,7 @@ class TestProcessingRequestDto:
|
|
| 84 |
audio=sample_audio_upload,
|
| 85 |
asr_model="",
|
| 86 |
target_language="es",
|
| 87 |
-
voice="
|
| 88 |
)
|
| 89 |
|
| 90 |
def test_unsupported_asr_model_validation(self, sample_audio_upload):
|
|
@@ -94,7 +94,7 @@ class TestProcessingRequestDto:
|
|
| 94 |
audio=sample_audio_upload,
|
| 95 |
asr_model="invalid-model",
|
| 96 |
target_language="es",
|
| 97 |
-
voice="
|
| 98 |
)
|
| 99 |
|
| 100 |
def test_supported_asr_models(self, sample_audio_upload):
|
|
@@ -107,7 +107,7 @@ class TestProcessingRequestDto:
|
|
| 107 |
audio=sample_audio_upload,
|
| 108 |
asr_model=model,
|
| 109 |
target_language="es",
|
| 110 |
-
voice="
|
| 111 |
)
|
| 112 |
assert dto.asr_model == model
|
| 113 |
|
|
@@ -118,7 +118,7 @@ class TestProcessingRequestDto:
|
|
| 118 |
audio=sample_audio_upload,
|
| 119 |
asr_model="whisper-small",
|
| 120 |
target_language="",
|
| 121 |
-
voice="
|
| 122 |
)
|
| 123 |
|
| 124 |
def test_unsupported_target_language_validation(self, sample_audio_upload):
|
|
@@ -128,7 +128,7 @@ class TestProcessingRequestDto:
|
|
| 128 |
audio=sample_audio_upload,
|
| 129 |
asr_model="whisper-small",
|
| 130 |
target_language="invalid-lang",
|
| 131 |
-
voice="
|
| 132 |
)
|
| 133 |
|
| 134 |
def test_unsupported_source_language_validation(self, sample_audio_upload):
|
|
@@ -138,7 +138,7 @@ class TestProcessingRequestDto:
|
|
| 138 |
audio=sample_audio_upload,
|
| 139 |
asr_model="whisper-small",
|
| 140 |
target_language="es",
|
| 141 |
-
voice="
|
| 142 |
source_language="invalid-lang"
|
| 143 |
)
|
| 144 |
|
|
@@ -155,7 +155,7 @@ class TestProcessingRequestDto:
|
|
| 155 |
audio=sample_audio_upload,
|
| 156 |
asr_model="whisper-small",
|
| 157 |
target_language=lang,
|
| 158 |
-
voice="
|
| 159 |
source_language=lang
|
| 160 |
)
|
| 161 |
assert dto.target_language == lang
|
|
@@ -183,7 +183,7 @@ class TestProcessingRequestDto:
|
|
| 183 |
|
| 184 |
def test_supported_voices(self, sample_audio_upload):
|
| 185 |
"""Test all supported voices"""
|
| 186 |
-
supported_voices = ['
|
| 187 |
|
| 188 |
for voice in supported_voices:
|
| 189 |
# Should not raise exception
|
|
@@ -202,7 +202,7 @@ class TestProcessingRequestDto:
|
|
| 202 |
audio=sample_audio_upload,
|
| 203 |
asr_model="whisper-small",
|
| 204 |
target_language="es",
|
| 205 |
-
voice="
|
| 206 |
speed=0.3 # Too low
|
| 207 |
)
|
| 208 |
|
|
@@ -213,7 +213,7 @@ class TestProcessingRequestDto:
|
|
| 213 |
audio=sample_audio_upload,
|
| 214 |
asr_model="whisper-small",
|
| 215 |
target_language="es",
|
| 216 |
-
voice="
|
| 217 |
speed=2.5 # Too high
|
| 218 |
)
|
| 219 |
|
|
@@ -227,7 +227,7 @@ class TestProcessingRequestDto:
|
|
| 227 |
audio=sample_audio_upload,
|
| 228 |
asr_model="whisper-small",
|
| 229 |
target_language="es",
|
| 230 |
-
voice="
|
| 231 |
speed=speed
|
| 232 |
)
|
| 233 |
assert dto.speed == speed
|
|
@@ -239,7 +239,7 @@ class TestProcessingRequestDto:
|
|
| 239 |
audio=sample_audio_upload,
|
| 240 |
asr_model="whisper-small",
|
| 241 |
target_language="es",
|
| 242 |
-
voice="
|
| 243 |
additional_params="invalid" # Not a dict
|
| 244 |
)
|
| 245 |
|
|
@@ -249,7 +249,7 @@ class TestProcessingRequestDto:
|
|
| 249 |
audio=sample_audio_upload,
|
| 250 |
asr_model="whisper-small",
|
| 251 |
target_language="en",
|
| 252 |
-
voice="
|
| 253 |
source_language="en"
|
| 254 |
)
|
| 255 |
|
|
@@ -261,7 +261,7 @@ class TestProcessingRequestDto:
|
|
| 261 |
audio=sample_audio_upload,
|
| 262 |
asr_model="whisper-small",
|
| 263 |
target_language="es",
|
| 264 |
-
voice="
|
| 265 |
source_language="en"
|
| 266 |
)
|
| 267 |
|
|
@@ -273,7 +273,7 @@ class TestProcessingRequestDto:
|
|
| 273 |
audio=sample_audio_upload,
|
| 274 |
asr_model="whisper-small",
|
| 275 |
target_language="es",
|
| 276 |
-
voice="
|
| 277 |
)
|
| 278 |
|
| 279 |
assert dto.requires_translation is True # Assume translation needed
|
|
@@ -284,7 +284,7 @@ class TestProcessingRequestDto:
|
|
| 284 |
audio=sample_audio_upload,
|
| 285 |
asr_model="whisper-small",
|
| 286 |
target_language="es",
|
| 287 |
-
voice="
|
| 288 |
speed=1.5,
|
| 289 |
source_language="en",
|
| 290 |
additional_params={"custom": "value"}
|
|
@@ -296,7 +296,7 @@ class TestProcessingRequestDto:
|
|
| 296 |
assert result['asr_model'] == "whisper-small"
|
| 297 |
assert result['target_language'] == "es"
|
| 298 |
assert result['source_language'] == "en"
|
| 299 |
-
assert result['voice'] == "
|
| 300 |
assert result['speed'] == 1.5
|
| 301 |
assert result['requires_translation'] is True
|
| 302 |
assert result['additional_params'] == {"custom": "value"}
|
|
@@ -367,7 +367,7 @@ class TestProcessingRequestDto:
|
|
| 367 |
audio=sample_audio_upload,
|
| 368 |
asr_model="", # Invalid empty model
|
| 369 |
target_language="es",
|
| 370 |
-
voice="
|
| 371 |
)
|
| 372 |
|
| 373 |
def test_additional_params_default_initialization(self, sample_audio_upload):
|
|
@@ -376,7 +376,7 @@ class TestProcessingRequestDto:
|
|
| 376 |
audio=sample_audio_upload,
|
| 377 |
asr_model="whisper-small",
|
| 378 |
target_language="es",
|
| 379 |
-
voice="
|
| 380 |
additional_params=None
|
| 381 |
)
|
| 382 |
|
|
|
|
| 24 |
audio=sample_audio_upload,
|
| 25 |
asr_model="whisper-small",
|
| 26 |
target_language="es",
|
| 27 |
+
voice="chatterbox",
|
| 28 |
speed=1.0,
|
| 29 |
source_language="en"
|
| 30 |
)
|
|
|
|
| 43 |
audio=sample_audio_upload,
|
| 44 |
asr_model="whisper-medium",
|
| 45 |
target_language="fr",
|
| 46 |
+
voice="chatterbox"
|
| 47 |
)
|
| 48 |
|
| 49 |
assert dto.speed == 1.0 # Default speed
|
|
|
|
| 61 |
audio=sample_audio_upload,
|
| 62 |
asr_model="whisper-large",
|
| 63 |
target_language="de",
|
| 64 |
+
voice="chatterbox",
|
| 65 |
additional_params=additional_params
|
| 66 |
)
|
| 67 |
|
|
|
|
| 74 |
audio="invalid_audio", # Not AudioUploadDto
|
| 75 |
asr_model="whisper-small",
|
| 76 |
target_language="es",
|
| 77 |
+
voice="chatterbox"
|
| 78 |
)
|
| 79 |
|
| 80 |
def test_empty_asr_model_validation(self, sample_audio_upload):
|
|
|
|
| 84 |
audio=sample_audio_upload,
|
| 85 |
asr_model="",
|
| 86 |
target_language="es",
|
| 87 |
+
voice="chatterbox"
|
| 88 |
)
|
| 89 |
|
| 90 |
def test_unsupported_asr_model_validation(self, sample_audio_upload):
|
|
|
|
| 94 |
audio=sample_audio_upload,
|
| 95 |
asr_model="invalid-model",
|
| 96 |
target_language="es",
|
| 97 |
+
voice="chatterbox"
|
| 98 |
)
|
| 99 |
|
| 100 |
def test_supported_asr_models(self, sample_audio_upload):
|
|
|
|
| 107 |
audio=sample_audio_upload,
|
| 108 |
asr_model=model,
|
| 109 |
target_language="es",
|
| 110 |
+
voice="chatterbox"
|
| 111 |
)
|
| 112 |
assert dto.asr_model == model
|
| 113 |
|
|
|
|
| 118 |
audio=sample_audio_upload,
|
| 119 |
asr_model="whisper-small",
|
| 120 |
target_language="",
|
| 121 |
+
voice="chatterbox"
|
| 122 |
)
|
| 123 |
|
| 124 |
def test_unsupported_target_language_validation(self, sample_audio_upload):
|
|
|
|
| 128 |
audio=sample_audio_upload,
|
| 129 |
asr_model="whisper-small",
|
| 130 |
target_language="invalid-lang",
|
| 131 |
+
voice="chatterbox"
|
| 132 |
)
|
| 133 |
|
| 134 |
def test_unsupported_source_language_validation(self, sample_audio_upload):
|
|
|
|
| 138 |
audio=sample_audio_upload,
|
| 139 |
asr_model="whisper-small",
|
| 140 |
target_language="es",
|
| 141 |
+
voice="chatterbox",
|
| 142 |
source_language="invalid-lang"
|
| 143 |
)
|
| 144 |
|
|
|
|
| 155 |
audio=sample_audio_upload,
|
| 156 |
asr_model="whisper-small",
|
| 157 |
target_language=lang,
|
| 158 |
+
voice="chatterbox",
|
| 159 |
source_language=lang
|
| 160 |
)
|
| 161 |
assert dto.target_language == lang
|
|
|
|
| 183 |
|
| 184 |
def test_supported_voices(self, sample_audio_upload):
|
| 185 |
"""Test all supported voices"""
|
| 186 |
+
supported_voices = ['chatterbox']
|
| 187 |
|
| 188 |
for voice in supported_voices:
|
| 189 |
# Should not raise exception
|
|
|
|
| 202 |
audio=sample_audio_upload,
|
| 203 |
asr_model="whisper-small",
|
| 204 |
target_language="es",
|
| 205 |
+
voice="chatterbox",
|
| 206 |
speed=0.3 # Too low
|
| 207 |
)
|
| 208 |
|
|
|
|
| 213 |
audio=sample_audio_upload,
|
| 214 |
asr_model="whisper-small",
|
| 215 |
target_language="es",
|
| 216 |
+
voice="chatterbox",
|
| 217 |
speed=2.5 # Too high
|
| 218 |
)
|
| 219 |
|
|
|
|
| 227 |
audio=sample_audio_upload,
|
| 228 |
asr_model="whisper-small",
|
| 229 |
target_language="es",
|
| 230 |
+
voice="chatterbox",
|
| 231 |
speed=speed
|
| 232 |
)
|
| 233 |
assert dto.speed == speed
|
|
|
|
| 239 |
audio=sample_audio_upload,
|
| 240 |
asr_model="whisper-small",
|
| 241 |
target_language="es",
|
| 242 |
+
voice="chatterbox",
|
| 243 |
additional_params="invalid" # Not a dict
|
| 244 |
)
|
| 245 |
|
|
|
|
| 249 |
audio=sample_audio_upload,
|
| 250 |
asr_model="whisper-small",
|
| 251 |
target_language="en",
|
| 252 |
+
voice="chatterbox",
|
| 253 |
source_language="en"
|
| 254 |
)
|
| 255 |
|
|
|
|
| 261 |
audio=sample_audio_upload,
|
| 262 |
asr_model="whisper-small",
|
| 263 |
target_language="es",
|
| 264 |
+
voice="chatterbox",
|
| 265 |
source_language="en"
|
| 266 |
)
|
| 267 |
|
|
|
|
| 273 |
audio=sample_audio_upload,
|
| 274 |
asr_model="whisper-small",
|
| 275 |
target_language="es",
|
| 276 |
+
voice="chatterbox"
|
| 277 |
)
|
| 278 |
|
| 279 |
assert dto.requires_translation is True # Assume translation needed
|
|
|
|
| 284 |
audio=sample_audio_upload,
|
| 285 |
asr_model="whisper-small",
|
| 286 |
target_language="es",
|
| 287 |
+
voice="chatterbox",
|
| 288 |
speed=1.5,
|
| 289 |
source_language="en",
|
| 290 |
additional_params={"custom": "value"}
|
|
|
|
| 296 |
assert result['asr_model'] == "whisper-small"
|
| 297 |
assert result['target_language'] == "es"
|
| 298 |
assert result['source_language'] == "en"
|
| 299 |
+
assert result['voice'] == "chatterbox"
|
| 300 |
assert result['speed'] == 1.5
|
| 301 |
assert result['requires_translation'] is True
|
| 302 |
assert result['additional_params'] == {"custom": "value"}
|
|
|
|
| 367 |
audio=sample_audio_upload,
|
| 368 |
asr_model="", # Invalid empty model
|
| 369 |
target_language="es",
|
| 370 |
+
voice="chatterbox"
|
| 371 |
)
|
| 372 |
|
| 373 |
def test_additional_params_default_initialization(self, sample_audio_upload):
|
|
|
|
| 376 |
audio=sample_audio_upload,
|
| 377 |
asr_model="whisper-small",
|
| 378 |
target_language="es",
|
| 379 |
+
voice="chatterbox",
|
| 380 |
additional_params=None
|
| 381 |
)
|
| 382 |
|
tests/unit/application/services/test_audio_processing_service.py
CHANGED
|
@@ -71,7 +71,7 @@ class TestAudioProcessingApplicationService:
|
|
| 71 |
}
|
| 72 |
|
| 73 |
config.get_tts_config.return_value = {
|
| 74 |
-
'preferred_providers': ['
|
| 75 |
}
|
| 76 |
|
| 77 |
return config
|
|
@@ -92,7 +92,7 @@ class TestAudioProcessingApplicationService:
|
|
| 92 |
audio=sample_audio_upload,
|
| 93 |
asr_model="whisper-small",
|
| 94 |
target_language="es",
|
| 95 |
-
voice="
|
| 96 |
speed=1.0,
|
| 97 |
source_language="en"
|
| 98 |
)
|
|
@@ -279,7 +279,7 @@ class TestAudioProcessingApplicationService:
|
|
| 279 |
def test_perform_speech_synthesis_success(self, mock_open, service, mock_container):
|
| 280 |
"""Test successful speech synthesis"""
|
| 281 |
text = TextContent(text="Hola mundo", language="es")
|
| 282 |
-
voice = "
|
| 283 |
speed = 1.0
|
| 284 |
language = "es"
|
| 285 |
temp_dir = "/tmp/test"
|
|
@@ -306,7 +306,7 @@ class TestAudioProcessingApplicationService:
|
|
| 306 |
def test_perform_speech_synthesis_failure(self, service, mock_container):
|
| 307 |
"""Test speech synthesis failure"""
|
| 308 |
text = TextContent(text="Hola mundo", language="es")
|
| 309 |
-
voice = "
|
| 310 |
speed = 1.0
|
| 311 |
language = "es"
|
| 312 |
temp_dir = "/tmp/test"
|
|
@@ -379,7 +379,7 @@ class TestAudioProcessingApplicationService:
|
|
| 379 |
|
| 380 |
# Verify expected values
|
| 381 |
assert 'whisper-small' in result['asr_models']
|
| 382 |
-
assert '
|
| 383 |
assert 'en' in result['languages']
|
| 384 |
|
| 385 |
def test_cleanup(self, service):
|
|
|
|
| 71 |
}
|
| 72 |
|
| 73 |
config.get_tts_config.return_value = {
|
| 74 |
+
'preferred_providers': ['chatterbox']
|
| 75 |
}
|
| 76 |
|
| 77 |
return config
|
|
|
|
| 92 |
audio=sample_audio_upload,
|
| 93 |
asr_model="whisper-small",
|
| 94 |
target_language="es",
|
| 95 |
+
voice="chatterbox",
|
| 96 |
speed=1.0,
|
| 97 |
source_language="en"
|
| 98 |
)
|
|
|
|
| 279 |
def test_perform_speech_synthesis_success(self, mock_open, service, mock_container):
|
| 280 |
"""Test successful speech synthesis"""
|
| 281 |
text = TextContent(text="Hola mundo", language="es")
|
| 282 |
+
voice = "chatterbox"
|
| 283 |
speed = 1.0
|
| 284 |
language = "es"
|
| 285 |
temp_dir = "/tmp/test"
|
|
|
|
| 306 |
def test_perform_speech_synthesis_failure(self, service, mock_container):
|
| 307 |
"""Test speech synthesis failure"""
|
| 308 |
text = TextContent(text="Hola mundo", language="es")
|
| 309 |
+
voice = "chatterbox"
|
| 310 |
speed = 1.0
|
| 311 |
language = "es"
|
| 312 |
temp_dir = "/tmp/test"
|
|
|
|
| 379 |
|
| 380 |
# Verify expected values
|
| 381 |
assert 'whisper-small' in result['asr_models']
|
| 382 |
+
assert 'chatterbox' in result['voices']
|
| 383 |
assert 'en' in result['languages']
|
| 384 |
|
| 385 |
def test_cleanup(self, service):
|
tests/unit/application/services/test_configuration_service.py
CHANGED
|
@@ -29,7 +29,7 @@ class TestConfigurationApplicationService:
|
|
| 29 |
|
| 30 |
# Mock configuration methods
|
| 31 |
config.get_tts_config.return_value = {
|
| 32 |
-
'preferred_providers': ['
|
| 33 |
'default_speed': 1.0,
|
| 34 |
'default_language': 'en',
|
| 35 |
'enable_streaming': False,
|
|
@@ -128,7 +128,7 @@ class TestConfigurationApplicationService:
|
|
| 128 |
"""Test successful TTS configuration retrieval"""
|
| 129 |
result = service.get_tts_configuration()
|
| 130 |
|
| 131 |
-
assert result['preferred_providers'] == ['
|
| 132 |
assert result['default_speed'] == 1.0
|
| 133 |
mock_config.get_tts_config.assert_called_once()
|
| 134 |
|
|
@@ -303,7 +303,7 @@ class TestConfigurationApplicationService:
|
|
| 303 |
def test_validate_tts_updates_valid(self, service):
|
| 304 |
"""Test TTS updates validation with valid data"""
|
| 305 |
updates = {
|
| 306 |
-
'preferred_providers': ['
|
| 307 |
'default_speed': 1.5,
|
| 308 |
'default_language': 'es',
|
| 309 |
'enable_streaming': True,
|
|
|
|
| 29 |
|
| 30 |
# Mock configuration methods
|
| 31 |
config.get_tts_config.return_value = {
|
| 32 |
+
'preferred_providers': ['chatterbox'],
|
| 33 |
'default_speed': 1.0,
|
| 34 |
'default_language': 'en',
|
| 35 |
'enable_streaming': False,
|
|
|
|
| 128 |
"""Test successful TTS configuration retrieval"""
|
| 129 |
result = service.get_tts_configuration()
|
| 130 |
|
| 131 |
+
assert result['preferred_providers'] == ['chatterbox']
|
| 132 |
assert result['default_speed'] == 1.0
|
| 133 |
mock_config.get_tts_config.assert_called_once()
|
| 134 |
|
|
|
|
| 303 |
def test_validate_tts_updates_valid(self, service):
|
| 304 |
"""Test TTS updates validation with valid data"""
|
| 305 |
updates = {
|
| 306 |
+
'preferred_providers': ['chatterbox'],
|
| 307 |
'default_speed': 1.5,
|
| 308 |
'default_language': 'es',
|
| 309 |
'enable_streaming': True,
|
tests/unit/domain/interfaces/test_speech_synthesis.py
CHANGED
|
@@ -215,24 +215,24 @@ class TestISpeechSynthesisService:
|
|
| 215 |
|
| 216 |
class KokoroImplementation(ISpeechSynthesisService):
|
| 217 |
def synthesize(self, request):
|
| 218 |
-
return AudioContent(data=b"
|
| 219 |
|
| 220 |
def synthesize_stream(self, request):
|
| 221 |
-
yield AudioChunk(data=b"
|
| 222 |
|
| 223 |
class DiaImplementation(ISpeechSynthesisService):
|
| 224 |
def synthesize(self, request):
|
| 225 |
-
return AudioContent(data=b"
|
| 226 |
|
| 227 |
def synthesize_stream(self, request):
|
| 228 |
-
yield AudioChunk(data=b"
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
|
| 233 |
-
assert isinstance(
|
| 234 |
-
assert isinstance(
|
| 235 |
-
assert type(
|
| 236 |
|
| 237 |
def test_interface_methods_can_be_called_polymorphically(self):
|
| 238 |
"""Test that interface methods can be called polymorphically."""
|
|
|
|
| 215 |
|
| 216 |
class KokoroImplementation(ISpeechSynthesisService):
|
| 217 |
def synthesize(self, request):
|
| 218 |
+
return AudioContent(data=b"chatterbox_audio", format="wav", sample_rate=22050, duration=1.0)
|
| 219 |
|
| 220 |
def synthesize_stream(self, request):
|
| 221 |
+
yield AudioChunk(data=b"chatterbox_chunk", format="wav", sample_rate=22050, chunk_index=0, is_final=True)
|
| 222 |
|
| 223 |
class DiaImplementation(ISpeechSynthesisService):
|
| 224 |
def synthesize(self, request):
|
| 225 |
+
return AudioContent(data=b"chatterbox2_audio", format="wav", sample_rate=22050, duration=1.0)
|
| 226 |
|
| 227 |
def synthesize_stream(self, request):
|
| 228 |
+
yield AudioChunk(data=b"chatterbox2_chunk", format="wav", sample_rate=22050, chunk_index=0, is_final=True)
|
| 229 |
|
| 230 |
+
chatterbox1 = KokoroImplementation()
|
| 231 |
+
chatterbox2 = DiaImplementation()
|
| 232 |
|
| 233 |
+
assert isinstance(chatterbox1, ISpeechSynthesisService)
|
| 234 |
+
assert isinstance(chatterbox2, ISpeechSynthesisService)
|
| 235 |
+
assert type(chatterbox1) != type(chatterbox2)
|
| 236 |
|
| 237 |
def test_interface_methods_can_be_called_polymorphically(self):
|
| 238 |
"""Test that interface methods can be called polymorphically."""
|
tests/unit/infrastructure/factories/test_tts_provider_factory.py
CHANGED
|
@@ -41,31 +41,21 @@ class TestTTSProviderFactory:
|
|
| 41 |
"""Test factory initialization."""
|
| 42 |
assert isinstance(self.factory._providers, dict)
|
| 43 |
assert isinstance(self.factory._provider_instances, dict)
|
| 44 |
-
assert '
|
| 45 |
|
| 46 |
-
@patch('src.infrastructure.tts.provider_factory.
|
| 47 |
-
def
|
| 48 |
-
"""Test registration of
|
| 49 |
factory = TTSProviderFactory()
|
| 50 |
|
| 51 |
-
assert '
|
| 52 |
-
assert factory._providers['
|
| 53 |
|
| 54 |
-
@patch('src.infrastructure.tts.
|
| 55 |
-
def
|
| 56 |
-
"""Test
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
assert 'kokoro' in factory._providers
|
| 60 |
-
assert factory._providers['kokoro'] == mock_kokoro
|
| 61 |
-
|
| 62 |
-
@patch('src.infrastructure.tts.kokoro_provider.KokoroTTSProvider', side_effect=ImportError("Not available"))
|
| 63 |
-
def test_register_default_providers_kokoro_unavailable(self, mock_kokoro):
|
| 64 |
-
"""Test handling when Kokoro provider is not available."""
|
| 65 |
-
factory = TTSProviderFactory()
|
| 66 |
-
|
| 67 |
-
# Should not crash, just not register the provider
|
| 68 |
-
assert 'kokoro' not in factory._providers or factory._providers.get('kokoro') is None
|
| 69 |
|
| 70 |
@patch.object(TTSProviderFactory, '_providers', {'mock': MockTTSProvider})
|
| 71 |
def test_get_available_providers(self):
|
|
@@ -116,14 +106,11 @@ class TestTTSProviderFactory:
|
|
| 116 |
with pytest.raises(SpeechSynthesisException, match="Failed to create TTS provider mock"):
|
| 117 |
self.factory.create_provider('mock')
|
| 118 |
|
| 119 |
-
@patch.object(TTSProviderFactory, '_providers', {'
|
| 120 |
def test_create_provider_with_lang_code(self):
|
| 121 |
"""Test creating provider with language code."""
|
| 122 |
with patch.object(MockTTSProvider, 'is_available', return_value=True):
|
| 123 |
-
|
| 124 |
-
self.factory._providers['kokoro'] = MockTTSProvider
|
| 125 |
-
|
| 126 |
-
provider = self.factory.create_provider('kokoro', lang_code='en')
|
| 127 |
assert isinstance(provider, MockTTSProvider)
|
| 128 |
|
| 129 |
@patch.object(TTSProviderFactory, '_providers', {
|
|
|
|
| 41 |
"""Test factory initialization."""
|
| 42 |
assert isinstance(self.factory._providers, dict)
|
| 43 |
assert isinstance(self.factory._provider_instances, dict)
|
| 44 |
+
assert 'chatterbox' in self.factory._providers
|
| 45 |
|
| 46 |
+
@patch('src.infrastructure.tts.provider_factory.ChatterboxTTSProvider')
|
| 47 |
+
def test_register_default_providers_chatterbox(self, mock_chatterbox):
|
| 48 |
+
"""Test registration of chatterbox provider."""
|
| 49 |
factory = TTSProviderFactory()
|
| 50 |
|
| 51 |
+
assert 'chatterbox' in factory._providers
|
| 52 |
+
assert factory._providers['chatterbox'] == mock_chatterbox
|
| 53 |
|
| 54 |
+
@patch('src.infrastructure.tts.chatterbox_provider.ChatterboxTTSProvider', side_effect=ImportError("Not available"))
|
| 55 |
+
def test_register_default_providers_chatterbox_unavailable(self, mock_chatterbox):
|
| 56 |
+
"""Test handling when Chatterbox provider is not available."""
|
| 57 |
+
with pytest.raises(SpeechSynthesisException, match="No TTS providers available"):
|
| 58 |
+
TTSProviderFactory()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
@patch.object(TTSProviderFactory, '_providers', {'mock': MockTTSProvider})
|
| 61 |
def test_get_available_providers(self):
|
|
|
|
| 106 |
with pytest.raises(SpeechSynthesisException, match="Failed to create TTS provider mock"):
|
| 107 |
self.factory.create_provider('mock')
|
| 108 |
|
| 109 |
+
@patch.object(TTSProviderFactory, '_providers', {'chatterbox': MockTTSProvider})
|
| 110 |
def test_create_provider_with_lang_code(self):
|
| 111 |
"""Test creating provider with language code."""
|
| 112 |
with patch.object(MockTTSProvider, 'is_available', return_value=True):
|
| 113 |
+
provider = self.factory.create_provider('chatterbox', lang_code='en')
|
|
|
|
|
|
|
|
|
|
| 114 |
assert isinstance(provider, MockTTSProvider)
|
| 115 |
|
| 116 |
@patch.object(TTSProviderFactory, '_providers', {
|