Michael Hu committed on
Commit
237cb26
·
1 Parent(s): 1e2d288

feat: replace legacy TTS providers with Chatterbox as the single, default provider

Browse files
DEVELOPER_GUIDE.md CHANGED
@@ -187,7 +187,7 @@ class AppConfig:
187
  # ... existing configuration ...
188
 
189
  # TTS Provider Configuration
190
- TTS_PROVIDERS = os.getenv('TTS_PROVIDERS', 'kokoro,dia,cosyvoice2,my_tts,dummy').split(',')
191
 
192
  # Provider-specific settings
193
  MY_TTS_API_KEY = os.getenv('MY_TTS_API_KEY')
 
187
  # ... existing configuration ...
188
 
189
  # TTS Provider Configuration
190
+ TTS_PROVIDERS = os.getenv('TTS_PROVIDERS', 'chatterbox,my_tts').split(',')
191
 
192
  # Provider-specific settings
193
  MY_TTS_API_KEY = os.getenv('MY_TTS_API_KEY')
README.md CHANGED
@@ -95,10 +95,7 @@ graph TD
95
  - **NLLB** - Meta's No Language Left Behind model
96
 
97
  ### Text-to-Speech (TTS)
98
- - **Kokoro** - High-quality neural TTS
99
- - **Dia** - Fast neural TTS
100
- - **CosyVoice2** - Advanced voice synthesis
101
- - **Dummy** - Test provider for development
102
 
103
  ## 📖 Usage
104
 
@@ -135,7 +132,7 @@ request = ProcessingRequestDto(
135
  audio=audio_upload,
136
  asr_model="whisper-small",
137
  target_language="zh",
138
- voice="kokoro",
139
  speed=1.0
140
  )
141
 
@@ -179,7 +176,7 @@ Create a `.env` file or set environment variables:
179
 
180
  ```bash
181
  # Provider preferences (comma-separated, in order of preference)
182
- TTS_PROVIDERS=kokoro,dia,cosyvoice2,dummy
183
  STT_PROVIDERS=whisper,parakeet
184
  TRANSLATION_PROVIDERS=nllb
185
 
@@ -201,7 +198,7 @@ The system automatically detects available providers and falls back gracefully:
201
  from src.infrastructure.config.dependency_container import DependencyContainer
202
 
203
  container = DependencyContainer()
204
- container.configure_tts_providers(['kokoro', 'dummy']) # Preferred order
205
  ```
206
 
207
  ## 🏗️ Architecture Benefits
 
95
  - **NLLB** - Meta's No Language Left Behind model
96
 
97
  ### Text-to-Speech (TTS)
98
+ - **Chatterbox** - High-quality neural TTS provider
 
 
 
99
 
100
  ## 📖 Usage
101
 
 
132
  audio=audio_upload,
133
  asr_model="whisper-small",
134
  target_language="zh",
135
+ voice="chatterbox",
136
  speed=1.0
137
  )
138
 
 
176
 
177
  ```bash
178
  # Provider preferences (comma-separated, in order of preference)
179
+ TTS_PROVIDERS=chatterbox
180
  STT_PROVIDERS=whisper,parakeet
181
  TRANSLATION_PROVIDERS=nllb
182
 
 
198
  from src.infrastructure.config.dependency_container import DependencyContainer
199
 
200
  container = DependencyContainer()
201
+ container.configure_tts_providers(['chatterbox']) # Preferred order
202
  ```
203
 
204
  ## 🏗️ Architecture Benefits
app.py CHANGED
@@ -110,7 +110,7 @@ def get_supported_configurations() -> dict:
110
  # Return fallback configurations
111
  return {
112
  'asr_models': ['whisper-small', 'parakeet'],
113
- 'voices': ['kokoro', 'dia', 'cosyvoice2', 'dummy'],
114
  'languages': ['en', 'zh', 'es', 'fr', 'de'],
115
  'audio_formats': ['wav', 'mp3'],
116
  'max_file_size_mb': 100,
 
110
  # Return fallback configurations
111
  return {
112
  'asr_models': ['whisper-small', 'parakeet'],
113
+ 'voices': ['chatterbox'],
114
  'languages': ['en', 'zh', 'es', 'fr', 'de'],
115
  'audio_formats': ['wav', 'mp3'],
116
  'max_file_size_mb': 100,
config.example.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "tts": {
3
- "preferred_providers": ["kokoro", "dia", "cosyvoice2", "dummy"],
4
  "default_voice": "default",
5
  "default_speed": 1.0,
6
  "default_language": "en",
 
1
  {
2
  "tts": {
3
+ "preferred_providers": ["chatterbox"],
4
  "default_voice": "default",
5
  "default_speed": 1.0,
6
  "default_language": "en",
src/application/dtos/processing_request_dto.py CHANGED
@@ -57,7 +57,7 @@ class ProcessingRequestDto:
57
  raise ValueError("Voice cannot be empty")
58
 
59
  # Validate voice options
60
- supported_voices = ['kokoro', 'dia', 'cosyvoice2', 'dummy']
61
  if self.voice not in supported_voices:
62
  raise ValueError(f"Unsupported voice: {self.voice}. Supported: {supported_voices}")
63
 
 
57
  raise ValueError("Voice cannot be empty")
58
 
59
  # Validate voice options
60
+ supported_voices = ['chatterbox']
61
  if self.voice not in supported_voices:
62
  raise ValueError(f"Unsupported voice: {self.voice}. Supported: {supported_voices}")
63
 
src/application/services/configuration_service.py CHANGED
@@ -294,7 +294,7 @@ class ConfigurationApplicationService:
294
  Raises:
295
  ConfigurationException: If validation fails
296
  """
297
- valid_providers = ['chatterbox', 'dummy']
298
  valid_languages = ['en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh']
299
 
300
  for key, value in updates.items():
@@ -515,7 +515,7 @@ class ConfigurationApplicationService:
515
 
516
  # Check TTS providers
517
  tts_factory = self._container.resolve(type(self._container._get_tts_factory()))
518
- for provider in ['chatterbox', 'dummy']:
519
  try:
520
  tts_factory.create_provider(provider)
521
  availability['tts'][provider] = True
 
294
  Raises:
295
  ConfigurationException: If validation fails
296
  """
297
+ valid_providers = ['chatterbox']
298
  valid_languages = ['en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh']
299
 
300
  for key, value in updates.items():
 
515
 
516
  # Check TTS providers
517
  tts_factory = self._container.resolve(type(self._container._get_tts_factory()))
518
+ for provider in ['chatterbox']:
519
  try:
520
  tts_factory.create_provider(provider)
521
  availability['tts'][provider] = True
src/domain/interfaces/audio_processing.py CHANGED
@@ -111,7 +111,7 @@ class IAudioProcessingService(ABC):
111
 
112
  # Configure voice settings
113
  voice_settings = VoiceSettings(
114
- voice_id="kokoro",
115
  speed=1.0,
116
  language="zh"
117
  )
 
111
 
112
  # Configure voice settings
113
  voice_settings = VoiceSettings(
114
+ voice_id="chatterbox",
115
  speed=1.0,
116
  language="zh"
117
  )
src/domain/interfaces/speech_synthesis.py CHANGED
@@ -98,7 +98,7 @@ class ISpeechSynthesisService(ABC):
98
 
99
  # Configure voice settings
100
  voice_settings = VoiceSettings(
101
- voice_id="kokoro",
102
  speed=1.0,
103
  pitch=0.0,
104
  volume=1.0
 
98
 
99
  # Configure voice settings
100
  voice_settings = VoiceSettings(
101
+ voice_id="chatterbox",
102
  speed=1.0,
103
  pitch=0.0,
104
  volume=1.0
src/infrastructure/config/app_config.py CHANGED
@@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
12
  @dataclass
13
  class TTSConfig:
14
  """Configuration for TTS providers."""
15
- preferred_providers: List[str] = field(default_factory=lambda: ['chatterbox', 'dummy'])
16
  default_voice: str = 'default'
17
  default_speed: float = 1.0
18
  default_language: str = 'en'
 
12
  @dataclass
13
  class TTSConfig:
14
  """Configuration for TTS providers."""
15
+ preferred_providers: List[str] = field(default_factory=lambda: ['chatterbox'])
16
  default_voice: str = 'default'
17
  default_speed: float = 1.0
18
  default_language: str = 'en'
src/infrastructure/tts/__init__.py CHANGED
@@ -1,24 +1,8 @@
1
  """TTS provider implementations."""
2
 
3
  from .provider_factory import TTSProviderFactory
4
- from .dummy_provider import DummyTTSProvider
5
-
6
- # Try to import optional providers
7
- try:
8
- from .kokoro_provider import KokoroTTSProvider
9
- except ImportError:
10
- KokoroTTSProvider = None
11
-
12
- try:
13
- from .dia_provider import DiaTTSProvider
14
- except ImportError:
15
- DiaTTSProvider = None
16
-
17
- try:
18
- from .cosyvoice2_provider import CosyVoice2TTSProvider
19
- except ImportError:
20
- CosyVoice2TTSProvider = None
21
 
 
22
  try:
23
  from .chatterbox_provider import ChatterboxTTSProvider
24
  except ImportError:
@@ -26,9 +10,5 @@ except ImportError:
26
 
27
  __all__ = [
28
  'TTSProviderFactory',
29
- 'DummyTTSProvider',
30
- 'KokoroTTSProvider',
31
- 'DiaTTSProvider',
32
- 'CosyVoice2TTSProvider',
33
  'ChatterboxTTSProvider'
34
  ]
 
1
  """TTS provider implementations."""
2
 
3
  from .provider_factory import TTSProviderFactory
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ # Try to import chatterbox provider
6
  try:
7
  from .chatterbox_provider import ChatterboxTTSProvider
8
  except ImportError:
 
10
 
11
  __all__ = [
12
  'TTSProviderFactory',
 
 
 
 
13
  'ChatterboxTTSProvider'
14
  ]
src/infrastructure/tts/dummy_provider.py DELETED
@@ -1,139 +0,0 @@
1
- """Dummy TTS provider implementation for testing and fallback."""
2
-
3
- import logging
4
- import numpy as np
5
- import soundfile as sf
6
- import io
7
- from typing import Iterator, TYPE_CHECKING
8
-
9
- if TYPE_CHECKING:
10
- from ...domain.models.speech_synthesis_request import SpeechSynthesisRequest
11
-
12
- from ..base.tts_provider_base import TTSProviderBase
13
- from ...domain.exceptions import SpeechSynthesisException
14
-
15
- logger = logging.getLogger(__name__)
16
-
17
-
18
- class DummyTTSProvider(TTSProviderBase):
19
- """Dummy TTS provider that generates sine wave audio for testing."""
20
-
21
- def __init__(self):
22
- """Initialize the Dummy TTS provider."""
23
- super().__init__(
24
- provider_name="Dummy",
25
- supported_languages=['en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh']
26
- )
27
-
28
- def is_available(self) -> bool:
29
- """Dummy TTS is always available."""
30
- return True
31
-
32
- def get_available_voices(self) -> list[str]:
33
- """Get available voices for Dummy TTS."""
34
- return ['default', 'male', 'female', 'robot']
35
-
36
- def _generate_audio(self, request: 'SpeechSynthesisRequest') -> tuple[bytes, int]:
37
- """Generate dummy sine wave audio."""
38
- try:
39
- # Extract parameters from request
40
- text = request.text_content.text
41
- speed = request.voice_settings.speed
42
-
43
- # Generate a simple sine wave based on text length and speed
44
- sample_rate = 24000
45
- # Rough approximation of speech duration adjusted by speed
46
- duration = min(len(text) / (20 * speed), 10)
47
-
48
- # Create time array
49
- t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
50
-
51
- # Generate sine wave (440 Hz base frequency)
52
- frequency = 440
53
- audio = 0.5 * np.sin(2 * np.pi * frequency * t)
54
-
55
- # Add some variation based on voice setting
56
- voice = request.voice_settings.voice_id
57
- if voice == 'male':
58
- # Lower frequency for male voice
59
- audio = 0.5 * np.sin(2 * np.pi * 220 * t)
60
- elif voice == 'female':
61
- # Higher frequency for female voice
62
- audio = 0.5 * np.sin(2 * np.pi * 660 * t)
63
- elif voice == 'robot':
64
- # Square wave for robot voice
65
- audio = 0.5 * np.sign(np.sin(2 * np.pi * 440 * t))
66
-
67
- # Convert to bytes
68
- audio_bytes = self._numpy_to_bytes(audio, sample_rate)
69
-
70
- logger.info(f"Generated dummy audio: duration={duration:.2f}s, voice={voice}")
71
- return audio_bytes, sample_rate
72
-
73
- except Exception as e:
74
- self._handle_provider_error(e, "dummy audio generation")
75
-
76
- def _generate_audio_stream(self, request: 'SpeechSynthesisRequest') -> Iterator[tuple[bytes, int, bool]]:
77
- """Generate dummy sine wave audio stream."""
78
- try:
79
- # Extract parameters from request
80
- text = request.text_content.text
81
- speed = request.voice_settings.speed
82
-
83
- # Generate audio in chunks
84
- sample_rate = 24000
85
- chunk_duration = 1.0 # 1 second chunks
86
- total_duration = min(len(text) / (20 * speed), 10)
87
-
88
- chunks_count = int(np.ceil(total_duration / chunk_duration))
89
-
90
- for chunk_idx in range(chunks_count):
91
- start_time = chunk_idx * chunk_duration
92
- end_time = min((chunk_idx + 1) * chunk_duration, total_duration)
93
- actual_duration = end_time - start_time
94
-
95
- if actual_duration <= 0:
96
- break
97
-
98
- # Create time array for this chunk
99
- t = np.linspace(0, actual_duration, int(sample_rate * actual_duration), endpoint=False)
100
-
101
- # Generate sine wave
102
- frequency = 440
103
- audio = 0.5 * np.sin(2 * np.pi * frequency * t)
104
-
105
- # Apply voice variations
106
- voice = request.voice_settings.voice_id
107
- if voice == 'male':
108
- audio = 0.5 * np.sin(2 * np.pi * 220 * t)
109
- elif voice == 'female':
110
- audio = 0.5 * np.sin(2 * np.pi * 660 * t)
111
- elif voice == 'robot':
112
- audio = 0.5 * np.sign(np.sin(2 * np.pi * 440 * t))
113
-
114
- # Convert to bytes
115
- audio_bytes = self._numpy_to_bytes(audio, sample_rate)
116
-
117
- # Check if this is the final chunk
118
- is_final = (chunk_idx == chunks_count - 1)
119
-
120
- yield audio_bytes, sample_rate, is_final
121
-
122
- except Exception as e:
123
- self._handle_provider_error(e, "dummy streaming audio generation")
124
-
125
- def _numpy_to_bytes(self, audio_array: np.ndarray, sample_rate: int) -> bytes:
126
- """Convert numpy audio array to bytes."""
127
- try:
128
- # Create an in-memory buffer
129
- buffer = io.BytesIO()
130
-
131
- # Write audio data to buffer as WAV
132
- sf.write(buffer, audio_array, sample_rate, format='WAV')
133
-
134
- # Get bytes from buffer
135
- buffer.seek(0)
136
- return buffer.read()
137
-
138
- except Exception as e:
139
- raise SpeechSynthesisException(f"Failed to convert audio to bytes: {str(e)}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/infrastructure/tts/provider_factory.py CHANGED
@@ -18,20 +18,17 @@ class TTSProviderFactory:
18
  self._register_default_providers()
19
 
20
  def _register_default_providers(self):
21
- """Register all available TTS providers."""
22
  # Import providers dynamically to avoid import errors if dependencies are missing
23
 
24
- # Always register dummy provider as fallback
25
- from .dummy_provider import DummyTTSProvider
26
- self._providers['dummy'] = DummyTTSProvider
27
-
28
  # Register only Chatterbox provider
29
  try:
30
  from .chatterbox_provider import ChatterboxTTSProvider
31
  self._providers['chatterbox'] = ChatterboxTTSProvider
32
  logger.info("Registered Chatterbox TTS provider")
33
  except ImportError as e:
34
- logger.info(f"Chatterbox TTS provider not available: {e}")
 
35
 
36
  def get_available_providers(self) -> List[str]:
37
  """Get list of available TTS providers."""
@@ -44,10 +41,7 @@ class TTSProviderFactory:
44
  # Create instance if not cached
45
  if name not in self._provider_instances:
46
  logger.info(f"Creating instance for {name} provider")
47
- if name == 'chatterbox':
48
- self._provider_instances[name] = provider_class()
49
- else:
50
- self._provider_instances[name] = provider_class()
51
 
52
  # Check if provider is available
53
  logger.info(f"Checking availability for {name}")
@@ -94,11 +88,8 @@ class TTSProviderFactory:
94
  provider_class = self._providers[provider_name]
95
 
96
  # Create instance with appropriate parameters
97
- if provider_name == 'chatterbox':
98
- lang_code = kwargs.get('lang_code', 'en')
99
- provider = provider_class(lang_code=lang_code)
100
- else:
101
- provider = provider_class(**kwargs)
102
 
103
  # Verify the provider is available
104
  if not provider.is_available():
@@ -126,7 +117,7 @@ class TTSProviderFactory:
126
  SpeechSynthesisException: If no providers are available
127
  """
128
  if preferred_providers is None:
129
- preferred_providers = ['chatterbox', 'dummy']
130
 
131
  logger.info(f"🔄 Getting TTS provider with fallback, preferred order: {preferred_providers}")
132
  available_providers = self.get_available_providers()
@@ -174,10 +165,7 @@ class TTSProviderFactory:
174
  # Create instance if not cached
175
  if provider_name not in self._provider_instances:
176
  provider_class = self._providers[provider_name]
177
- if provider_name == 'chatterbox':
178
- self._provider_instances[provider_name] = provider_class()
179
- else:
180
- self._provider_instances[provider_name] = provider_class()
181
 
182
  provider = self._provider_instances[provider_name]
183
 
 
18
  self._register_default_providers()
19
 
20
  def _register_default_providers(self):
21
+ """Register available TTS providers."""
22
  # Import providers dynamically to avoid import errors if dependencies are missing
23
 
 
 
 
 
24
  # Register only Chatterbox provider
25
  try:
26
  from .chatterbox_provider import ChatterboxTTSProvider
27
  self._providers['chatterbox'] = ChatterboxTTSProvider
28
  logger.info("Registered Chatterbox TTS provider")
29
  except ImportError as e:
30
+ logger.warning(f"Chatterbox TTS provider not available: {e}")
31
+ raise SpeechSynthesisException("No TTS providers available - Chatterbox is required") from e
32
 
33
  def get_available_providers(self) -> List[str]:
34
  """Get list of available TTS providers."""
 
41
  # Create instance if not cached
42
  if name not in self._provider_instances:
43
  logger.info(f"Creating instance for {name} provider")
44
+ self._provider_instances[name] = provider_class()
 
 
 
45
 
46
  # Check if provider is available
47
  logger.info(f"Checking availability for {name}")
 
88
  provider_class = self._providers[provider_name]
89
 
90
  # Create instance with appropriate parameters
91
+ lang_code = kwargs.get('lang_code', 'en')
92
+ provider = provider_class(lang_code=lang_code)
 
 
 
93
 
94
  # Verify the provider is available
95
  if not provider.is_available():
 
117
  SpeechSynthesisException: If no providers are available
118
  """
119
  if preferred_providers is None:
120
+ preferred_providers = ['chatterbox']
121
 
122
  logger.info(f"🔄 Getting TTS provider with fallback, preferred order: {preferred_providers}")
123
  available_providers = self.get_available_providers()
 
165
  # Create instance if not cached
166
  if provider_name not in self._provider_instances:
167
  provider_class = self._providers[provider_name]
168
+ self._provider_instances[provider_name] = provider_class()
 
 
 
169
 
170
  provider = self._provider_instances[provider_name]
171
 
tests/integration/test_audio_processing_pipeline.py CHANGED
@@ -61,7 +61,7 @@ class TestAudioProcessingPipeline:
61
 
62
  # TTS configuration
63
  config.get_tts_config.return_value = {
64
- 'preferred_providers': ['kokoro', 'dia', 'cosyvoice2', 'dummy']
65
  }
66
 
67
  return config
@@ -124,7 +124,7 @@ class TestAudioProcessingPipeline:
124
  asr_model="whisper-small",
125
  target_language="es",
126
  source_language="en",
127
- voice="kokoro",
128
  speed=1.0,
129
  requires_translation=True
130
  )
@@ -152,7 +152,7 @@ class TestAudioProcessingPipeline:
152
  asr_model="whisper-small",
153
  target_language="en",
154
  source_language="en",
155
- voice="kokoro",
156
  speed=1.0,
157
  requires_translation=False
158
  )
@@ -171,7 +171,7 @@ class TestAudioProcessingPipeline:
171
  asr_model="whisper-medium",
172
  target_language="fr",
173
  source_language="en",
174
- voice="dia",
175
  speed=1.5,
176
  requires_translation=True
177
  )
@@ -179,7 +179,7 @@ class TestAudioProcessingPipeline:
179
  result = audio_service.process_audio_pipeline(request)
180
 
181
  assert result.success is True
182
- assert result.metadata['voice'] == "dia"
183
  assert result.metadata['speed'] == 1.5
184
  assert result.metadata['asr_model'] == "whisper-medium"
185
 
@@ -208,7 +208,7 @@ class TestAudioProcessingPipeline:
208
  audio=large_audio,
209
  asr_model="whisper-small",
210
  target_language="es",
211
- voice="kokoro",
212
  speed=1.0,
213
  requires_translation=True
214
  )
@@ -296,7 +296,7 @@ class TestAudioProcessingPipeline:
296
  audio=sample_audio_upload,
297
  asr_model="whisper-small",
298
  target_language="es",
299
- voice="kokoro",
300
  speed=1.0,
301
  requires_translation=True
302
  )
 
61
 
62
  # TTS configuration
63
  config.get_tts_config.return_value = {
64
+ 'preferred_providers': ['chatterbox']
65
  }
66
 
67
  return config
 
124
  asr_model="whisper-small",
125
  target_language="es",
126
  source_language="en",
127
+ voice="chatterbox",
128
  speed=1.0,
129
  requires_translation=True
130
  )
 
152
  asr_model="whisper-small",
153
  target_language="en",
154
  source_language="en",
155
+ voice="chatterbox",
156
  speed=1.0,
157
  requires_translation=False
158
  )
 
171
  asr_model="whisper-medium",
172
  target_language="fr",
173
  source_language="en",
174
+ voice="chatterbox",
175
  speed=1.5,
176
  requires_translation=True
177
  )
 
179
  result = audio_service.process_audio_pipeline(request)
180
 
181
  assert result.success is True
182
+ assert result.metadata['voice'] == "chatterbox"
183
  assert result.metadata['speed'] == 1.5
184
  assert result.metadata['asr_model'] == "whisper-medium"
185
 
 
208
  audio=large_audio,
209
  asr_model="whisper-small",
210
  target_language="es",
211
+ voice="chatterbox",
212
  speed=1.0,
213
  requires_translation=True
214
  )
 
296
  audio=sample_audio_upload,
297
  asr_model="whisper-small",
298
  target_language="es",
299
+ voice="chatterbox",
300
  speed=1.0,
301
  requires_translation=True
302
  )
tests/integration/test_file_handling.py CHANGED
@@ -57,7 +57,7 @@ class TestFileHandling:
57
 
58
  # TTS configuration
59
  config.get_tts_config.return_value = {
60
- 'preferred_providers': ['dummy']
61
  }
62
 
63
  return config
@@ -139,7 +139,7 @@ class TestFileHandling:
139
  audio=audio_upload,
140
  asr_model="whisper-small",
141
  target_language="es",
142
- voice="dummy",
143
  speed=1.0,
144
  requires_translation=True
145
  )
@@ -170,7 +170,7 @@ class TestFileHandling:
170
  audio=audio_upload,
171
  asr_model="whisper-small",
172
  target_language="en",
173
- voice="dummy",
174
  speed=1.0,
175
  requires_translation=False
176
  )
@@ -194,7 +194,7 @@ class TestFileHandling:
194
  audio=audio_upload,
195
  asr_model="whisper-small",
196
  target_language="es",
197
- voice="dummy",
198
  speed=1.0,
199
  requires_translation=True
200
  )
@@ -226,7 +226,7 @@ class TestFileHandling:
226
  audio=audio_upload,
227
  asr_model="whisper-small",
228
  target_language="es",
229
- voice="dummy",
230
  speed=1.0,
231
  requires_translation=True
232
  )
@@ -262,7 +262,7 @@ class TestFileHandling:
262
  audio=audio_upload,
263
  asr_model="whisper-small",
264
  target_language="es",
265
- voice="dummy",
266
  speed=1.0,
267
  requires_translation=True
268
  )
@@ -294,7 +294,7 @@ class TestFileHandling:
294
  audio=audio_upload,
295
  asr_model="whisper-small",
296
  target_language="es",
297
- voice="dummy",
298
  speed=1.0,
299
  requires_translation=True
300
  )
@@ -325,7 +325,7 @@ class TestFileHandling:
325
  audio=audio_upload,
326
  asr_model="whisper-small",
327
  target_language="es",
328
- voice="dummy",
329
  speed=1.0,
330
  requires_translation=True
331
  )
@@ -373,7 +373,7 @@ class TestFileHandling:
373
  audio=audio_upload,
374
  asr_model="whisper-small",
375
  target_language="es",
376
- voice="dummy",
377
  speed=1.0,
378
  requires_translation=True
379
  )
@@ -405,7 +405,7 @@ class TestFileHandling:
405
  audio=audio_upload,
406
  asr_model="whisper-small",
407
  target_language="es",
408
- voice="dummy",
409
  speed=1.0,
410
  requires_translation=True
411
  )
@@ -435,7 +435,7 @@ class TestFileHandling:
435
  audio=audio_upload,
436
  asr_model="whisper-small",
437
  target_language="es",
438
- voice="dummy",
439
  speed=1.0,
440
  requires_translation=True
441
  )
@@ -474,7 +474,7 @@ class TestFileHandling:
474
  audio=audio_upload,
475
  asr_model="whisper-small",
476
  target_language="es",
477
- voice="dummy",
478
  speed=1.0,
479
  requires_translation=True
480
  )
@@ -501,7 +501,7 @@ class TestFileHandling:
501
  audio=audio_upload,
502
  asr_model="whisper-small",
503
  target_language="es",
504
- voice="dummy",
505
  speed=1.0,
506
  requires_translation=True
507
  )
@@ -533,7 +533,7 @@ class TestFileHandling:
533
  audio=audio_upload,
534
  asr_model="whisper-small",
535
  target_language="es",
536
- voice="dummy",
537
  speed=1.0,
538
  requires_translation=True
539
  )
@@ -561,7 +561,7 @@ class TestFileHandling:
561
  audio=audio_upload,
562
  asr_model="whisper-small",
563
  target_language="es",
564
- voice="dummy",
565
  speed=1.0,
566
  requires_translation=True
567
  )
 
57
 
58
  # TTS configuration
59
  config.get_tts_config.return_value = {
60
+ 'preferred_providers': ['chatterbox']
61
  }
62
 
63
  return config
 
139
  audio=audio_upload,
140
  asr_model="whisper-small",
141
  target_language="es",
142
+ voice="chatterbox",
143
  speed=1.0,
144
  requires_translation=True
145
  )
 
170
  audio=audio_upload,
171
  asr_model="whisper-small",
172
  target_language="en",
173
+ voice="chatterbox",
174
  speed=1.0,
175
  requires_translation=False
176
  )
 
194
  audio=audio_upload,
195
  asr_model="whisper-small",
196
  target_language="es",
197
+ voice="chatterbox",
198
  speed=1.0,
199
  requires_translation=True
200
  )
 
226
  audio=audio_upload,
227
  asr_model="whisper-small",
228
  target_language="es",
229
+ voice="chatterbox",
230
  speed=1.0,
231
  requires_translation=True
232
  )
 
262
  audio=audio_upload,
263
  asr_model="whisper-small",
264
  target_language="es",
265
+ voice="chatterbox",
266
  speed=1.0,
267
  requires_translation=True
268
  )
 
294
  audio=audio_upload,
295
  asr_model="whisper-small",
296
  target_language="es",
297
+ voice="chatterbox",
298
  speed=1.0,
299
  requires_translation=True
300
  )
 
325
  audio=audio_upload,
326
  asr_model="whisper-small",
327
  target_language="es",
328
+ voice="chatterbox",
329
  speed=1.0,
330
  requires_translation=True
331
  )
 
373
  audio=audio_upload,
374
  asr_model="whisper-small",
375
  target_language="es",
376
+ voice="chatterbox",
377
  speed=1.0,
378
  requires_translation=True
379
  )
 
405
  audio=audio_upload,
406
  asr_model="whisper-small",
407
  target_language="es",
408
+ voice="chatterbox",
409
  speed=1.0,
410
  requires_translation=True
411
  )
 
435
  audio=audio_upload,
436
  asr_model="whisper-small",
437
  target_language="es",
438
+ voice="chatterbox",
439
  speed=1.0,
440
  requires_translation=True
441
  )
 
474
  audio=audio_upload,
475
  asr_model="whisper-small",
476
  target_language="es",
477
+ voice="chatterbox",
478
  speed=1.0,
479
  requires_translation=True
480
  )
 
501
  audio=audio_upload,
502
  asr_model="whisper-small",
503
  target_language="es",
504
+ voice="chatterbox",
505
  speed=1.0,
506
  requires_translation=True
507
  )
 
533
  audio=audio_upload,
534
  asr_model="whisper-small",
535
  target_language="es",
536
+ voice="chatterbox",
537
  speed=1.0,
538
  requires_translation=True
539
  )
 
561
  audio=audio_upload,
562
  asr_model="whisper-small",
563
  target_language="es",
564
+ voice="chatterbox",
565
  speed=1.0,
566
  requires_translation=True
567
  )
tests/integration/test_performance_and_errors.py CHANGED
@@ -61,7 +61,7 @@ class TestPerformanceAndErrors:
61
 
62
  # TTS configuration
63
  config.get_tts_config.return_value = {
64
- 'preferred_providers': ['kokoro', 'dia', 'cosyvoice2', 'dummy'],
65
  'provider_timeout': 30.0,
66
  'max_retries': 3
67
  }
@@ -133,7 +133,7 @@ class TestPerformanceAndErrors:
133
  audio=audio_upload,
134
  asr_model="whisper-small",
135
  target_language="es",
136
- voice="kokoro",
137
  speed=1.0,
138
  requires_translation=True
139
  )
@@ -243,7 +243,7 @@ class TestPerformanceAndErrors:
243
  audio=audio_upload,
244
  asr_model="whisper-small",
245
  target_language="es",
246
- voice="kokoro",
247
  speed=1.0,
248
  requires_translation=True
249
  )
@@ -359,7 +359,7 @@ class TestPerformanceAndErrors:
359
  audio=invalid_audio,
360
  asr_model="whisper-small",
361
  target_language="es",
362
- voice="kokoro",
363
  speed=1.0,
364
  requires_translation=True
365
  )
@@ -389,7 +389,7 @@ class TestPerformanceAndErrors:
389
  audio=oversized_audio,
390
  asr_model="whisper-small",
391
  target_language="es",
392
- voice="kokoro",
393
  speed=1.0,
394
  requires_translation=True
395
  )
@@ -413,7 +413,7 @@ class TestPerformanceAndErrors:
413
  audio=corrupted_audio,
414
  asr_model="whisper-small",
415
  target_language="es",
416
- voice="kokoro",
417
  speed=1.0,
418
  requires_translation=True
419
  )
 
61
 
62
  # TTS configuration
63
  config.get_tts_config.return_value = {
64
+ 'preferred_providers': ['chatterbox'],
65
  'provider_timeout': 30.0,
66
  'max_retries': 3
67
  }
 
133
  audio=audio_upload,
134
  asr_model="whisper-small",
135
  target_language="es",
136
+ voice="chatterbox",
137
  speed=1.0,
138
  requires_translation=True
139
  )
 
243
  audio=audio_upload,
244
  asr_model="whisper-small",
245
  target_language="es",
246
+ voice="chatterbox",
247
  speed=1.0,
248
  requires_translation=True
249
  )
 
359
  audio=invalid_audio,
360
  asr_model="whisper-small",
361
  target_language="es",
362
+ voice="chatterbox",
363
  speed=1.0,
364
  requires_translation=True
365
  )
 
389
  audio=oversized_audio,
390
  asr_model="whisper-small",
391
  target_language="es",
392
+ voice="chatterbox",
393
  speed=1.0,
394
  requires_translation=True
395
  )
 
413
  audio=corrupted_audio,
414
  asr_model="whisper-small",
415
  target_language="es",
416
+ voice="chatterbox",
417
  speed=1.0,
418
  requires_translation=True
419
  )
tests/integration/test_provider_integration.py CHANGED
@@ -31,7 +31,7 @@ class TestProviderIntegration:
31
  config = Mock(spec=AppConfig)
32
 
33
  # TTS configuration
34
- config.tts.preferred_providers = ['kokoro', 'dia', 'cosyvoice2', 'dummy']
35
  config.tts.fallback_enabled = True
36
  config.tts.provider_timeout = 30.0
37
 
@@ -85,7 +85,7 @@ class TestProviderIntegration:
85
  )
86
 
87
  # Test each TTS provider
88
- providers_to_test = ['kokoro', 'dia', 'cosyvoice2', 'dummy']
89
 
90
  for provider_name in providers_to_test:
91
  with patch(f'src.infrastructure.tts.{provider_name}_provider') as mock_provider_module:
@@ -247,7 +247,7 @@ class TestProviderIntegration:
247
  def test_provider_configuration_loading(self, dependency_container, mock_config):
248
  """Test provider configuration loading and validation."""
249
  # Test TTS configuration
250
- tts_provider = dependency_container.get_tts_provider('dummy')
251
  assert tts_provider is not None
252
 
253
  # Test STT configuration
@@ -310,7 +310,7 @@ class TestProviderIntegration:
310
 
311
  # Measure performance
312
  start_time = time.time()
313
- provider = dependency_container.get_tts_provider('dummy')
314
  result = provider.synthesize(synthesis_request)
315
  end_time = time.time()
316
 
@@ -322,7 +322,7 @@ class TestProviderIntegration:
322
  def test_provider_resource_cleanup(self, dependency_container):
323
  """Test provider resource cleanup."""
324
  # Get multiple providers
325
- tts_provider = dependency_container.get_tts_provider('dummy')
326
  stt_provider = dependency_container.get_stt_provider('whisper-small')
327
  translation_provider = dependency_container.get_translation_provider()
328
 
@@ -356,7 +356,7 @@ class TestProviderIntegration:
356
 
357
  def synthesize_audio():
358
  try:
359
- provider = dependency_container.get_tts_provider('dummy')
360
  with patch.object(provider, 'synthesize') as mock_synthesize:
361
  mock_synthesize.return_value = AudioContent(
362
  data=b"concurrent_audio_data",
@@ -396,15 +396,15 @@ class TestProviderIntegration:
396
  """Test dynamic provider configuration updates."""
397
  # Initial configuration
398
  initial_providers = mock_config.tts.preferred_providers
399
- assert 'kokoro' in initial_providers
400
 
401
  # Update configuration
402
- mock_config.tts.preferred_providers = ['dia', 'dummy']
403
 
404
  # Verify configuration update affects provider selection
405
  # (This would require actual implementation of dynamic config updates)
406
  updated_providers = mock_config.tts.preferred_providers
407
- assert 'dia' in updated_providers
408
  assert 'dummy' in updated_providers
409
 
410
  def test_provider_health_checking(self, dependency_container):
 
31
  config = Mock(spec=AppConfig)
32
 
33
  # TTS configuration
34
+ config.tts.preferred_providers = ['chatterbox']
35
  config.tts.fallback_enabled = True
36
  config.tts.provider_timeout = 30.0
37
 
 
85
  )
86
 
87
  # Test each TTS provider
88
+ providers_to_test = ['chatterbox']
89
 
90
  for provider_name in providers_to_test:
91
  with patch(f'src.infrastructure.tts.{provider_name}_provider') as mock_provider_module:
 
247
  def test_provider_configuration_loading(self, dependency_container, mock_config):
248
  """Test provider configuration loading and validation."""
249
  # Test TTS configuration
250
+ tts_provider = dependency_container.get_tts_provider('chatterbox')
251
  assert tts_provider is not None
252
 
253
  # Test STT configuration
 
310
 
311
  # Measure performance
312
  start_time = time.time()
313
+ provider = dependency_container.get_tts_provider('chatterbox')
314
  result = provider.synthesize(synthesis_request)
315
  end_time = time.time()
316
 
 
322
  def test_provider_resource_cleanup(self, dependency_container):
323
  """Test provider resource cleanup."""
324
  # Get multiple providers
325
+ tts_provider = dependency_container.get_tts_provider('chatterbox')
326
  stt_provider = dependency_container.get_stt_provider('whisper-small')
327
  translation_provider = dependency_container.get_translation_provider()
328
 
 
356
 
357
  def synthesize_audio():
358
  try:
359
+ provider = dependency_container.get_tts_provider('chatterbox')
360
  with patch.object(provider, 'synthesize') as mock_synthesize:
361
  mock_synthesize.return_value = AudioContent(
362
  data=b"concurrent_audio_data",
 
396
  """Test dynamic provider configuration updates."""
397
  # Initial configuration
398
  initial_providers = mock_config.tts.preferred_providers
399
+ assert 'chatterbox' in initial_providers
400
 
401
  # Update configuration
402
+ mock_config.tts.preferred_providers = ['chatterbox']
403
 
404
  # Verify configuration update affects provider selection
405
  # (This would require actual implementation of dynamic config updates)
406
  updated_providers = mock_config.tts.preferred_providers
407
+ assert 'chatterbox' in updated_providers
408
  assert 'dummy' in updated_providers
409
 
410
  def test_provider_health_checking(self, dependency_container):
tests/unit/application/dtos/test_processing_request_dto.py CHANGED
@@ -24,7 +24,7 @@ class TestProcessingRequestDto:
24
  audio=sample_audio_upload,
25
  asr_model="whisper-small",
26
  target_language="es",
27
- voice="kokoro",
28
  speed=1.0,
29
  source_language="en"
30
  )
@@ -43,7 +43,7 @@ class TestProcessingRequestDto:
43
  audio=sample_audio_upload,
44
  asr_model="whisper-medium",
45
  target_language="fr",
46
- voice="dia"
47
  )
48
 
49
  assert dto.speed == 1.0 # Default speed
@@ -61,7 +61,7 @@ class TestProcessingRequestDto:
61
  audio=sample_audio_upload,
62
  asr_model="whisper-large",
63
  target_language="de",
64
- voice="cosyvoice2",
65
  additional_params=additional_params
66
  )
67
 
@@ -74,7 +74,7 @@ class TestProcessingRequestDto:
74
  audio="invalid_audio", # Not AudioUploadDto
75
  asr_model="whisper-small",
76
  target_language="es",
77
- voice="kokoro"
78
  )
79
 
80
  def test_empty_asr_model_validation(self, sample_audio_upload):
@@ -84,7 +84,7 @@ class TestProcessingRequestDto:
84
  audio=sample_audio_upload,
85
  asr_model="",
86
  target_language="es",
87
- voice="kokoro"
88
  )
89
 
90
  def test_unsupported_asr_model_validation(self, sample_audio_upload):
@@ -94,7 +94,7 @@ class TestProcessingRequestDto:
94
  audio=sample_audio_upload,
95
  asr_model="invalid-model",
96
  target_language="es",
97
- voice="kokoro"
98
  )
99
 
100
  def test_supported_asr_models(self, sample_audio_upload):
@@ -107,7 +107,7 @@ class TestProcessingRequestDto:
107
  audio=sample_audio_upload,
108
  asr_model=model,
109
  target_language="es",
110
- voice="kokoro"
111
  )
112
  assert dto.asr_model == model
113
 
@@ -118,7 +118,7 @@ class TestProcessingRequestDto:
118
  audio=sample_audio_upload,
119
  asr_model="whisper-small",
120
  target_language="",
121
- voice="kokoro"
122
  )
123
 
124
  def test_unsupported_target_language_validation(self, sample_audio_upload):
@@ -128,7 +128,7 @@ class TestProcessingRequestDto:
128
  audio=sample_audio_upload,
129
  asr_model="whisper-small",
130
  target_language="invalid-lang",
131
- voice="kokoro"
132
  )
133
 
134
  def test_unsupported_source_language_validation(self, sample_audio_upload):
@@ -138,7 +138,7 @@ class TestProcessingRequestDto:
138
  audio=sample_audio_upload,
139
  asr_model="whisper-small",
140
  target_language="es",
141
- voice="kokoro",
142
  source_language="invalid-lang"
143
  )
144
 
@@ -155,7 +155,7 @@ class TestProcessingRequestDto:
155
  audio=sample_audio_upload,
156
  asr_model="whisper-small",
157
  target_language=lang,
158
- voice="kokoro",
159
  source_language=lang
160
  )
161
  assert dto.target_language == lang
@@ -183,7 +183,7 @@ class TestProcessingRequestDto:
183
 
184
  def test_supported_voices(self, sample_audio_upload):
185
  """Test all supported voices"""
186
- supported_voices = ['kokoro', 'dia', 'cosyvoice2', 'dummy']
187
 
188
  for voice in supported_voices:
189
  # Should not raise exception
@@ -202,7 +202,7 @@ class TestProcessingRequestDto:
202
  audio=sample_audio_upload,
203
  asr_model="whisper-small",
204
  target_language="es",
205
- voice="kokoro",
206
  speed=0.3 # Too low
207
  )
208
 
@@ -213,7 +213,7 @@ class TestProcessingRequestDto:
213
  audio=sample_audio_upload,
214
  asr_model="whisper-small",
215
  target_language="es",
216
- voice="kokoro",
217
  speed=2.5 # Too high
218
  )
219
 
@@ -227,7 +227,7 @@ class TestProcessingRequestDto:
227
  audio=sample_audio_upload,
228
  asr_model="whisper-small",
229
  target_language="es",
230
- voice="kokoro",
231
  speed=speed
232
  )
233
  assert dto.speed == speed
@@ -239,7 +239,7 @@ class TestProcessingRequestDto:
239
  audio=sample_audio_upload,
240
  asr_model="whisper-small",
241
  target_language="es",
242
- voice="kokoro",
243
  additional_params="invalid" # Not a dict
244
  )
245
 
@@ -249,7 +249,7 @@ class TestProcessingRequestDto:
249
  audio=sample_audio_upload,
250
  asr_model="whisper-small",
251
  target_language="en",
252
- voice="kokoro",
253
  source_language="en"
254
  )
255
 
@@ -261,7 +261,7 @@ class TestProcessingRequestDto:
261
  audio=sample_audio_upload,
262
  asr_model="whisper-small",
263
  target_language="es",
264
- voice="kokoro",
265
  source_language="en"
266
  )
267
 
@@ -273,7 +273,7 @@ class TestProcessingRequestDto:
273
  audio=sample_audio_upload,
274
  asr_model="whisper-small",
275
  target_language="es",
276
- voice="kokoro"
277
  )
278
 
279
  assert dto.requires_translation is True # Assume translation needed
@@ -284,7 +284,7 @@ class TestProcessingRequestDto:
284
  audio=sample_audio_upload,
285
  asr_model="whisper-small",
286
  target_language="es",
287
- voice="kokoro",
288
  speed=1.5,
289
  source_language="en",
290
  additional_params={"custom": "value"}
@@ -296,7 +296,7 @@ class TestProcessingRequestDto:
296
  assert result['asr_model'] == "whisper-small"
297
  assert result['target_language'] == "es"
298
  assert result['source_language'] == "en"
299
- assert result['voice'] == "kokoro"
300
  assert result['speed'] == 1.5
301
  assert result['requires_translation'] is True
302
  assert result['additional_params'] == {"custom": "value"}
@@ -367,7 +367,7 @@ class TestProcessingRequestDto:
367
  audio=sample_audio_upload,
368
  asr_model="", # Invalid empty model
369
  target_language="es",
370
- voice="kokoro"
371
  )
372
 
373
  def test_additional_params_default_initialization(self, sample_audio_upload):
@@ -376,7 +376,7 @@ class TestProcessingRequestDto:
376
  audio=sample_audio_upload,
377
  asr_model="whisper-small",
378
  target_language="es",
379
- voice="kokoro",
380
  additional_params=None
381
  )
382
 
 
24
  audio=sample_audio_upload,
25
  asr_model="whisper-small",
26
  target_language="es",
27
+ voice="chatterbox",
28
  speed=1.0,
29
  source_language="en"
30
  )
 
43
  audio=sample_audio_upload,
44
  asr_model="whisper-medium",
45
  target_language="fr",
46
+ voice="chatterbox"
47
  )
48
 
49
  assert dto.speed == 1.0 # Default speed
 
61
  audio=sample_audio_upload,
62
  asr_model="whisper-large",
63
  target_language="de",
64
+ voice="chatterbox",
65
  additional_params=additional_params
66
  )
67
 
 
74
  audio="invalid_audio", # Not AudioUploadDto
75
  asr_model="whisper-small",
76
  target_language="es",
77
+ voice="chatterbox"
78
  )
79
 
80
  def test_empty_asr_model_validation(self, sample_audio_upload):
 
84
  audio=sample_audio_upload,
85
  asr_model="",
86
  target_language="es",
87
+ voice="chatterbox"
88
  )
89
 
90
  def test_unsupported_asr_model_validation(self, sample_audio_upload):
 
94
  audio=sample_audio_upload,
95
  asr_model="invalid-model",
96
  target_language="es",
97
+ voice="chatterbox"
98
  )
99
 
100
  def test_supported_asr_models(self, sample_audio_upload):
 
107
  audio=sample_audio_upload,
108
  asr_model=model,
109
  target_language="es",
110
+ voice="chatterbox"
111
  )
112
  assert dto.asr_model == model
113
 
 
118
  audio=sample_audio_upload,
119
  asr_model="whisper-small",
120
  target_language="",
121
+ voice="chatterbox"
122
  )
123
 
124
  def test_unsupported_target_language_validation(self, sample_audio_upload):
 
128
  audio=sample_audio_upload,
129
  asr_model="whisper-small",
130
  target_language="invalid-lang",
131
+ voice="chatterbox"
132
  )
133
 
134
  def test_unsupported_source_language_validation(self, sample_audio_upload):
 
138
  audio=sample_audio_upload,
139
  asr_model="whisper-small",
140
  target_language="es",
141
+ voice="chatterbox",
142
  source_language="invalid-lang"
143
  )
144
 
 
155
  audio=sample_audio_upload,
156
  asr_model="whisper-small",
157
  target_language=lang,
158
+ voice="chatterbox",
159
  source_language=lang
160
  )
161
  assert dto.target_language == lang
 
183
 
184
  def test_supported_voices(self, sample_audio_upload):
185
  """Test all supported voices"""
186
+ supported_voices = ['chatterbox']
187
 
188
  for voice in supported_voices:
189
  # Should not raise exception
 
202
  audio=sample_audio_upload,
203
  asr_model="whisper-small",
204
  target_language="es",
205
+ voice="chatterbox",
206
  speed=0.3 # Too low
207
  )
208
 
 
213
  audio=sample_audio_upload,
214
  asr_model="whisper-small",
215
  target_language="es",
216
+ voice="chatterbox",
217
  speed=2.5 # Too high
218
  )
219
 
 
227
  audio=sample_audio_upload,
228
  asr_model="whisper-small",
229
  target_language="es",
230
+ voice="chatterbox",
231
  speed=speed
232
  )
233
  assert dto.speed == speed
 
239
  audio=sample_audio_upload,
240
  asr_model="whisper-small",
241
  target_language="es",
242
+ voice="chatterbox",
243
  additional_params="invalid" # Not a dict
244
  )
245
 
 
249
  audio=sample_audio_upload,
250
  asr_model="whisper-small",
251
  target_language="en",
252
+ voice="chatterbox",
253
  source_language="en"
254
  )
255
 
 
261
  audio=sample_audio_upload,
262
  asr_model="whisper-small",
263
  target_language="es",
264
+ voice="chatterbox",
265
  source_language="en"
266
  )
267
 
 
273
  audio=sample_audio_upload,
274
  asr_model="whisper-small",
275
  target_language="es",
276
+ voice="chatterbox"
277
  )
278
 
279
  assert dto.requires_translation is True # Assume translation needed
 
284
  audio=sample_audio_upload,
285
  asr_model="whisper-small",
286
  target_language="es",
287
+ voice="chatterbox",
288
  speed=1.5,
289
  source_language="en",
290
  additional_params={"custom": "value"}
 
296
  assert result['asr_model'] == "whisper-small"
297
  assert result['target_language'] == "es"
298
  assert result['source_language'] == "en"
299
+ assert result['voice'] == "chatterbox"
300
  assert result['speed'] == 1.5
301
  assert result['requires_translation'] is True
302
  assert result['additional_params'] == {"custom": "value"}
 
367
  audio=sample_audio_upload,
368
  asr_model="", # Invalid empty model
369
  target_language="es",
370
+ voice="chatterbox"
371
  )
372
 
373
  def test_additional_params_default_initialization(self, sample_audio_upload):
 
376
  audio=sample_audio_upload,
377
  asr_model="whisper-small",
378
  target_language="es",
379
+ voice="chatterbox",
380
  additional_params=None
381
  )
382
 
tests/unit/application/services/test_audio_processing_service.py CHANGED
@@ -71,7 +71,7 @@ class TestAudioProcessingApplicationService:
71
  }
72
 
73
  config.get_tts_config.return_value = {
74
- 'preferred_providers': ['kokoro', 'dia']
75
  }
76
 
77
  return config
@@ -92,7 +92,7 @@ class TestAudioProcessingApplicationService:
92
  audio=sample_audio_upload,
93
  asr_model="whisper-small",
94
  target_language="es",
95
- voice="kokoro",
96
  speed=1.0,
97
  source_language="en"
98
  )
@@ -279,7 +279,7 @@ class TestAudioProcessingApplicationService:
279
  def test_perform_speech_synthesis_success(self, mock_open, service, mock_container):
280
  """Test successful speech synthesis"""
281
  text = TextContent(text="Hola mundo", language="es")
282
- voice = "kokoro"
283
  speed = 1.0
284
  language = "es"
285
  temp_dir = "/tmp/test"
@@ -306,7 +306,7 @@ class TestAudioProcessingApplicationService:
306
  def test_perform_speech_synthesis_failure(self, service, mock_container):
307
  """Test speech synthesis failure"""
308
  text = TextContent(text="Hola mundo", language="es")
309
- voice = "kokoro"
310
  speed = 1.0
311
  language = "es"
312
  temp_dir = "/tmp/test"
@@ -379,7 +379,7 @@ class TestAudioProcessingApplicationService:
379
 
380
  # Verify expected values
381
  assert 'whisper-small' in result['asr_models']
382
- assert 'kokoro' in result['voices']
383
  assert 'en' in result['languages']
384
 
385
  def test_cleanup(self, service):
 
71
  }
72
 
73
  config.get_tts_config.return_value = {
74
+ 'preferred_providers': ['chatterbox']
75
  }
76
 
77
  return config
 
92
  audio=sample_audio_upload,
93
  asr_model="whisper-small",
94
  target_language="es",
95
+ voice="chatterbox",
96
  speed=1.0,
97
  source_language="en"
98
  )
 
279
  def test_perform_speech_synthesis_success(self, mock_open, service, mock_container):
280
  """Test successful speech synthesis"""
281
  text = TextContent(text="Hola mundo", language="es")
282
+ voice = "chatterbox"
283
  speed = 1.0
284
  language = "es"
285
  temp_dir = "/tmp/test"
 
306
  def test_perform_speech_synthesis_failure(self, service, mock_container):
307
  """Test speech synthesis failure"""
308
  text = TextContent(text="Hola mundo", language="es")
309
+ voice = "chatterbox"
310
  speed = 1.0
311
  language = "es"
312
  temp_dir = "/tmp/test"
 
379
 
380
  # Verify expected values
381
  assert 'whisper-small' in result['asr_models']
382
+ assert 'chatterbox' in result['voices']
383
  assert 'en' in result['languages']
384
 
385
  def test_cleanup(self, service):
tests/unit/application/services/test_configuration_service.py CHANGED
@@ -29,7 +29,7 @@ class TestConfigurationApplicationService:
29
 
30
  # Mock configuration methods
31
  config.get_tts_config.return_value = {
32
- 'preferred_providers': ['kokoro', 'dia'],
33
  'default_speed': 1.0,
34
  'default_language': 'en',
35
  'enable_streaming': False,
@@ -128,7 +128,7 @@ class TestConfigurationApplicationService:
128
  """Test successful TTS configuration retrieval"""
129
  result = service.get_tts_configuration()
130
 
131
- assert result['preferred_providers'] == ['kokoro', 'dia']
132
  assert result['default_speed'] == 1.0
133
  mock_config.get_tts_config.assert_called_once()
134
 
@@ -303,7 +303,7 @@ class TestConfigurationApplicationService:
303
  def test_validate_tts_updates_valid(self, service):
304
  """Test TTS updates validation with valid data"""
305
  updates = {
306
- 'preferred_providers': ['kokoro', 'dia'],
307
  'default_speed': 1.5,
308
  'default_language': 'es',
309
  'enable_streaming': True,
 
29
 
30
  # Mock configuration methods
31
  config.get_tts_config.return_value = {
32
+ 'preferred_providers': ['chatterbox'],
33
  'default_speed': 1.0,
34
  'default_language': 'en',
35
  'enable_streaming': False,
 
128
  """Test successful TTS configuration retrieval"""
129
  result = service.get_tts_configuration()
130
 
131
+ assert result['preferred_providers'] == ['chatterbox']
132
  assert result['default_speed'] == 1.0
133
  mock_config.get_tts_config.assert_called_once()
134
 
 
303
  def test_validate_tts_updates_valid(self, service):
304
  """Test TTS updates validation with valid data"""
305
  updates = {
306
+ 'preferred_providers': ['chatterbox'],
307
  'default_speed': 1.5,
308
  'default_language': 'es',
309
  'enable_streaming': True,
tests/unit/domain/interfaces/test_speech_synthesis.py CHANGED
@@ -215,24 +215,24 @@ class TestISpeechSynthesisService:
215
 
216
  class KokoroImplementation(ISpeechSynthesisService):
217
  def synthesize(self, request):
218
- return AudioContent(data=b"kokoro_audio", format="wav", sample_rate=22050, duration=1.0)
219
 
220
  def synthesize_stream(self, request):
221
- yield AudioChunk(data=b"kokoro_chunk", format="wav", sample_rate=22050, chunk_index=0, is_final=True)
222
 
223
  class DiaImplementation(ISpeechSynthesisService):
224
  def synthesize(self, request):
225
- return AudioContent(data=b"dia_audio", format="wav", sample_rate=22050, duration=1.0)
226
 
227
  def synthesize_stream(self, request):
228
- yield AudioChunk(data=b"dia_chunk", format="wav", sample_rate=22050, chunk_index=0, is_final=True)
229
 
230
- kokoro = KokoroImplementation()
231
- dia = DiaImplementation()
232
 
233
- assert isinstance(kokoro, ISpeechSynthesisService)
234
- assert isinstance(dia, ISpeechSynthesisService)
235
- assert type(kokoro) != type(dia)
236
 
237
  def test_interface_methods_can_be_called_polymorphically(self):
238
  """Test that interface methods can be called polymorphically."""
 
215
 
216
  class KokoroImplementation(ISpeechSynthesisService):
217
  def synthesize(self, request):
218
+ return AudioContent(data=b"chatterbox_audio", format="wav", sample_rate=22050, duration=1.0)
219
 
220
  def synthesize_stream(self, request):
221
+ yield AudioChunk(data=b"chatterbox_chunk", format="wav", sample_rate=22050, chunk_index=0, is_final=True)
222
 
223
  class DiaImplementation(ISpeechSynthesisService):
224
  def synthesize(self, request):
225
+ return AudioContent(data=b"chatterbox2_audio", format="wav", sample_rate=22050, duration=1.0)
226
 
227
  def synthesize_stream(self, request):
228
+ yield AudioChunk(data=b"chatterbox2_chunk", format="wav", sample_rate=22050, chunk_index=0, is_final=True)
229
 
230
+ chatterbox1 = KokoroImplementation()
231
+ chatterbox2 = DiaImplementation()
232
 
233
+ assert isinstance(chatterbox1, ISpeechSynthesisService)
234
+ assert isinstance(chatterbox2, ISpeechSynthesisService)
235
+ assert type(chatterbox1) != type(chatterbox2)
236
 
237
  def test_interface_methods_can_be_called_polymorphically(self):
238
  """Test that interface methods can be called polymorphically."""
tests/unit/infrastructure/factories/test_tts_provider_factory.py CHANGED
@@ -41,31 +41,21 @@ class TestTTSProviderFactory:
41
  """Test factory initialization."""
42
  assert isinstance(self.factory._providers, dict)
43
  assert isinstance(self.factory._provider_instances, dict)
44
- assert 'dummy' in self.factory._providers
45
 
46
- @patch('src.infrastructure.tts.provider_factory.DummyTTSProvider')
47
- def test_register_default_providers_dummy(self, mock_dummy):
48
- """Test registration of dummy provider."""
49
  factory = TTSProviderFactory()
50
 
51
- assert 'dummy' in factory._providers
52
- assert factory._providers['dummy'] == mock_dummy
53
 
54
- @patch('src.infrastructure.tts.provider_factory.KokoroTTSProvider')
55
- def test_register_default_providers_kokoro_available(self, mock_kokoro):
56
- """Test registration of Kokoro provider when available."""
57
- factory = TTSProviderFactory()
58
-
59
- assert 'kokoro' in factory._providers
60
- assert factory._providers['kokoro'] == mock_kokoro
61
-
62
- @patch('src.infrastructure.tts.kokoro_provider.KokoroTTSProvider', side_effect=ImportError("Not available"))
63
- def test_register_default_providers_kokoro_unavailable(self, mock_kokoro):
64
- """Test handling when Kokoro provider is not available."""
65
- factory = TTSProviderFactory()
66
-
67
- # Should not crash, just not register the provider
68
- assert 'kokoro' not in factory._providers or factory._providers.get('kokoro') is None
69
 
70
  @patch.object(TTSProviderFactory, '_providers', {'mock': MockTTSProvider})
71
  def test_get_available_providers(self):
@@ -116,14 +106,11 @@ class TestTTSProviderFactory:
116
  with pytest.raises(SpeechSynthesisException, match="Failed to create TTS provider mock"):
117
  self.factory.create_provider('mock')
118
 
119
- @patch.object(TTSProviderFactory, '_providers', {'mock': MockTTSProvider})
120
  def test_create_provider_with_lang_code(self):
121
  """Test creating provider with language code."""
122
  with patch.object(MockTTSProvider, 'is_available', return_value=True):
123
- # Mock providers that accept lang_code
124
- self.factory._providers['kokoro'] = MockTTSProvider
125
-
126
- provider = self.factory.create_provider('kokoro', lang_code='en')
127
  assert isinstance(provider, MockTTSProvider)
128
 
129
  @patch.object(TTSProviderFactory, '_providers', {
 
41
  """Test factory initialization."""
42
  assert isinstance(self.factory._providers, dict)
43
  assert isinstance(self.factory._provider_instances, dict)
44
+ assert 'chatterbox' in self.factory._providers
45
 
46
+ @patch('src.infrastructure.tts.provider_factory.ChatterboxTTSProvider')
47
+ def test_register_default_providers_chatterbox(self, mock_chatterbox):
48
+ """Test registration of chatterbox provider."""
49
  factory = TTSProviderFactory()
50
 
51
+ assert 'chatterbox' in factory._providers
52
+ assert factory._providers['chatterbox'] == mock_chatterbox
53
 
54
+ @patch('src.infrastructure.tts.chatterbox_provider.ChatterboxTTSProvider', side_effect=ImportError("Not available"))
55
+ def test_register_default_providers_chatterbox_unavailable(self, mock_chatterbox):
56
+ """Test handling when Chatterbox provider is not available."""
57
+ with pytest.raises(SpeechSynthesisException, match="No TTS providers available"):
58
+ TTSProviderFactory()
 
 
 
 
 
 
 
 
 
 
59
 
60
  @patch.object(TTSProviderFactory, '_providers', {'mock': MockTTSProvider})
61
  def test_get_available_providers(self):
 
106
  with pytest.raises(SpeechSynthesisException, match="Failed to create TTS provider mock"):
107
  self.factory.create_provider('mock')
108
 
109
+ @patch.object(TTSProviderFactory, '_providers', {'chatterbox': MockTTSProvider})
110
  def test_create_provider_with_lang_code(self):
111
  """Test creating provider with language code."""
112
  with patch.object(MockTTSProvider, 'is_available', return_value=True):
113
+ provider = self.factory.create_provider('chatterbox', lang_code='en')
 
 
 
114
  assert isinstance(provider, MockTTSProvider)
115
 
116
  @patch.object(TTSProviderFactory, '_providers', {