Spaces:
Sleeping
Sleeping
Michael Hu
committed on
Commit
·
6aea21a
1
Parent(s):
8a0c4b0
Implement domain services
Browse files
src/domain/services/__init__.py
CHANGED
|
@@ -1,3 +1,7 @@
|
|
| 1 |
"""Domain services package."""
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""Domain services package."""
|
| 2 |
|
| 3 |
+
from .audio_processing_service import AudioProcessingService
|
| 4 |
+
|
| 5 |
+
# Public names re-exported by this package.
__all__ = ['AudioProcessingService']
|
src/domain/services/audio_processing_service.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Concrete implementation of audio processing service."""
|
| 2 |
+
|
| 3 |
+
import time
from typing import TYPE_CHECKING

from ..interfaces.audio_processing import IAudioProcessingService
from ..interfaces.speech_recognition import ISpeechRecognitionService
from ..interfaces.translation import ITranslationService
from ..interfaces.speech_synthesis import ISpeechSynthesisService
from ..models.processing_result import ProcessingResult
from ..models.translation_request import TranslationRequest
from ..models.speech_synthesis_request import SpeechSynthesisRequest
from ..exceptions import (
    AudioProcessingException,
    SpeechRecognitionException,
    TranslationFailedException,
    SpeechSynthesisException
)

if TYPE_CHECKING:
    from ..models.audio_content import AudioContent
    from ..models.text_content import TextContent
    from ..models.voice_settings import VoiceSettings
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class AudioProcessingService(IAudioProcessingService):
    """Orchestrate the STT -> translation -> TTS audio pipeline.

    The three stage services are injected through the constructor. This class
    validates the inputs, runs the stages in order and packages the outcome
    (success or failure) as a ``ProcessingResult``.
    """

    # Upper bound compared against ``audio.duration``; reported to the caller
    # as seconds in the validation error message (5 minutes).
    MAX_AUDIO_DURATION_SECONDS = 300

    # Model identifier passed to the speech recognition service. Kept as a
    # class attribute so it can be overridden without touching the pipeline.
    DEFAULT_STT_MODEL = "whisper-base"

    def __init__(
        self,
        speech_recognition_service: ISpeechRecognitionService,
        translation_service: ITranslationService,
        speech_synthesis_service: ISpeechSynthesisService
    ):
        """
        Initialize the audio processing service with injected dependencies.

        Args:
            speech_recognition_service: Service for speech-to-text conversion
            translation_service: Service for text translation
            speech_synthesis_service: Service for text-to-speech synthesis
        """
        self._speech_recognition_service = speech_recognition_service
        self._translation_service = translation_service
        self._speech_synthesis_service = speech_synthesis_service

    def process_audio_pipeline(
        self,
        audio: 'AudioContent',
        target_language: str,
        voice_settings: 'VoiceSettings'
    ) -> 'ProcessingResult':
        """
        Process audio through the complete pipeline: STT -> Translation -> TTS.

        This method does not raise: every failure (validation, a pipeline
        stage, or an unexpected error) is converted into a failure
        ``ProcessingResult`` carrying the error message and the elapsed time.

        Args:
            audio: The input audio content
            target_language: The target language for translation
            voice_settings: Voice settings for TTS synthesis

        Returns:
            ProcessingResult: ``ProcessingResult.success_result(...)`` with the
            transcribed text, translated text, synthesized audio and elapsed
            time, or ``ProcessingResult.failure_result(...)`` with the error
            message and elapsed time.
        """
        # perf_counter() is monotonic, so the elapsed time can never come out
        # negative if the system clock is adjusted while the pipeline runs.
        start_time = time.perf_counter()

        try:
            self._validate_pipeline_inputs(audio, target_language, voice_settings)

            # Step 1: Speech Recognition (STT)
            original_text = self._perform_speech_recognition(audio)

            # Step 2: Translation
            translated_text = self._perform_translation(original_text, target_language)

            # Step 3: Speech Synthesis (TTS)
            audio_output = self._perform_speech_synthesis(translated_text, voice_settings)

            return ProcessingResult.success_result(
                original_text=original_text,
                translated_text=translated_text,
                audio_output=audio_output,
                processing_time=time.perf_counter() - start_time
            )

        except (
            AudioProcessingException,
            SpeechRecognitionException,
            TranslationFailedException,
            SpeechSynthesisException
        ) as e:
            # Expected domain failures, including input validation errors:
            # report the domain message as-is rather than as "unexpected".
            return ProcessingResult.failure_result(
                error_message=str(e),
                processing_time=time.perf_counter() - start_time
            )
        except Exception as e:
            # Anything else is a genuine surprise; label it as such.
            return ProcessingResult.failure_result(
                error_message=f"Unexpected error in audio processing pipeline: {str(e)}",
                processing_time=time.perf_counter() - start_time
            )

    def _validate_pipeline_inputs(
        self,
        audio: 'AudioContent',
        target_language: str,
        voice_settings: 'VoiceSettings'
    ) -> None:
        """
        Validate inputs for the audio processing pipeline.

        Checks run in a fixed order and the first failing check raises.

        Args:
            audio: The input audio content
            target_language: The target language for translation
            voice_settings: Voice settings for TTS synthesis

        Raises:
            AudioProcessingException: If ``audio`` or ``voice_settings`` is
                None, ``target_language`` is empty or whitespace, the voice
                settings language differs from ``target_language``, the audio
                duration exceeds ``MAX_AUDIO_DURATION_SECONDS``, or
                ``audio.is_valid_format`` is falsy.
        """
        if audio is None:
            raise AudioProcessingException("Audio content cannot be None")

        if not target_language or not target_language.strip():
            raise AudioProcessingException("Target language cannot be empty")

        if voice_settings is None:
            raise AudioProcessingException("Voice settings cannot be None")

        # The synthesized voice must speak the language we translate into.
        if voice_settings.language != target_language:
            raise AudioProcessingException(
                f"Voice settings language ({voice_settings.language}) must match "
                f"target language ({target_language})"
            )

        # Reject inputs longer than the processing limit.
        max_duration = self.MAX_AUDIO_DURATION_SECONDS
        if audio.duration > max_duration:
            raise AudioProcessingException(
                f"Audio duration ({audio.duration:.1f}s) exceeds maximum allowed "
                f"duration ({max_duration}s)"
            )

        if not audio.is_valid_format:
            raise AudioProcessingException(f"Unsupported audio format: {audio.format}")

    def _perform_speech_recognition(self, audio: 'AudioContent') -> 'TextContent':
        """
        Perform speech recognition on the input audio.

        Args:
            audio: The input audio content

        Returns:
            TextContent: The transcribed text

        Raises:
            SpeechRecognitionException: If the recognition service raises; the
                original exception is attached as ``__cause__``.
        """
        try:
            return self._speech_recognition_service.transcribe(
                audio, self.DEFAULT_STT_MODEL
            )
        except Exception as e:
            raise SpeechRecognitionException(
                f"Speech recognition failed: {str(e)}"
            ) from e

    def _perform_translation(self, text: 'TextContent', target_language: str) -> 'TextContent':
        """
        Perform translation of the transcribed text.

        Args:
            text: The text to translate
            target_language: The target language for translation

        Returns:
            TextContent: The translated text, or ``text`` itself when its
            language already equals ``target_language``.

        Raises:
            TranslationFailedException: If building the request or calling the
                translation service raises; the original exception is attached
                as ``__cause__``.
        """
        try:
            # Already in the target language: skip the translation service.
            if text.language == target_language:
                return text

            translation_request = TranslationRequest(
                source_text=text,
                target_language=target_language
            )
            return self._translation_service.translate(translation_request)
        except Exception as e:
            raise TranslationFailedException(f"Translation failed: {str(e)}") from e

    def _perform_speech_synthesis(
        self,
        text: 'TextContent',
        voice_settings: 'VoiceSettings'
    ) -> 'AudioContent':
        """
        Perform speech synthesis on the translated text.

        Args:
            text: The text to synthesize
            voice_settings: Voice settings for synthesis

        Returns:
            AudioContent: The synthesized audio

        Raises:
            SpeechSynthesisException: If building the request or calling the
                synthesis service raises; the original exception is attached
                as ``__cause__``.
        """
        try:
            synthesis_request = SpeechSynthesisRequest(
                text_content=text,
                voice_settings=voice_settings
            )
            return self._speech_synthesis_service.synthesize(synthesis_request)
        except Exception as e:
            raise SpeechSynthesisException(f"Speech synthesis failed: {str(e)}") from e
|
tests/unit/domain/services/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Domain services tests package."""
|
tests/unit/domain/services/test_audio_processing_service.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for AudioProcessingService."""
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
from unittest.mock import Mock, MagicMock
|
| 5 |
+
from src.domain.services.audio_processing_service import AudioProcessingService
|
| 6 |
+
from src.domain.models.audio_content import AudioContent
|
| 7 |
+
from src.domain.models.text_content import TextContent
|
| 8 |
+
from src.domain.models.voice_settings import VoiceSettings
|
| 9 |
+
from src.domain.models.translation_request import TranslationRequest
|
| 10 |
+
from src.domain.models.speech_synthesis_request import SpeechSynthesisRequest
|
| 11 |
+
from src.domain.exceptions import (
|
| 12 |
+
AudioProcessingException,
|
| 13 |
+
SpeechRecognitionException,
|
| 14 |
+
TranslationFailedException,
|
| 15 |
+
SpeechSynthesisException
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class TestAudioProcessingService:
    """Test cases for AudioProcessingService.

    Each test drives ``process_audio_pipeline`` with mocked stage services and
    inspects the returned result. Failures are expected to surface as a result
    with ``success is False``, never as a raised exception.
    """

    @pytest.fixture
    def mock_stt_service(self):
        """Mock speech recognition service."""
        return Mock()

    @pytest.fixture
    def mock_translation_service(self):
        """Mock translation service."""
        return Mock()

    @pytest.fixture
    def mock_tts_service(self):
        """Mock speech synthesis service."""
        return Mock()

    @pytest.fixture
    def audio_processing_service(self, mock_stt_service, mock_translation_service, mock_tts_service):
        """AudioProcessingService instance with mocked dependencies."""
        return AudioProcessingService(
            speech_recognition_service=mock_stt_service,
            translation_service=mock_translation_service,
            speech_synthesis_service=mock_tts_service
        )

    @pytest.fixture
    def sample_audio(self):
        """Sample audio content for testing."""
        return AudioContent(
            data=b"fake_audio_data",
            format="wav",
            sample_rate=22050,
            duration=10.0,
            filename="test.wav"
        )

    @pytest.fixture
    def sample_voice_settings(self):
        """Sample voice settings for testing."""
        return VoiceSettings(
            voice_id="test_voice",
            speed=1.0,
            language="es"
        )

    @pytest.fixture
    def sample_text_content(self):
        """Sample text content for testing."""
        return TextContent(
            text="Hello world",
            language="en"
        )

    @staticmethod
    def _synthesized_audio():
        """Build the audio object the mocked TTS service hands back."""
        return AudioContent(
            data=b"synthesized_audio",
            format="wav",
            sample_rate=22050,
            duration=5.0
        )

    def test_successful_pipeline_processing(
        self,
        audio_processing_service,
        mock_stt_service,
        mock_translation_service,
        mock_tts_service,
        sample_audio,
        sample_voice_settings,
        sample_text_content
    ):
        """Test successful processing through the complete pipeline."""
        # Arrange
        original_text = sample_text_content
        translated_text = TextContent(text="Hola mundo", language="es")
        output_audio = self._synthesized_audio()

        mock_stt_service.transcribe.return_value = original_text
        mock_translation_service.translate.return_value = translated_text
        mock_tts_service.synthesize.return_value = output_audio

        # Act
        result = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        # Assert
        assert result.success is True
        assert result.original_text == original_text
        assert result.translated_text == translated_text
        assert result.audio_output == output_audio
        assert result.error_message is None
        assert result.processing_time >= 0

        # Verify service calls
        mock_stt_service.transcribe.assert_called_once_with(sample_audio, "whisper-base")
        mock_translation_service.translate.assert_called_once()
        mock_tts_service.synthesize.assert_called_once()

        # Each stage must be handed its request object, not raw text.
        translate_args, _ = mock_translation_service.translate.call_args
        assert isinstance(translate_args[0], TranslationRequest)
        synthesize_args, _ = mock_tts_service.synthesize.call_args
        assert isinstance(synthesize_args[0], SpeechSynthesisRequest)

    def test_no_translation_needed_same_language(
        self,
        audio_processing_service,
        mock_stt_service,
        mock_translation_service,
        mock_tts_service,
        sample_audio
    ):
        """Test pipeline when no translation is needed (same language)."""
        # Arrange
        original_text = TextContent(text="Hola mundo", language="es")
        voice_settings = VoiceSettings(voice_id="test_voice", speed=1.0, language="es")
        output_audio = self._synthesized_audio()

        mock_stt_service.transcribe.return_value = original_text
        mock_tts_service.synthesize.return_value = output_audio

        # Act
        result = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=voice_settings
        )

        # Assert
        assert result.success is True
        assert result.original_text == original_text
        assert result.translated_text == original_text  # Same as original
        assert result.audio_output == output_audio

        # Translation service should not be called
        mock_translation_service.translate.assert_not_called()

    def test_validation_error_none_audio(
        self,
        audio_processing_service,
        mock_stt_service,
        sample_voice_settings
    ):
        """Test validation error when audio is None."""
        # Act
        result = audio_processing_service.process_audio_pipeline(
            audio=None,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        # Assert
        assert result.success is False
        assert "Audio content cannot be None" in result.error_message
        # Validation runs before any stage service is touched.
        mock_stt_service.transcribe.assert_not_called()

    def test_validation_error_empty_target_language(
        self,
        audio_processing_service,
        mock_stt_service,
        sample_audio,
        sample_voice_settings
    ):
        """Test validation error when target language is empty."""
        # Act
        result = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="",
            voice_settings=sample_voice_settings
        )

        # Assert
        assert result.success is False
        assert "Target language cannot be empty" in result.error_message
        mock_stt_service.transcribe.assert_not_called()

    def test_validation_error_none_voice_settings(
        self,
        audio_processing_service,
        mock_stt_service,
        sample_audio
    ):
        """Test validation error when voice settings are None."""
        # Act
        result = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=None
        )

        # Assert
        assert result.success is False
        assert "Voice settings cannot be None" in result.error_message
        mock_stt_service.transcribe.assert_not_called()

    def test_validation_error_language_mismatch(
        self,
        audio_processing_service,
        mock_stt_service,
        sample_audio
    ):
        """Test validation error when voice settings language doesn't match target language."""
        # Arrange
        voice_settings = VoiceSettings(voice_id="test_voice", speed=1.0, language="en")

        # Act
        result = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=voice_settings
        )

        # Assert
        assert result.success is False
        assert "Voice settings language (en) must match target language (es)" in result.error_message
        mock_stt_service.transcribe.assert_not_called()

    def test_validation_error_audio_too_long(
        self,
        audio_processing_service,
        mock_stt_service,
        sample_voice_settings
    ):
        """Test validation error when audio is too long."""
        # Arrange
        long_audio = AudioContent(
            data=b"fake_audio_data",
            format="wav",
            sample_rate=22050,
            duration=400.0  # Exceeds 300s limit
        )

        # Act
        result = audio_processing_service.process_audio_pipeline(
            audio=long_audio,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        # Assert
        assert result.success is False
        assert "exceeds maximum allowed duration" in result.error_message
        mock_stt_service.transcribe.assert_not_called()

    def test_stt_failure_handling(
        self,
        audio_processing_service,
        mock_stt_service,
        mock_translation_service,
        mock_tts_service,
        sample_audio,
        sample_voice_settings
    ):
        """Test handling of STT service failure."""
        # Arrange
        mock_stt_service.transcribe.side_effect = Exception("STT service unavailable")

        # Act
        result = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        # Assert
        assert result.success is False
        assert "Speech recognition failed" in result.error_message
        assert result.processing_time >= 0

        # A failed first stage must stop the pipeline.
        mock_translation_service.translate.assert_not_called()
        mock_tts_service.synthesize.assert_not_called()

    def test_translation_failure_handling(
        self,
        audio_processing_service,
        mock_stt_service,
        mock_translation_service,
        mock_tts_service,
        sample_audio,
        sample_voice_settings,
        sample_text_content
    ):
        """Test handling of translation service failure."""
        # Arrange
        mock_stt_service.transcribe.return_value = sample_text_content
        mock_translation_service.translate.side_effect = Exception("Translation service unavailable")

        # Act
        result = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        # Assert
        assert result.success is False
        assert "Translation failed" in result.error_message
        assert result.processing_time >= 0

        # Synthesis must not run on untranslated text.
        mock_tts_service.synthesize.assert_not_called()

    def test_tts_failure_handling(
        self,
        audio_processing_service,
        mock_stt_service,
        mock_translation_service,
        mock_tts_service,
        sample_audio,
        sample_voice_settings,
        sample_text_content
    ):
        """Test handling of TTS service failure."""
        # Arrange
        translated_text = TextContent(text="Hola mundo", language="es")

        mock_stt_service.transcribe.return_value = sample_text_content
        mock_translation_service.translate.return_value = translated_text
        mock_tts_service.synthesize.side_effect = Exception("TTS service unavailable")

        # Act
        result = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        # Assert
        assert result.success is False
        assert "Speech synthesis failed" in result.error_message
        assert result.processing_time >= 0
|