Spaces:

DroolingPanda
/

teachingAssistant

Sleeping

App Files Files Community

Michael Hu committed on Jul 27

Commit

6aea21a

1 Parent(s): 8a0c4b0

Implement domain services

Browse files

Files changed (4) hide show

src/domain/services/__init__.py +5 -1
src/domain/services/audio_processing_service.py +228 -0
tests/unit/domain/services/__init__.py +1 -0
tests/unit/domain/services/test_audio_processing_service.py +297 -0

src/domain/services/__init__.py CHANGED Viewed

@@ -1,3 +1,7 @@
 """Domain services package."""
-# Services will be added in subsequent tasks

 """Domain services package."""
+from .audio_processing_service import AudioProcessingService
+__all__ = [  # names re-exported as this package's public API
+    'AudioProcessingService'
+]

src/domain/services/audio_processing_service.py ADDED Viewed

	@@ -0,0 +1,228 @@

+"""Concrete implementation of audio processing service."""
+import time
+from typing import TYPE_CHECKING
+from ..interfaces.audio_processing import IAudioProcessingService
+from ..interfaces.speech_recognition import ISpeechRecognitionService
+from ..interfaces.translation import ITranslationService
+from ..interfaces.speech_synthesis import ISpeechSynthesisService
+from ..models.processing_result import ProcessingResult
+from ..models.translation_request import TranslationRequest
+from ..models.speech_synthesis_request import SpeechSynthesisRequest
+from ..exceptions import (
+    AudioProcessingException,
+    SpeechRecognitionException,
+    TranslationFailedException,
+    SpeechSynthesisException
+)
+if TYPE_CHECKING:
+    from ..models.audio_content import AudioContent
+    from ..models.voice_settings import VoiceSettings
+class AudioProcessingService(IAudioProcessingService):
+    """Concrete implementation of audio processing pipeline orchestration."""
+    def __init__(
+        self,
+        speech_recognition_service: ISpeechRecognitionService,
+        translation_service: ITranslationService,
+        speech_synthesis_service: ISpeechSynthesisService
+    ):
+        """
+        Initialize the audio processing service with injected dependencies.
+        Args:
+            speech_recognition_service: Service for speech-to-text conversion
+            translation_service: Service for text translation
+            speech_synthesis_service: Service for text-to-speech synthesis
+        """
+        self._speech_recognition_service = speech_recognition_service
+        self._translation_service = translation_service
+        self._speech_synthesis_service = speech_synthesis_service
+    def process_audio_pipeline(
+        self,
+        audio: 'AudioContent',
+        target_language: str,
+        voice_settings: 'VoiceSettings'
+    ) -> 'ProcessingResult':
+        """
+        Process audio through the complete pipeline: STT -> Translation -> TTS.
+        Args:
+            audio: The input audio content
+            target_language: The target language for translation
+            voice_settings: Voice settings for TTS synthesis
+        Returns:
+            ProcessingResult: The result of the complete processing pipeline
+        Raises:
+            AudioProcessingException: If any step in the pipeline fails
+        """
+        start_time = time.time()
+        try:
+            # Validate inputs
+            self._validate_pipeline_inputs(audio, target_language, voice_settings)
+            # Step 1: Speech Recognition (STT)
+            original_text = self._perform_speech_recognition(audio)
+            # Step 2: Translation
+            translated_text = self._perform_translation(original_text, target_language)
+            # Step 3: Speech Synthesis (TTS)
+            audio_output = self._perform_speech_synthesis(translated_text, voice_settings)
+            # Calculate processing time
+            processing_time = time.time() - start_time
+            # Create successful result
+            return ProcessingResult.success_result(
+                original_text=original_text,
+                translated_text=translated_text,
+                audio_output=audio_output,
+                processing_time=processing_time
+            )
+        except (SpeechRecognitionException, TranslationFailedException, SpeechSynthesisException) as e:
+            # Handle domain-specific exceptions
+            processing_time = time.time() - start_time
+            return ProcessingResult.failure_result(
+                error_message=str(e),
+                processing_time=processing_time
+            )
+        except Exception as e:
+            # Handle unexpected exceptions
+            processing_time = time.time() - start_time
+            error_message = f"Unexpected error in audio processing pipeline: {str(e)}"
+            return ProcessingResult.failure_result(
+                error_message=error_message,
+                processing_time=processing_time
+            )
+    def _validate_pipeline_inputs(
+        self,
+        audio: 'AudioContent',
+        target_language: str,
+        voice_settings: 'VoiceSettings'
+    ) -> None:
+        """
+        Validate inputs for the audio processing pipeline.
+        Args:
+            audio: The input audio content
+            target_language: The target language for translation
+            voice_settings: Voice settings for TTS synthesis
+        Raises:
+            AudioProcessingException: If validation fails
+        """
+        if audio is None:
+            raise AudioProcessingException("Audio content cannot be None")
+        if not target_language or not target_language.strip():
+            raise AudioProcessingException("Target language cannot be empty")
+        if voice_settings is None:
+            raise AudioProcessingException("Voice settings cannot be None")
+        # Validate that voice settings language matches target language
+        if voice_settings.language != target_language:
+            raise AudioProcessingException(
+                f"Voice settings language ({voice_settings.language}) must match "
+                f"target language ({target_language})"
+            )
+        # Validate audio duration for processing limits
+        if audio.duration > 300:  # 5 minutes limit
+            raise AudioProcessingException(
+                f"Audio duration ({audio.duration:.1f}s) exceeds maximum allowed duration (300s)"
+            )
+        # Validate audio format is supported
+        if not audio.is_valid_format:
+            raise AudioProcessingException(f"Unsupported audio format: {audio.format}")
+    def _perform_speech_recognition(self, audio: 'AudioContent') -> 'TextContent':
+        """
+        Perform speech recognition on the input audio.
+        Args:
+            audio: The input audio content
+        Returns:
+            TextContent: The transcribed text
+        Raises:
+            SpeechRecognitionException: If transcription fails
+        """
+        try:
+            # Use a default STT model - this could be configurable in the future
+            model = "whisper-base"  # Default model
+            return self._speech_recognition_service.transcribe(audio, model)
+        except Exception as e:
+            raise SpeechRecognitionException(f"Speech recognition failed: {str(e)}")
+    def _perform_translation(self, text: 'TextContent', target_language: str) -> 'TextContent':
+        """
+        Perform translation of the transcribed text.
+        Args:
+            text: The text to translate
+            target_language: The target language for translation
+        Returns:
+            TextContent: The translated text
+        Raises:
+            TranslationFailedException: If translation fails
+        """
+        try:
+            # Check if translation is needed
+            if text.language == target_language:
+                # No translation needed, return original text
+                return text
+            # Create translation request
+            translation_request = TranslationRequest(
+                source_text=text,
+                target_language=target_language
+            )
+            return self._translation_service.translate(translation_request)
+        except Exception as e:
+            raise TranslationFailedException(f"Translation failed: {str(e)}")
+    def _perform_speech_synthesis(
+        self,
+        text: 'TextContent',
+        voice_settings: 'VoiceSettings'
+    ) -> 'AudioContent':
+        """
+        Perform speech synthesis on the translated text.
+        Args:
+            text: The text to synthesize
+            voice_settings: Voice settings for synthesis
+        Returns:
+            AudioContent: The synthesized audio
+        Raises:
+            SpeechSynthesisException: If synthesis fails
+        """
+        try:
+            # Create speech synthesis request
+            synthesis_request = SpeechSynthesisRequest(
+                text_content=text,
+                voice_settings=voice_settings
+            )
+            return self._speech_synthesis_service.synthesize(synthesis_request)
+        except Exception as e:
+            raise SpeechSynthesisException(f"Speech synthesis failed: {str(e)}")

tests/unit/domain/services/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Domain services tests package."""

tests/unit/domain/services/test_audio_processing_service.py ADDED Viewed

	@@ -0,0 +1,297 @@

+"""Tests for AudioProcessingService."""
+import pytest
+from unittest.mock import Mock, MagicMock
+from src.domain.services.audio_processing_service import AudioProcessingService
+from src.domain.models.audio_content import AudioContent
+from src.domain.models.text_content import TextContent
+from src.domain.models.voice_settings import VoiceSettings
+from src.domain.models.translation_request import TranslationRequest
+from src.domain.models.speech_synthesis_request import SpeechSynthesisRequest
+from src.domain.exceptions import (
+    AudioProcessingException,
+    SpeechRecognitionException,
+    TranslationFailedException,
+    SpeechSynthesisException
+)
+class TestAudioProcessingService:
+    """Test cases for AudioProcessingService."""
+    @pytest.fixture
+    def mock_stt_service(self):
+        """Mock speech recognition service."""
+        return Mock()
+    @pytest.fixture
+    def mock_translation_service(self):
+        """Mock translation service."""
+        return Mock()
+    @pytest.fixture
+    def mock_tts_service(self):
+        """Mock speech synthesis service."""
+        return Mock()
+    @pytest.fixture
+    def audio_processing_service(self, mock_stt_service, mock_translation_service, mock_tts_service):
+        """AudioProcessingService instance with mocked dependencies."""
+        return AudioProcessingService(
+            speech_recognition_service=mock_stt_service,
+            translation_service=mock_translation_service,
+            speech_synthesis_service=mock_tts_service
+        )
+    @pytest.fixture
+    def sample_audio(self):
+        """Sample audio content for testing."""
+        return AudioContent(
+            data=b"fake_audio_data",
+            format="wav",
+            sample_rate=22050,
+            duration=10.0,
+            filename="test.wav"
+        )
+    @pytest.fixture
+    def sample_voice_settings(self):
+        """Sample voice settings for testing."""
+        return VoiceSettings(
+            voice_id="test_voice",
+            speed=1.0,
+            language="es"
+        )
+    @pytest.fixture
+    def sample_text_content(self):
+        """Sample text content for testing."""
+        return TextContent(
+            text="Hello world",
+            language="en"
+        )
+    def test_successful_pipeline_processing(
+        self,
+        audio_processing_service,
+        mock_stt_service,
+        mock_translation_service,
+        mock_tts_service,
+        sample_audio,
+        sample_voice_settings,
+        sample_text_content
+    ):
+        """Test successful processing through the complete pipeline."""
+        # Arrange
+        original_text = TextContent(text="Hello world", language="en")
+        translated_text = TextContent(text="Hola mundo", language="es")
+        output_audio = AudioContent(
+            data=b"synthesized_audio",
+            format="wav",
+            sample_rate=22050,
+            duration=5.0
+        )
+        mock_stt_service.transcribe.return_value = original_text
+        mock_translation_service.translate.return_value = translated_text
+        mock_tts_service.synthesize.return_value = output_audio
+        # Act
+        result = audio_processing_service.process_audio_pipeline(
+            audio=sample_audio,
+            target_language="es",
+            voice_settings=sample_voice_settings
+        )
+        # Assert
+        assert result.success is True
+        assert result.original_text == original_text
+        assert result.translated_text == translated_text
+        assert result.audio_output == output_audio
+        assert result.error_message is None
+        assert result.processing_time >= 0
+        # Verify service calls
+        mock_stt_service.transcribe.assert_called_once_with(sample_audio, "whisper-base")
+        mock_translation_service.translate.assert_called_once()
+        mock_tts_service.synthesize.assert_called_once()
+    def test_no_translation_needed_same_language(
+        self,
+        audio_processing_service,
+        mock_stt_service,
+        mock_translation_service,
+        mock_tts_service,
+        sample_audio
+    ):
+        """Test pipeline when no translation is needed (same language)."""
+        # Arrange
+        original_text = TextContent(text="Hola mundo", language="es")
+        voice_settings = VoiceSettings(voice_id="test_voice", speed=1.0, language="es")
+        output_audio = AudioContent(
+            data=b"synthesized_audio",
+            format="wav",
+            sample_rate=22050,
+            duration=5.0
+        )
+        mock_stt_service.transcribe.return_value = original_text
+        mock_tts_service.synthesize.return_value = output_audio
+        # Act
+        result = audio_processing_service.process_audio_pipeline(
+            audio=sample_audio,
+            target_language="es",
+            voice_settings=voice_settings
+        )
+        # Assert
+        assert result.success is True
+        assert result.original_text == original_text
+        assert result.translated_text == original_text  # Same as original
+        assert result.audio_output == output_audio
+        # Translation service should not be called
+        mock_translation_service.translate.assert_not_called()
+    def test_validation_error_none_audio(self, audio_processing_service, sample_voice_settings):
+        """Test validation error when audio is None."""
+        # Act
+        result = audio_processing_service.process_audio_pipeline(
+            audio=None,
+            target_language="es",
+            voice_settings=sample_voice_settings
+        )
+        # Assert
+        assert result.success is False
+        assert "Audio content cannot be None" in result.error_message
+    def test_validation_error_empty_target_language(self, audio_processing_service, sample_audio, sample_voice_settings):
+        """Test validation error when target language is empty."""
+        # Act
+        result = audio_processing_service.process_audio_pipeline(
+            audio=sample_audio,
+            target_language="",
+            voice_settings=sample_voice_settings
+        )
+        # Assert
+        assert result.success is False
+        assert "Target language cannot be empty" in result.error_message
+    def test_validation_error_language_mismatch(self, audio_processing_service, sample_audio):
+        """Test validation error when voice settings language doesn't match target language."""
+        # Arrange
+        voice_settings = VoiceSettings(voice_id="test_voice", speed=1.0, language="en")
+        # Act
+        result = audio_processing_service.process_audio_pipeline(
+            audio=sample_audio,
+            target_language="es",
+            voice_settings=voice_settings
+        )
+        # Assert
+        assert result.success is False
+        assert "Voice settings language (en) must match target language (es)" in result.error_message
+    def test_validation_error_audio_too_long(self, audio_processing_service, sample_voice_settings):
+        """Test validation error when audio is too long."""
+        # Arrange
+        long_audio = AudioContent(
+            data=b"fake_audio_data",
+            format="wav",
+            sample_rate=22050,
+            duration=400.0  # Exceeds 300s limit
+        )
+        # Act
+        result = audio_processing_service.process_audio_pipeline(
+            audio=long_audio,
+            target_language="es",
+            voice_settings=sample_voice_settings
+        )
+        # Assert
+        assert result.success is False
+        assert "exceeds maximum allowed duration" in result.error_message
+    def test_stt_failure_handling(
+        self,
+        audio_processing_service,
+        mock_stt_service,
+        sample_audio,
+        sample_voice_settings
+    ):
+        """Test handling of STT service failure."""
+        # Arrange
+        mock_stt_service.transcribe.side_effect = Exception("STT service unavailable")
+        # Act
+        result = audio_processing_service.process_audio_pipeline(
+            audio=sample_audio,
+            target_language="es",
+            voice_settings=sample_voice_settings
+        )
+        # Assert
+        assert result.success is False
+        assert "Speech recognition failed" in result.error_message
+        assert result.processing_time >= 0
+    def test_translation_failure_handling(
+        self,
+        audio_processing_service,
+        mock_stt_service,
+        mock_translation_service,
+        sample_audio,
+        sample_voice_settings
+    ):
+        """Test handling of translation service failure."""
+        # Arrange
+        original_text = TextContent(text="Hello world", language="en")
+        mock_stt_service.transcribe.return_value = original_text
+        mock_translation_service.translate.side_effect = Exception("Translation service unavailable")
+        # Act
+        result = audio_processing_service.process_audio_pipeline(
+            audio=sample_audio,
+            target_language="es",
+            voice_settings=sample_voice_settings
+        )
+        # Assert
+        assert result.success is False
+        assert "Translation failed" in result.error_message
+        assert result.processing_time >= 0
+    def test_tts_failure_handling(
+        self,
+        audio_processing_service,
+        mock_stt_service,
+        mock_translation_service,
+        mock_tts_service,
+        sample_audio,
+        sample_voice_settings
+    ):
+        """Test handling of TTS service failure."""
+        # Arrange
+        original_text = TextContent(text="Hello world", language="en")
+        translated_text = TextContent(text="Hola mundo", language="es")
+        mock_stt_service.transcribe.return_value = original_text
+        mock_translation_service.translate.return_value = translated_text
+        mock_tts_service.synthesize.side_effect = Exception("TTS service unavailable")
+        # Act
+        result = audio_processing_service.process_audio_pipeline(
+            audio=sample_audio,
+            target_language="es",
+            voice_settings=sample_voice_settings
+        )
+        # Assert
+        assert result.success is False
+        assert "Speech synthesis failed" in result.error_message
+        assert result.processing_time >= 0