Spaces:

DroolingPanda
/

teachingAssistant

Sleeping

teachingAssistant / src /domain /services /audio_processing_service.py

Michael Hu

Implement domain services

6aea21a 3 months ago

8.22 kB

	"""Concrete implementation of audio processing service."""

	import time
	from typing import TYPE_CHECKING

	from ..interfaces.audio_processing import IAudioProcessingService
	from ..interfaces.speech_recognition import ISpeechRecognitionService
	from ..interfaces.translation import ITranslationService
	from ..interfaces.speech_synthesis import ISpeechSynthesisService
	from ..models.processing_result import ProcessingResult
	from ..models.translation_request import TranslationRequest
	from ..models.speech_synthesis_request import SpeechSynthesisRequest
	from ..exceptions import (
	AudioProcessingException,
	SpeechRecognitionException,
	TranslationFailedException,
	SpeechSynthesisException
	)

	if TYPE_CHECKING:
	from ..models.audio_content import AudioContent
	from ..models.voice_settings import VoiceSettings


	class AudioProcessingService(IAudioProcessingService):
	    """Orchestrate the audio pipeline: speech recognition -> translation -> synthesis.

	    The three stage services are injected through the constructor; this class
	    only validates inputs, calls the stages in order, and packages the outcome
	    into a ``ProcessingResult``.
	    """

	    # Upper bound on input length, in seconds (5 minutes).
	    MAX_AUDIO_DURATION_SECONDS = 300

	    # Model name handed to the speech recognition service; kept as a class
	    # attribute so it can be overridden without touching the method body.
	    DEFAULT_STT_MODEL = "whisper-base"

	    def __init__(
	        self,
	        speech_recognition_service: ISpeechRecognitionService,
	        translation_service: ITranslationService,
	        speech_synthesis_service: ISpeechSynthesisService
	    ):
	        """
	        Initialize the audio processing service with injected dependencies.

	        Args:
	            speech_recognition_service: Service for speech-to-text conversion
	            translation_service: Service for text translation
	            speech_synthesis_service: Service for text-to-speech synthesis
	        """
	        self._speech_recognition_service = speech_recognition_service
	        self._translation_service = translation_service
	        self._speech_synthesis_service = speech_synthesis_service

	    def process_audio_pipeline(
	        self,
	        audio: 'AudioContent',
	        target_language: str,
	        voice_settings: 'VoiceSettings'
	    ) -> 'ProcessingResult':
	        """
	        Process audio through the complete pipeline: STT -> Translation -> TTS.

	        Exceptions raised by validation or by any pipeline step are converted
	        into a failure result instead of being propagated to the caller.

	        Args:
	            audio: The input audio content
	            target_language: The target language for translation
	            voice_settings: Voice settings for TTS synthesis

	        Returns:
	            ProcessingResult: A success result carrying the original text, the
	            translated text, the synthesized audio and the elapsed time, or a
	            failure result carrying an error message and the elapsed time.
	        """
	        # perf_counter() is monotonic, so the elapsed time cannot be skewed by
	        # wall-clock adjustments the way time.time() differences can.
	        start_time = time.perf_counter()

	        try:
	            # Validate inputs
	            self._validate_pipeline_inputs(audio, target_language, voice_settings)

	            # Step 1: Speech Recognition (STT)
	            original_text = self._perform_speech_recognition(audio)

	            # Step 2: Translation
	            translated_text = self._perform_translation(original_text, target_language)

	            # Step 3: Speech Synthesis (TTS)
	            audio_output = self._perform_speech_synthesis(translated_text, voice_settings)

	            # Create successful result
	            return ProcessingResult.success_result(
	                original_text=original_text,
	                translated_text=translated_text,
	                audio_output=audio_output,
	                processing_time=time.perf_counter() - start_time
	            )

	        except (
	            AudioProcessingException,
	            SpeechRecognitionException,
	            TranslationFailedException,
	            SpeechSynthesisException
	        ) as e:
	            # Expected domain failures, including input validation errors
	            # (AudioProcessingException): report their message unchanged.
	            return ProcessingResult.failure_result(
	                error_message=str(e),
	                processing_time=time.perf_counter() - start_time
	            )
	        except Exception as e:
	            # Anything else is genuinely unexpected; label it as such.
	            return ProcessingResult.failure_result(
	                error_message=f"Unexpected error in audio processing pipeline: {e}",
	                processing_time=time.perf_counter() - start_time
	            )

	    def _validate_pipeline_inputs(
	        self,
	        audio: 'AudioContent',
	        target_language: str,
	        voice_settings: 'VoiceSettings'
	    ) -> None:
	        """
	        Validate inputs for the audio processing pipeline.

	        Checks run in order and stop at the first failure: presence of audio,
	        non-blank target language, presence of voice settings, voice language
	        equal to target language, audio duration within the limit, and a
	        supported audio format.

	        Args:
	            audio: The input audio content
	            target_language: The target language for translation
	            voice_settings: Voice settings for TTS synthesis

	        Raises:
	            AudioProcessingException: If validation fails
	        """
	        if audio is None:
	            raise AudioProcessingException("Audio content cannot be None")

	        if not target_language or not target_language.strip():
	            raise AudioProcessingException("Target language cannot be empty")

	        if voice_settings is None:
	            raise AudioProcessingException("Voice settings cannot be None")

	        # The synthesized voice must speak the language we translate into.
	        if voice_settings.language != target_language:
	            raise AudioProcessingException(
	                f"Voice settings language ({voice_settings.language}) must match "
	                f"target language ({target_language})"
	            )

	        # Enforce the processing limit on input length.
	        if audio.duration > self.MAX_AUDIO_DURATION_SECONDS:
	            raise AudioProcessingException(
	                f"Audio duration ({audio.duration:.1f}s) exceeds maximum allowed "
	                f"duration ({self.MAX_AUDIO_DURATION_SECONDS}s)"
	            )

	        # Reject formats the audio object itself reports as unsupported.
	        if not audio.is_valid_format:
	            raise AudioProcessingException(f"Unsupported audio format: {audio.format}")

	    # NOTE(review): 'TextContent' appears only in string annotations below; the
	    # TYPE_CHECKING imports visible in this module do not include it, so static
	    # type checkers will report it as undefined. Confirm and add the import.

	    def _perform_speech_recognition(self, audio: 'AudioContent') -> 'TextContent':
	        """
	        Perform speech recognition on the input audio.

	        Args:
	            audio: The input audio content

	        Returns:
	            TextContent: The transcribed text

	        Raises:
	            SpeechRecognitionException: If transcription fails; the original
	            exception is attached as ``__cause__``.
	        """
	        try:
	            return self._speech_recognition_service.transcribe(audio, self.DEFAULT_STT_MODEL)
	        except Exception as e:
	            raise SpeechRecognitionException(f"Speech recognition failed: {e}") from e

	    def _perform_translation(self, text: 'TextContent', target_language: str) -> 'TextContent':
	        """
	        Perform translation of the transcribed text.

	        Args:
	            text: The text to translate
	            target_language: The target language for translation

	        Returns:
	            TextContent: The translated text, or ``text`` itself when it is
	            already in the target language.

	        Raises:
	            TranslationFailedException: If translation fails; the original
	            exception is attached as ``__cause__``.
	        """
	        try:
	            # No translation needed when source and target languages agree.
	            if text.language == target_language:
	                return text

	            translation_request = TranslationRequest(
	                source_text=text,
	                target_language=target_language
	            )
	            return self._translation_service.translate(translation_request)
	        except Exception as e:
	            raise TranslationFailedException(f"Translation failed: {e}") from e

	    def _perform_speech_synthesis(
	        self,
	        text: 'TextContent',
	        voice_settings: 'VoiceSettings'
	    ) -> 'AudioContent':
	        """
	        Perform speech synthesis on the translated text.

	        Args:
	            text: The text to synthesize
	            voice_settings: Voice settings for synthesis

	        Returns:
	            AudioContent: The synthesized audio

	        Raises:
	            SpeechSynthesisException: If synthesis fails; the original
	            exception is attached as ``__cause__``.
	        """
	        try:
	            synthesis_request = SpeechSynthesisRequest(
	                text_content=text,
	                voice_settings=voice_settings
	            )
	            return self._speech_synthesis_service.synthesize(synthesis_request)
	        except Exception as e:
	            raise SpeechSynthesisException(f"Speech synthesis failed: {e}") from e