Spaces:

DroolingPanda
/

teachingAssistant

Sleeping

Michael Hu

refactor tts module

7495571 3 months ago

4.33 kB

	import logging
	import os
	import time
	import numpy as np
	import soundfile as sf
	from typing import Optional, Generator, Tuple, List
	from abc import ABC, abstractmethod

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)


	class TTSBase(ABC):
	    """Base class for all TTS engines.

	    This abstract class defines the interface that all TTS engines must
	    implement: :meth:`generate_speech` (writes an audio file) and
	    :meth:`generate_speech_stream` (yields audio segments). It also offers
	    a helper for choosing an output file path.
	    """

	    def __init__(self, lang_code: str = 'z'):
	        """Initialize the TTS engine.

	        Args:
	            lang_code (str): Language code for the engine
	        """
	        self.lang_code = lang_code

	    @abstractmethod
	    def generate_speech(self, text: str, voice: str = 'default', speed: float = 1.0) -> Optional[str]:
	        """Generate speech from text.

	        Args:
	            text (str): Input text to synthesize
	            voice (str): Voice ID to use
	            speed (float): Speech speed multiplier

	        Returns:
	            Optional[str]: Path to the generated audio file or None if
	            generation fails
	        """
	        pass

	    @abstractmethod
	    def generate_speech_stream(self, text: str, voice: str = 'default', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
	        """Generate speech stream from text.

	        Args:
	            text (str): Input text to synthesize
	            voice (str): Voice ID to use
	            speed (float): Speech speed multiplier

	        Yields:
	            tuple: (sample_rate, audio_data) pairs for each segment
	        """
	        pass

	    def _generate_output_path(self, prefix: str = "tts", extension: str = "wav") -> str:
	        """Generate a collision-resistant output path for an audio file.

	        The file is placed in an ``output`` directory under the current
	        working directory; the directory is created if it does not exist.
	        The filename has the form ``<prefix>_<ms timestamp>_<random hex>.<ext>``.

	        Args:
	            prefix (str): Prefix for the filename
	            extension (str): File extension, with or without a leading dot

	        Returns:
	            str: Path to the output file (the file itself is not created)
	        """
	        timestamp = int(time.time() * 1000)
	        # A millisecond timestamp alone repeats when two requests arrive in
	        # the same millisecond, which would make them overwrite each other's
	        # audio; a short random suffix keeps the names distinct.
	        suffix = os.urandom(4).hex()
	        # Accept both "wav" and ".wav" without producing a double dot.
	        ext = extension.lstrip(".")
	        filename = f"{prefix}_{timestamp}_{suffix}.{ext}"
	        output_dir = os.path.join(os.getcwd(), "output")
	        os.makedirs(output_dir, exist_ok=True)
	        return os.path.join(output_dir, filename)


	class DummyTTS(TTSBase):
	    """Dummy TTS engine that generates sine wave audio.

	    This class is used as a fallback when no other TTS engine is available.
	    Both the file and the streaming entry points produce the same tone,
	    built by :meth:`_synthesize_tone`.
	    """

	    # Parameters of the placeholder tone, shared by both entry points.
	    _SAMPLE_RATE = 24000        # samples per second
	    _TONE_HZ = 440              # sine wave frequency
	    _AMPLITUDE = 0.5            # peak amplitude of the sine wave
	    _CHARS_PER_SECOND = 20      # rough approximation of speech duration
	    _MAX_DURATION_S = 10        # upper bound on the generated duration

	    def _synthesize_tone(self, text: str) -> Tuple[int, np.ndarray]:
	        """Build the placeholder sine wave for ``text``.

	        Only the length of ``text`` is used: it determines the duration,
	        capped at ``_MAX_DURATION_S`` seconds.

	        Args:
	            text (str): Input text

	        Returns:
	            tuple: (sample_rate, audio_data)
	        """
	        duration = min(len(text) / self._CHARS_PER_SECOND, self._MAX_DURATION_S)
	        sample_count = int(self._SAMPLE_RATE * duration)
	        t = np.linspace(0, duration, sample_count, endpoint=False)
	        audio = self._AMPLITUDE * np.sin(2 * np.pi * self._TONE_HZ * t)
	        return self._SAMPLE_RATE, audio

	    def generate_speech(self, text: str, voice: str = 'default', speed: float = 1.0) -> str:
	        """Generate a dummy sine wave audio file.

	        Args:
	            text (str): Input text (only its length is used)
	            voice (str): Voice ID (not used)
	            speed (float): Speech speed multiplier (not used)

	        Returns:
	            str: Path to the generated audio file
	        """
	        logger.info("Generating dummy speech for text length: %s", len(text))

	        sample_rate, audio = self._synthesize_tone(text)

	        # Save to file
	        output_path = self._generate_output_path(prefix="dummy")
	        sf.write(output_path, audio, sample_rate)

	        logger.info("Generated dummy audio: %s", output_path)
	        return output_path

	    def generate_speech_stream(self, text: str, voice: str = 'default', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
	        """Generate a dummy sine wave audio stream.

	        Args:
	            text (str): Input text (only its length is used)
	            voice (str): Voice ID (not used)
	            speed (float): Speech speed multiplier (not used)

	        Yields:
	            tuple: (sample_rate, audio_data) pairs; a single pair is yielded
	        """
	        logger.info("Generating dummy speech stream for text length: %s", len(text))

	        # The whole tone is emitted as one segment.
	        yield self._synthesize_tone(text)