Spaces:

CineAI
/

Chelsea

Sleeping

v0.0.1_DigItBMTH

d5436e0 7 months ago

1.67 kB

	import logging

	from io import BytesIO
	from typing import Optional

	import librosa
	import soundfile as sf
	from streamlit_TTS import auto_play, text_to_audio

	from .config import pipe_tts

	# Sample rate used by T2A when writing the WAV buffer, when reloading it to
	# measure duration, and as the rate value returned from T2A.get_audio.
	SAMPLING_RATE = 16_000


	class T2A:
	    """Text-to-audio helper built on the project's ``pipe_tts`` pipeline.

	    The text is synthesised once at construction time through ``pipe_tts``
	    and the result is kept on ``output_model``; ``get_audio`` later encodes
	    it as WAV bytes. ``autoplay`` does not use that result: it sends the
	    text through ``streamlit_TTS`` instead.
	    """

	    def __init__(self, input_text: Optional[str] = None):
	        """Store *input_text* and synthesise it when text was supplied.

	        Args:
	            input_text: Text to synthesise. When omitted (``None``) the
	                pipeline is not invoked and ``output_model`` is ``None``.
	        """
	        self.text = input_text
	        # The default argument is None; skip the pipeline call in that case
	        # rather than handing it a non-text input.
	        self.output_model = (
	            pipe_tts(input_text) if input_text is not None else None
	        )

	    def __get_duration(self, raw: bytes) -> float:
	        """Return the duration librosa reports for encoded audio bytes.

	        Args:
	            raw: Encoded audio; ``get_audio`` passes the WAV bytes it wrote.

	        Returns:
	            The value of ``librosa.get_duration`` for the decoded signal.
	        """
	        # Load at SAMPLING_RATE, the same rate get_audio wrote the buffer with.
	        audio, sample_rate = librosa.load(BytesIO(raw), sr=SAMPLING_RATE)
	        return librosa.get_duration(y=audio, sr=sample_rate)

	    def autoplay(self, lang: str = "en") -> None:
	        """Play the stored text through ``streamlit_TTS``.

	        When the stored text is not a string, a notice naming the offending
	        type is played instead of the text itself.

	        Args:
	            lang: Language code forwarded to ``text_to_audio``.

	        Raises:
	            ValueError: If no text was provided (``self.text`` is ``None``).
	        """
	        if self.text is None:
	            # ValueError subclasses Exception, so callers that caught the
	            # previous generic Exception keep working.
	            raise ValueError("Text is None")

	        if isinstance(self.text, str):
	            message = self.text
	        else:
	            message = (
	                f"Text you provide is {type(self.text)} accepted only string type"
	            )

	        auto_play(text_to_audio(message, language=lang))

	    def get_audio(self) -> Optional[tuple[bytes, int, float]]:
	        """Encode the synthesised waveform as WAV and measure its duration.

	        Returns:
	            ``(wav_bytes, SAMPLING_RATE, duration)`` on success, or ``None``
	            when nothing was synthesised or encoding fails. Failures are
	            logged rather than raised, so callers must check for ``None``.
	        """
	        if self.output_model is None:
	            logging.error("No synthesised audio available: text was None")
	            return None

	        try:
	            synth = self.output_model["audio"][0]
	            logging.debug("synth : %s", synth)

	            # NOTE(review): the buffer is written at the fixed SAMPLING_RATE;
	            # confirm that matches the rate pipe_tts actually produces.
	            with BytesIO() as buffer:
	                sf.write(buffer, synth, SAMPLING_RATE, format="wav")
	                output = buffer.getvalue()
	            logging.debug("type : %s", type(output))

	            duration = self.__get_duration(output)
	            logging.debug("duration : %s", duration)
	        except Exception:
	            # Best-effort contract kept from the original: report, don't raise.
	            logging.exception("Failed to build WAV output from synthesised audio")
	            return None

	        return output, SAMPLING_RATE, duration