import json

import openai
from elevenlabs import VoiceSettings

from src.config import (
    DEFAULT_TTS_SIMILARITY_BOOST,
    DEFAULT_TTS_STABILITY,
    DEFAULT_TTS_STABILITY_ACCEPTABLE_RANGE,
    DEFAULT_TTS_STYLE,
    OPENAI_API_KEY,
    logger,
)
from src.prompts import EMOTION_STABILITY_MODIFICATION
from src.schemas import TTSParams
from src.utils import GPTModels, auto_retry


class TTSParamProcessor:
    """Uses an LLM to derive ElevenLabs voice settings (stability) for a text fragment."""

    # TODO: refactor to langchain function (?)

    def __init__(self):
        self.client = openai.AsyncOpenAI(api_key=OPENAI_API_KEY)

    @staticmethod
    def _wrap_results(data: dict, default_text: str) -> TTSParams:
        # Clamp the LLM-suggested stability to the acceptable range.
        stability = data.get('stability', DEFAULT_TTS_STABILITY)
        stability = max(stability, DEFAULT_TTS_STABILITY_ACCEPTABLE_RANGE[0])
        stability = min(stability, DEFAULT_TTS_STABILITY_ACCEPTABLE_RANGE[1])

        similarity_boost = DEFAULT_TTS_SIMILARITY_BOOST
        style = DEFAULT_TTS_STYLE

        params = TTSParams(
            # NOTE: voice will be set later in the builder pipeline
            voice_id='',
            text=default_text,
            # reference: https://elevenlabs.io/docs/speech-synthesis/voice-settings
            voice_settings=VoiceSettings(
                stability=stability,
                similarity_boost=similarity_boost,
                style=style,
                use_speaker_boost=False,
            ),
        )
        return params

    @auto_retry
    async def run(self, text: str) -> TTSParams:
        text_prepared = text.strip()

        # Ask the LLM for emotion-driven stability adjustments as a JSON object.
        completion = await self.client.chat.completions.create(
            model=GPTModels.GPT_4o,
            messages=[
                {"role": "system", "content": EMOTION_STABILITY_MODIFICATION},
                {"role": "user", "content": text_prepared},
            ],
            response_format={"type": "json_object"},
        )
        chatgpt_output = completion.choices[0].message.content
        if chatgpt_output is None:
            raise ValueError('received None as OpenAI response content')

        try:
            output_dict = json.loads(chatgpt_output)
            logger.info(f"TTS text processing succeeded: {output_dict}")
        except json.JSONDecodeError as e:
            logger.exception(f"Error in parsing LLM output: '{chatgpt_output}'")
            raise e

        output_wrapped = self._wrap_results(output_dict, default_text=text_prepared)
        return output_wrapped
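

# Hypothetical usage sketch: a minimal example of how this processor might be
# invoked. It assumes OPENAI_API_KEY is configured in src.config and that the
# OpenAI API is reachable; the input sentence is an arbitrary illustration.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        processor = TTSParamProcessor()
        params = await processor.run("I can't believe we actually made it!")
        # voice_id is intentionally empty here; the builder pipeline fills it in later.
        print(params.voice_settings)

    asyncio.run(_demo())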