# If you do not use pipenv, uncomment the following two lines:
# from dotenv import load_dotenv
# load_dotenv()

# Step 1a: Set up the text-to-speech (TTS) model with gTTS.
import os
from gtts import gTTS


def text_to_speech_with_gtts_old(input_text, output_filepath):
    """Convert *input_text* to English speech with gTTS and save it.

    The synthesized audio is written to *output_filepath*. This variant
    only saves the file; it does not play it back.
    """
    speech = gTTS(text=input_text, lang="en", slow=False)
    speech.save(output_filepath)


# Example usage:
# input_text="Hi"
# text_to_speech_with_gtts_old(input_text=input_text, output_filepath="gtts_testing.mp3")

# Step 1b: Set up the text-to-speech (TTS) model with ElevenLabs.
import elevenlabs
from elevenlabs.client import ElevenLabs

# Read once at import time; None when the variable is not set.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")


def text_to_speech_with_elevenlabs_old(input_text, output_filepath):
    """Convert *input_text* to speech with ElevenLabs and save it.

    Uses the "Emily" voice and the "eleven_turbo_v2" model, requesting
    "mp3_22050_32" output, and writes the result to *output_filepath*.
    This variant only saves the file; it does not play it back.
    """
    tts_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    elevenlabs.save(
        tts_client.generate(
            text=input_text,
            voice="Emily",
            output_format="mp3_22050_32",
            model="eleven_turbo_v2",
        ),
        output_filepath,
    )


# Example usage:
# text_to_speech_with_elevenlabs_old(input_text, output_filepath="elevenlabs_testing.mp3")

# Step 2: Use the model for text output to voice.
# Saved audio files do not play automatically, so the functions in this
# step also trigger playback right after the file is written.
import subprocess
import platform
from pydub import AudioSegment
from pydub.playback import play
import tempfile


def text_to_speech_with_gtts(input_text, output_filepath):
    """Convert *input_text* to English speech with gTTS, save it, and play it.

    Args:
        input_text: Text to synthesize.
        output_filepath: Path the generated audio file is written to.

    Returns:
        ``output_filepath``, mirroring ``text_to_speech_with_elevenlabs``.
    """
    speech = gTTS(text=input_text, lang="en", slow=False)
    speech.save(output_filepath)

    # gTTS produces MP3 data. Playback goes through play_audio() because the
    # players previously used here (Media.SoundPlayer on Windows, aplay on
    # Linux) only handle WAV/PCM input and cannot play an MP3 file.
    play_audio(output_filepath)
    return output_filepath


# Example usage:
# input_text="Hi"
# text_to_speech_with_gtts(input_text=input_text, output_filepath="gtts_testing_autoplay.mp3")


def _powershell_single_quote(value):
    """Return *value* as a PowerShell single-quoted string literal."""
    # Inside single quotes PowerShell performs no expansion; the only
    # character that needs escaping is the quote itself, by doubling it.
    return "'" + value.replace("'", "''") + "'"


def _play_mp3_on_windows(file_path):
    """Convert an MP3 to a temporary WAV and play it via Media.SoundPlayer."""
    audio = AudioSegment.from_mp3(file_path)

    # Reserve a unique path and close the handle before exporting, so the
    # file is not held open while pydub and PowerShell access it by name.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
        wav_path = temp_wav.name

    try:
        audio.export(wav_path, format='wav')
        quoted_path = _powershell_single_quote(wav_path)
        subprocess.run(
            [
                'powershell',
                '-c',
                f'(New-Object Media.SoundPlayer {quoted_path}).PlaySync();',
            ],
            check=True,
        )
    finally:
        # Always remove the temporary WAV, even if export or playback failed.
        os.unlink(wav_path)


def play_audio(file_path):
    """Play the MP3 file at *file_path* with a player suited to the current OS.

    macOS uses ``afplay``, Linux uses ``mpg123``, and Windows converts the
    MP3 to a temporary WAV file that is played through PowerShell.

    Playback is best-effort: any failure (unsupported OS, missing player,
    non-zero player exit status, decode error) is reported on stdout and
    not raised to the caller.

    Args:
        file_path: Path of the MP3 file to play.
    """
    os_name = platform.system()
    try:
        if os_name == "Darwin":  # macOS
            subprocess.run(['afplay', file_path], check=True)
        elif os_name == "Windows":
            _play_mp3_on_windows(file_path)
        elif os_name == "Linux":
            subprocess.run(['mpg123', file_path], check=True)
        else:
            raise OSError("Unsupported operating system")
    except Exception as e:
        # Deliberately broad: a playback problem must not break the caller,
        # which has already produced the audio file.
        print(f"An error occurred while trying to play the audio: {e}")


def text_to_speech_with_elevenlabs(input_text, output_filepath):
    """Convert *input_text* to speech with ElevenLabs, save it, and play it.

    Uses the "Aria" voice and the "eleven_turbo_v2" model, requesting
    "mp3_22050_32" output.

    Args:
        input_text: Text to synthesize.
        output_filepath: Path the generated audio file is written to.

    Returns:
        ``output_filepath``.
    """
    client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    audio = client.generate(
        text=input_text,
        voice="Aria",
        output_format="mp3_22050_32",
        model="eleven_turbo_v2",
    )
    elevenlabs.save(audio, output_filepath)

    # Play the audio that was just saved.
    play_audio(output_filepath)
    return output_filepath


# Example usage:
# text_to_speech_with_elevenlabs(input_text, output_filepath="elevenlabs_testing_autoplay.mp3")