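# Spaces: Build error
# (The line above appears to be status text from the hosting page that was
# captured together with this file; it is not part of the program.)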
#!/usr/bin/env rye run python | |
import io
import os
import time
from pathlib import Path

import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI
# Load environment variables from .env file
load_dotenv()

# Access the API key using the variable name defined in the .env file
api_key = os.getenv("OPENAI_API_KEY")

# Shared API client. It is bound to the name `openai` because main() and
# stream_to_speakers() both make their calls through `openai.audio...`;
# without this binding that name is undefined (only the OpenAI class is
# imported).
openai = OpenAI(api_key=api_key)

# Where main() saves the synthesized speech before transcribing it.
speech_file_path = Path(__file__).parent / "speech.mp3"
def main() -> None:
    """Run the interactive command-line demo.

    Prompts for text on stdin, plays it through the speakers, saves the
    synthesized speech to ``speech_file_path``, then prints a transcription
    and a translation of the saved audio file.

    Relies on two module-level names: ``openai``, which must be bound to an
    OpenAI client instance, and ``speech_file_path``.
    """
    # Prompt the user for the text they want to convert to speech.
    user_input = input("Please enter the text you want to convert to speech: ")
    if not user_input.strip():
        # There is nothing to synthesize, so skip every API call.
        print("No text entered; nothing to convert.")
        return

    # Stream the user's input text to the speakers.
    stream_to_speakers(user_input)

    # Create a text-to-speech audio file from the user input.
    with openai.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice="alloy",
        input=user_input,
    ) as response:
        response.stream_to_file(speech_file_path)

    # Create a transcription from the audio file.
    transcription = openai.audio.transcriptions.create(
        model="whisper-1",
        file=speech_file_path,
    )
    print(transcription.text)

    # Create a translation from the audio file.
    translation = openai.audio.translations.create(
        model="whisper-1",
        file=speech_file_path,
    )
    print(translation.text)
def stream_to_speakers(user_input: str) -> None:
    """Synthesize ``user_input`` as speech and play it on the local audio output.

    Audio is requested in raw ``pcm`` form and each received chunk is written
    straight to a PyAudio output stream. Two timings are printed: the time
    until the response is opened and the total time until playback data has
    been fully written.

    Relies on the module-level ``openai`` name being bound to an OpenAI
    client instance.

    Args:
        user_input: The text to convert to speech.
    """
    # Imported here so the module can be loaded without PyAudio installed.
    import pyaudio

    audio = pyaudio.PyAudio()
    try:
        # NOTE(review): 16-bit mono at 24000 Hz is presumably the layout of
        # the API's "pcm" response format -- confirm against the OpenAI docs.
        player_stream = audio.open(
            format=pyaudio.paInt16, channels=1, rate=24000, output=True
        )
        try:
            start_time = time.time()
            with openai.audio.speech.with_streaming_response.create(
                model="tts-1",
                voice="alloy",
                response_format="pcm",
                input=user_input,
            ) as response:
                print(f"Time to first byte: {int((time.time() - start_time) * 1000)}ms")
                # Play every chunk as soon as it arrives; no intermediate
                # buffer is needed because each chunk is written exactly once.
                for chunk in response.iter_bytes(chunk_size=1024):
                    player_stream.write(chunk)
            print(f"Done in {int((time.time() - start_time) * 1000)}ms.")
        finally:
            # Release the output stream even if the request or playback fails.
            player_stream.stop_stream()
            player_stream.close()
    finally:
        # Release the PortAudio resources held by the PyAudio instance.
        audio.terminate()
# NOTE(review): when this file is run as a script, main() -- which waits for
# text on stdin -- runs to completion before the interface below is created
# and launched. Confirm that this ordering is intended.
if __name__ == "__main__":
    main()

# Create a Gradio interface
# NOTE(review): stream_to_speakers is annotated `-> None` and has no return
# statement, so the "audio" output component is not given any data by it.
# Confirm whether the function is expected to return audio for the browser.
iface = gr.Interface(
    fn=stream_to_speakers,
    inputs="text",
    outputs="audio",
    title="MattGPT Text to Speech",
    description="Enter text and hear it converted to speech.",
)

# Launch the interface
iface.launch(share=True)