Spaces:

mhanagan
/

gpt-4-turbo-chatbot

Running

App Files Files Community

gpt-4-turbo-chatbot / mattgpt-text-to-audio.py

mhanagan

Upload folder using huggingface_hub

a63d350 verified about 1 year ago

raw

history blame contribute delete

2.58 kB

	#!/usr/bin/env rye run python
	import io
	import os
	import time
	from pathlib import Path

	import gradio as gr
	from dotenv import load_dotenv
	from openai import OpenAI

	# Load environment variables from a local .env file into the process environment.
	load_dotenv()
	# Read the API key under the variable name defined in the .env file.
	api_key = os.getenv("OPENAI_API_KEY")

	# main() and stream_to_speakers() both call `openai.audio...`, so the name
	# `openai` must be bound to a client instance (only the OpenAI class is imported).
	openai = OpenAI(api_key=api_key)

	# Location of the mp3 that main() writes and then feeds back to the
	# transcription and translation endpoints; it sits next to this script.
	speech_file_path = Path(__file__).parent / "speech.mp3"

	def main() -> None:
	    """Prompt for text on stdin, speak it, then transcribe and translate it.

	    The entered text is first played through stream_to_speakers(), then
	    rendered to ``speech_file_path`` with the "tts-1" model. That file is sent
	    to the "whisper-1" transcription and translation endpoints and both
	    resulting texts are printed.

	    Blank input (empty or whitespace only) is reported and skipped, so no
	    API request is made for it.
	    """
	    # Prompting user to input the text they want to convert to speech
	    user_input = input("Please enter the text you want to convert to speech: ")

	    # Nothing to synthesize: return early instead of sending blank text.
	    if not user_input.strip():
	        print("No text entered; nothing to convert.")
	        return

	    # Stream the user's input text to speakers
	    stream_to_speakers(user_input)

	    # Create text-to-speech audio file with user input
	    with openai.audio.speech.with_streaming_response.create(
	        model="tts-1",
	        voice="alloy",
	        input=user_input,
	    ) as response:
	        response.stream_to_file(speech_file_path)

	    # Create transcription from audio file
	    transcription = openai.audio.transcriptions.create(
	        model="whisper-1",
	        file=speech_file_path,
	    )
	    print(transcription.text)

	    # Create translation from audio file
	    translation = openai.audio.translations.create(
	        model="whisper-1",
	        file=speech_file_path,
	    )
	    print(translation.text)
	def stream_to_speakers(user_input: str) -> tuple:
	    """Synthesize *user_input*, play it while it downloads, and return the audio.

	    The text is sent to the "tts-1" model with ``response_format="pcm"`` and
	    each received chunk is written straight to a PyAudio output stream opened
	    as mono, 16-bit, 24000 Hz. The audio device is always released, even if
	    the request or playback fails.

	    Args:
	        user_input: Text to convert to speech.

	    Returns:
	        ``(sample_rate, samples)`` where ``samples`` is a 1-D int16 NumPy
	        array of the audio that was played. This is a form a Gradio audio
	        output component can render; callers that only want playback may
	        ignore the return value.
	    """
	    import numpy as np
	    import pyaudio

	    sample_rate = 24000
	    pcm_chunks = []

	    audio = pyaudio.PyAudio()
	    try:
	        player_stream = audio.open(format=pyaudio.paInt16, channels=1, rate=sample_rate, output=True)
	        try:
	            start_time = time.time()

	            with openai.audio.speech.with_streaming_response.create(
	                model="tts-1",
	                voice="alloy",
	                response_format="pcm",
	                input=user_input,
	            ) as response:
	                print(f"Time to first byte: {int((time.time() - start_time) * 1000)}ms")

	                for chunk in response.iter_bytes(chunk_size=1024):
	                    # Play each chunk as soon as it arrives; keep a copy so
	                    # the complete clip can be returned to the caller.
	                    player_stream.write(chunk)
	                    pcm_chunks.append(chunk)
	        finally:
	            # Release the output stream whether or not playback succeeded.
	            player_stream.stop_stream()
	            player_stream.close()
	    finally:
	        audio.terminate()

	    print(f"Done in {int((time.time() - start_time) * 1000)}ms.")

	    pcm = b"".join(pcm_chunks)
	    # frombuffer needs a whole number of 2-byte samples; drop a stray odd byte.
	    pcm = pcm[: len(pcm) - (len(pcm) % 2)]
	    return sample_rate, np.frombuffer(pcm, dtype="<i2")

	# When executed as a script, run the interactive console flow first.
	if __name__ == "__main__":
	    main()


	# Build the web UI: one text box in, one audio component out, handled by
	# stream_to_speakers.
	iface = gr.Interface(
	    stream_to_speakers,
	    "text",
	    "audio",
	    title="MattGPT Text to Speech",
	    description="Enter text and hear it converted to speech.",
	)

	# Start the Gradio server and request a public share link.
	iface.launch(share=True)