"""SpeakEasy: a Streamlit page that translates text and converts it to speech.

The page collects text (typed or uploaded as a ``.txt`` file), a target
language and a voice, asks an OpenAI chat model to translate the text, sends
the translation to the OpenAI text-to-speech endpoint, writes the resulting
MP3 to the working directory and offers it for playback and download.
"""

import base64
import mimetypes
import os
from pathlib import Path

import streamlit as st
from openai import OpenAI

st.set_page_config(page_title="SpeakEasy", layout="wide")

# Cap on the length of the translation returned by the chat model.
# NOTE(review): long inputs will come back truncated at this limit.
MAX_TRANSLATION_TOKENS = 300

# Key under which the most recently generated file name is kept between
# Streamlit reruns (every widget interaction re-executes this script).
_OUTPUT_STATE_KEY = "output_filename"


def img_to_bytes(img_path):
    """Return the contents of *img_path* as a base64-encoded text string.

    Despite the name, the return value is ``str`` (base64 text), not ``bytes``.

    Raises:
        OSError: if the file cannot be read.
    """
    return base64.b64encode(Path(img_path).read_bytes()).decode()


def img_to_html(img_path, width='25', height='25'):
    """Return an ``<img>`` tag that embeds *img_path* as a base64 data URI.

    Args:
        img_path: Path of the image file to embed.
        width: Value for the tag's ``width`` attribute.
        height: Value for the tag's ``height`` attribute.

    Raises:
        OSError: if the image file cannot be read.
    """
    # Fall back to PNG when the type cannot be guessed from the extension.
    mime_type = mimetypes.guess_type(str(img_path))[0] or "image/png"
    encoded = img_to_bytes(img_path)
    return (
        f"<img src='data:{mime_type};base64,{encoded}' "
        f"width='{width}' height='{height}'>"
    )


def _resolve_text(text_input, uploaded_file):
    """Return the text to convert; typed text takes priority over the upload.

    Raises:
        UnicodeDecodeError: if the uploaded file is not valid UTF-8.
    """
    if text_input:
        return text_input
    if uploaded_file is None:
        return ""
    # getvalue() returns the whole buffer regardless of the stream position,
    # so a previous read on an earlier rerun cannot leave us with b"".
    return uploaded_file.getvalue().decode("utf-8").strip()


# Page-level markup; the string is blank in this revision.
st.markdown(""" """, unsafe_allow_html=True)

# Custom CSS (blank in this revision).
st.markdown(""" """, unsafe_allow_html=True)

# Header
try:
    logo_html = img_to_html('clear_productonics_logo.png', width='50', height='50')
except OSError:
    # A missing or unreadable logo should not take the whole page down.
    logo_html = ""

st.markdown(
    f"""
{logo_html}

SpeakEasy

Professional Text-to-Speech Conversion

""",
    unsafe_allow_html=True,
)

# Get API key from environment variable.
# NOTE(review): the messages below mention a .env file, but nothing visible in
# this script loads one; the variable must already be in the process
# environment.
api_key = os.getenv("OPENAI_API_KEY")

if not api_key:
    st.error("OpenAI API Key not found. Please set it in your .env file.")
else:
    st.sidebar.success("API Key loaded successfully!")

# OpenAI TTS voices
openai_voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]

# Target languages offered for translation; extend as needed.
languages = [
    "English",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Portuguese",
    "Hindi",
    "Japanese",
    "Korean",
    "Chinese",
]

# Main content
st.markdown("\n\nText to Speech Converter\n\n", unsafe_allow_html=True)

col1, col2 = st.columns([3, 2])

# Restore the last generated file so the download section survives the rerun
# triggered by any later widget interaction (including the download button).
if _OUTPUT_STATE_KEY not in st.session_state:
    st.session_state[_OUTPUT_STATE_KEY] = None
output_filename = st.session_state[_OUTPUT_STATE_KEY]

with col2:
    uploaded_file = st.file_uploader("Upload a text file", type=["txt"])

with col1:
    text_input = st.text_area("Enter text or paste content", height=150)

    col1_1, col1_2 = st.columns(2)
    # Native select widgets are used because HTML rendered through
    # st.markdown cannot hand the chosen value back to Python.
    with col1_1:
        language = st.selectbox("Language", languages)
    with col1_2:
        voice = st.selectbox("Voice", openai_voices)

if st.button("Convert to Speech"):
    if not api_key:
        st.error("OpenAI API Key not found. Please set it in your .env file.")
    else:
        try:
            text = _resolve_text(text_input, uploaded_file)
        except UnicodeDecodeError:
            text = None
            st.error("The uploaded file could not be decoded as UTF-8 text.")

        if text:
            # A new attempt invalidates whatever was generated before.
            st.session_state[_OUTPUT_STATE_KEY] = None
            output_filename = None

            client = OpenAI(api_key=api_key)

            # Broad catch on purpose: this is the UI boundary, and any API or
            # file-system failure should surface as a message, not a traceback.
            try:
                # Progress bar
                progress_bar = st.progress(0)
                status_text = st.empty()

                # Translate the text using GPT-4
                status_text.text("Translating text...")
                progress_bar.progress(25)

                translation_response = client.chat.completions.create(
                    model="gpt-4-0613",
                    messages=[
                        {"role": "system", "content": f"Translate all text given to {language}"},
                        {"role": "user", "content": text},
                    ],
                    max_tokens=MAX_TRANSLATION_TOKENS,
                )
                translated_text = translation_response.choices[0].message.content
                progress_bar.progress(50)

                # Generate speech using OpenAI TTS
                status_text.text("Generating speech...")
                speech_response = client.audio.speech.create(
                    model="tts-1",
                    voice=voice,
                    input=translated_text,
                )
                progress_bar.progress(75)

                generated_name = f"output_{language.lower()}_{voice.lower()}.mp3"
                Path(generated_name).write_bytes(speech_response.content)

                progress_bar.progress(100)
                status_text.text("Speech generated successfully!")

                st.write(f"Speech generated successfully for {language} with {voice} voice")
                st.audio(generated_name, format='audio/mp3')

                # Publish the result only after everything above succeeded.
                output_filename = generated_name
                st.session_state[_OUTPUT_STATE_KEY] = generated_name
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")
        elif text is not None:
            # Nothing typed and no (non-empty) file uploaded.
            st.warning("Please provide some text for speech generation.")

# Download section
# The file check guards against a remembered name whose file has since been
# removed from the working directory.
if output_filename and Path(output_filename).is_file():
    st.markdown("---")
    st.markdown("\n\nDownload Your Audio\n\n", unsafe_allow_html=True)

    col3, col4 = st.columns([1, 4])
    with col3:
        st.image("https://img.icons8.com/fluency-systems-filled/96/3B82F6/download.png", width=60)
    with col4:
        st.markdown("\n\nReady to Download\n\n", unsafe_allow_html=True)
        st.markdown("\n", unsafe_allow_html=True)
        st.download_button(
            "Download Audio",
            # read_bytes() opens and closes the file; no handle is left open.
            data=Path(output_filename).read_bytes(),
            file_name=output_filename,
            mime="audio/mpeg",
        )
        st.markdown("\n", unsafe_allow_html=True)

# Footer (blank in this revision).
st.markdown(""" """, unsafe_allow_html=True)