# Xylaria-TTS
#
# Running
#
# File size: 7,126 Bytes

import streamlit as st
import openai
from kokoro import KPipeline
import soundfile as sf
import io

# Streamlit App UI Setup
st.title("Text-to-Speech Translator with Kokoro")

# Collapsible list of sample sentences in the supported languages that a user
# can paste into the text area. Each entry is labelled with its language name.
with st.expander("Sample Prompt!"):
    st.markdown(""" 
    - My name is Shukdev. (In English) 
    - Mi nombre es Shukdev. (In Spanish) 
    - Je m'appelle Choukdev. (In French) 
    - मेरा नाम शुकदेव है. (In Hindi) 
    - Il mio nome è Shukdev. (In Italian) 
    - Meu nome é Sukhdev. (In Portuguese, Brazil) 
    - 我叫苏赫德夫。(In Chinese) 
    - 私の名前はスクデフです。(In Japanese) 
    """)

# Credit the upstream Kokoro-82M model card. The link points at the plain
# model URL; the `fbclid` click-tracking query string that was pasted along
# with it is dropped because it is not needed to reach the page.
st.sidebar.markdown("""
        ### Courtesy: [Kokoro](https://huggingface.co/hexgrad/Kokoro-82M)
    """)

st.sidebar.header("Configuration & Instructions")

# Step-by-step usage guide rendered as markdown in the sidebar. It lists the
# language codes, voice selection, the speed slider range, and the optional
# OpenAI-based translation feature.
sidebar_instructions = """
### How to Use the Text-to-Speech App:
1. **Enter Text**:
   - Type or paste the text you want to convert to speech in the main text area.
   
2. **Select Language**:
   - Choose the language of the input text. The available language options include:
     - 🇺🇸 American English (`a`)
     - 🇬🇧 British English (`b`)
     - 🇪🇸 Spanish (`e`)
     - 🇫🇷 French (`f`)
     - 🇮🇳 Hindi (`h`)
     - 🇮🇹 Italian (`i`)
     - 🇧🇷 Brazilian Portuguese (`p`)
     - 🇨🇳 Mandarin Chinese (`z`)
     - 🇯🇵 Japanese (`j`)
3. **Select Voice**:
   - Choose the voice you want for the speech. There are multiple voice styles based on tone and gender (e.g., af_heart, af_joy, etc.).
   
4. **Adjust Speech Speed**:
   - Use the slider to adjust how fast the speech will be generated. The speed can be set from 0.5x to 2.0x, with 1.0x being the default normal speed.
5. **Generate Speech**:
   - Once you've selected the text, language, voice, and speed, click the **"Generate Audio"** button. The app will process the text and generate the speech.
6. **Download Audio**:
   - After the audio is generated, you can play it directly within the app or download it as a .wav file by clicking the **"Download Audio"** button.
### Additional Features:
- **Text Translation**:
   - If you enter text in another language and want to hear it in English, provide your OpenAI API key (optional).
   - The app will automatically translate the text to English and generate the speech in English with the voice you selected. 
- Enjoy exploring different languages, voices, and speeds with the text-to-speech conversion!
"""
st.sidebar.markdown(sidebar_instructions)

# Human-readable names for Kokoro's single-letter language codes. The dict
# order is the order shown in the dropdown; the widget still returns the code.
LANGUAGE_LABELS = {
    'a': "American English",
    'b': "British English",
    'e': "Spanish",
    'f': "French",
    'h': "Hindi",
    'i': "Italian",
    'p': "Brazilian Portuguese",
    'z': "Mandarin Chinese",
    'j': "Japanese",
}

# Voice identifiers offered in the dropdown. Change as per the model's voices.
VOICE_OPTIONS = [
    'af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore',
    'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
    'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael',
    'am_onyx', 'am_puck', 'am_santa',
    'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
    'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
    'ef_dora',
    'em_alex', 'em_santa',
    'ff_siwis',
    'hf_alpha', 'hf_beta',
    'hm_omega', 'hm_psi',
    'if_sara',
    'im_nicola',
    'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
    'jm_kumo',
    'pf_dora',
    'pm_alex', 'pm_santa',
    'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
    'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang',
]


@st.cache_resource
def _load_pipeline(lang_code):
    """Build the Kokoro pipeline for ``lang_code`` once and reuse it.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached ``KPipeline(...)`` call would be repeated on each rerun.
    ``st.cache_resource`` keeps one instance per distinct ``lang_code``.

    NOTE(review): cached resources are shared between sessions; confirm that
    KPipeline is safe to use from concurrent sessions.
    """
    return KPipeline(lang_code=lang_code)


# User input for text, language, and voice settings
input_text = st.text_area("Enter your text here", placeholder="The sky above the port was the color of television...")
lang_code = st.selectbox(
    "Select Language",
    list(LANGUAGE_LABELS),
    # Display "Spanish (e)" rather than a bare "e"; the selected value is
    # still the single-letter code the rest of the script expects.
    format_func=lambda code: f"{LANGUAGE_LABELS[code]} ({code})",
)
voice = st.selectbox("Select Voice", VOICE_OPTIONS)
speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)

# Initialize the TTS pipeline with user-selected language (cached per language)
pipeline = _load_pipeline(lang_code)

# Optional OpenAI API key; translation to English only runs when it is provided
openai_api_key = st.text_input("Enter your OpenAI API Key (Optional for Translation)", type="password")

# Full language names for the single-letter codes used by the language
# selector, so the translation prompt names a real language instead of "e".
_SOURCE_LANGUAGE_NAMES = {
    'a': "American English",
    'b': "British English",
    'e': "Spanish",
    'f': "French",
    'h': "Hindi",
    'i': "Italian",
    'p': "Brazilian Portuguese",
    'z': "Mandarin Chinese",
    'j': "Japanese",
}


def _strip_translation_preamble(reply):
    """Remove a leading "The translated text ...:" preamble if one is present."""
    if reply.lower().startswith("the translated text"):
        _, colon, remainder = reply.partition(":")
        # Only strip when a colon actually ends the preamble; otherwise keep
        # the reply untouched instead of failing on a missing separator.
        if colon:
            return remainder.strip()
    return reply


# Function to translate text to English using OpenAI's Chat API
def translate_to_english(api_key, text, lang_code, model="gpt-4"):
    """Translate ``text`` to English with an OpenAI chat model.

    Args:
        api_key: OpenAI API key used for this request only.
        text: Text to translate.
        lang_code: Single-letter source language code (e.g. ``'e'``). Unknown
            codes are passed through to the prompt unchanged.
        model: Chat model name; defaults to ``"gpt-4"``.

    Returns:
        The translated text. If ``text`` is empty or whitespace, it is returned
        unchanged without calling the API. If the request fails or the reply
        is empty, an error is shown (on failure) and the original ``text`` is
        returned as a fallback.
    """
    # Nothing to translate: avoid a billable API round trip.
    if not text or not text.strip():
        return text

    source_language = _SOURCE_LANGUAGE_NAMES.get(lang_code, lang_code)
    prompt = f"Translate the following text from {source_language} to English: \n\n{text}"
    messages = [
        {"role": "system", "content": "You are a helpful assistant that translates text."},
        {"role": "user", "content": prompt},
    ]

    # Broad catch is deliberate: this is the UI boundary, the exception classes
    # differ between openai releases, and the app falls back to the input text.
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: per-call client, so the key is never stored in
            # module-level state shared between sessions.
            client = openai.OpenAI(api_key=api_key)
            response = client.chat.completions.create(model=model, messages=messages)
            reply = response.choices[0].message.content
        else:
            # Legacy openai<1.0 interface; the key is passed per request.
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                api_key=api_key,
            )
            reply = response['choices'][0]['message']['content']
    except Exception as e:
        st.error(f"Error occurred during translation: {e}")
        return text  # Fallback to original text in case of an error

    translated_text = _strip_translation_preamble((reply or "").strip())
    # An empty reply is not a usable translation; keep the original text.
    return translated_text or text

# Generate Audio function
def generate_audio(text, lang_code, voice, speed):
    """Synthesize ``text`` and return the complete result as an in-memory WAV.

    Args:
        text: Text to speak. It is split into segments on newlines.
        lang_code: Language code the text should be synthesized with.
        voice: Voice identifier passed to the pipeline.
        speed: Speech speed multiplier passed to the pipeline.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 holding a 24 kHz WAV file with
        every generated segment, in order, or ``None`` when the pipeline
        produced no audio (for example for empty text).
    """
    # Local import: the file does not import numpy at module level.
    import numpy as np

    # Reuse the shared pipeline only when it was built for the requested
    # language. Otherwise (e.g. English output for translated text) build one
    # for ``lang_code``, which is slower but uses the correct language.
    # NOTE(review): relies on KPipeline exposing ``lang_code``; if the attribute
    # is missing this safely falls back to building a new pipeline.
    if getattr(pipeline, "lang_code", None) == lang_code:
        tts = pipeline
    else:
        tts = KPipeline(lang_code=lang_code)

    # Collect every segment; returning inside the loop would keep only the
    # first line/segment of the input.
    segments = []
    for _graphemes, _phonemes, audio in tts(text, voice=voice, speed=speed, split_pattern=r'\n+'):
        if audio is None:
            continue
        # assumes each chunk is a mono 1-D sample array — TODO confirm
        segments.append(np.asarray(audio))

    if not segments:
        return None

    # Save audio to in-memory buffer; the format must be given explicitly
    # because a BytesIO has no file extension to infer it from.
    buffer = io.BytesIO()
    sf.write(buffer, np.concatenate(segments), 24000, format='WAV')
    buffer.seek(0)
    return buffer

def _show_audio(audio_buffer, download_label, file_name):
    """Render a player and a download button for one WAV buffer.

    Shows an error instead when ``audio_buffer`` is ``None``, so a failed
    synthesis is not passed on to the audio/download widgets.
    """
    if audio_buffer is None:
        st.error("No audio could be generated for this text.")
        return
    st.audio(audio_buffer, format='audio/wav')
    st.download_button(
        label=download_label,
        data=audio_buffer,
        file_name=file_name,
        mime="audio/wav"
    )


# Generate and display the audio file
if st.button('Generate Audio'):
    if not input_text or not input_text.strip():
        # Nothing to synthesize; ask for input instead of calling the pipeline.
        st.warning("Please enter some text before generating audio.")
    else:
        # Generate audio for the original text
        st.write("Generating speech for the original text...")
        audio_buffer = generate_audio(input_text, lang_code, voice, speed)

        # Player and download for the original language
        _show_audio(
            audio_buffer,
            "Download Audio (Original Text)",
            "generated_speech_original.wav",
        )

        # Check if OpenAI API Key is provided for translation and English audio generation
        if openai_api_key:
            # Translate the input text to English using OpenAI
            translated_text = translate_to_english(openai_api_key, input_text, lang_code)

            # Generate audio for the translated English text
            translated_audio_buffer = generate_audio(translated_text, 'a', voice, speed)

            # Display the translated text with its player and download
            st.write(f"Translated Text: {translated_text}")
            _show_audio(
                translated_audio_buffer,
                "Download Audio (Translated to English)",
                "generated_speech_translated.wav",
            )