"""Streamlit demo that turns a multi-speaker script into audio with Dia-1.6B."""

import os
import uuid

import soundfile as sf
import streamlit as st
import torch
from dia.model import Dia

# Directory where generated WAV files are written.
AUDIO_DIR = "static/audio"
# Sample rate passed to soundfile when writing the generated audio.
SAMPLE_RATE = 44100
# Number of intra-op threads torch is limited to when running on CPU.
CPU_THREADS = 4
# Pre-filled example script shown in the text area.
DEFAULT_SCRIPT = (
    "[S1] Dia is an open weights text to dialogue model. "
    "[S2] You get full control over scripts and voices. "
    "[S1] Wow. Amazing. (laughs) "
    "[S2] Try it now on Git hub or Hugging Face."
)

# Set page config
st.set_page_config(
    page_title="Dia Text-to-Speech Converter",
    page_icon="🎙️",
    layout="centered",
)

# Create directory for audio files
os.makedirs(AUDIO_DIR, exist_ok=True)


@st.cache_resource(show_spinner=False)
def _load_model(device):
    """Load the Dia model once per server process for the given device.

    Args:
        device: ``"cuda"`` or ``"cpu"``; forwarded to ``Dia.from_pretrained``.

    Returns:
        The loaded Dia model object.
    """
    # NOTE(review): float16 is kept for CUDA only; float32 is used on CPU on
    # the assumption that half precision is poorly supported there — confirm.
    compute_dtype = "float16" if device == "cuda" else "float32"
    model = Dia.from_pretrained(
        "nari-labs/Dia-1.6B",
        compute_dtype=compute_dtype,
        device=device,
    )
    if device == "cpu":
        # NOTE(review): Dia may not expose eval(); guard the call so the CPU
        # path cannot fail with AttributeError — confirm against dia.model.
        if hasattr(model, "eval"):
            model = model.eval()
        torch.set_num_threads(CPU_THREADS)
    return model


# Initialize session state for model. The heavy load itself is cached by
# _load_model, so a new browser session only re-binds the existing object.
if "model" not in st.session_state:
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        st.sidebar.info(f"Using device: {device}")
        with st.spinner("Loading Dia model..."):
            st.session_state.model = _load_model(device)
        st.sidebar.success("Model loaded successfully with optimizations")
    except Exception as e:
        st.error(f"Error loading Dia model: {str(e)}")
        st.stop()


# Function to generate audio
def generate_audio(text):
    """Generate speech for ``text`` and save it as a WAV file.

    Args:
        text: The script to synthesize.

    Returns:
        The path of the written WAV file, or ``None`` if ``text`` is empty or
        blank, or if generation/writing failed (an error is shown in the UI
        in both cases).
    """
    try:
        if not text or not text.strip():
            st.error("Text is required")
            return None

        output = st.session_state.model.generate(text)

        # Unique name per request so earlier results are never overwritten.
        filename = f"audio_{uuid.uuid4()}.wav"
        filepath = f"{AUDIO_DIR}/{filename}"
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        sf.write(filepath, output, SAMPLE_RATE)
        return filepath
    except Exception as e:
        # UI boundary: report the failure instead of crashing the app.
        st.error(f"Error generating audio: {str(e)}")
        return None


# UI
st.title("🎙️ Dia - Text to Dialogue Demo")
st.markdown("Enter a multi-speaker script below using `[S1]`, `[S2]`, etc.")

# Text input
text_input = st.text_area(
    "Script",
    value=DEFAULT_SCRIPT,
    height=150,
)

# Action selection
action = st.selectbox(
    "Choose Action",
    ["Convert to Audio", "Summarize (Coming Soon)"],
    index=0,
)

# Generate button
if st.button("Generate Audio", type="primary"):
    if action == "Convert to Audio":
        with st.spinner("Generating audio..."):
            audio_file = generate_audio(text_input)
        if audio_file:
            # Remember the result so it survives the rerun that happens when
            # the download button is clicked.
            st.session_state.audio_file = audio_file
            st.success("Audio generated successfully!")
        else:
            # Do not show a stale result next to the error message.
            st.session_state.pop("audio_file", None)
    else:
        st.error("Summarization not implemented yet")

# Show the most recent result outside the button branch so that it stays
# visible on reruns where the "Generate Audio" button is not pressed.
latest_audio = st.session_state.get("audio_file")
if latest_audio and os.path.exists(latest_audio):
    st.audio(latest_audio)

    # Download button
    with open(latest_audio, "rb") as audio_fh:
        st.download_button(
            label="Download Audio",
            data=audio_fh.read(),
            file_name="generated_dialogue.wav",
            mime="audio/wav",
        )

# Display info in sidebar
st.sidebar.markdown("---")
st.sidebar.markdown("Powered by Dia-1.6B AI Text-to-Dialogue Model")