import os
import sys
import gradio as gr
import numpy as np
import subprocess
import scipy.io.wavfile
from pathlib import Path

# Ensure the repository is cloned
REPO_URL = "https://github.com/hpbyte/myanmar-tts.git"
REPO_DIR = "myanmar-tts"

def setup():
    """Set up the environment by cloning the repository if needed."""
    if not os.path.exists(REPO_DIR):
        print(f"Cloning {REPO_URL}...")
        subprocess.run(["git", "clone", REPO_URL], check=True)
    
    # Add the repository to Python path
    repo_path = os.path.abspath(REPO_DIR)
    if repo_path not in sys.path:
        sys.path.append(repo_path)
    
    # Create model directory if it doesn't exist
    if not os.path.exists("trained_model"):
        os.makedirs("trained_model")

def text_to_speech(text):
    """Convert text to speech using Myanmar TTS."""
    if not text.strip():
        return None, "Please enter some text."
    
    try:
        # Try to import the necessary modules
        try:
            import torch
            from text import text_to_sequence
            from utils.hparams import create_hparams
            from train import load_model
            from synthesis import generate_speech
        except ImportError:
            # If direct import fails, try to import from the local module
            import torch
            from myanmar_tts import synthesize
            
            # Use the simplified wrapper function
            waveform, sample_rate = synthesize(text)
            output_path = "output.wav"
            scipy.io.wavfile.write(output_path, sample_rate, waveform)
            return output_path, "Speech generated successfully!"
        
        # If direct import worked, continue with standard approach
        checkpoint_path = os.path.join("trained_model", "checkpoint_latest.pth.tar")
        config_path = os.path.join("trained_model", "hparams.yml")
        
        if not os.path.exists(checkpoint_path) or not os.path.exists(config_path):
            return None, f"""Model files not found. Please upload:
1. The checkpoint file to: {checkpoint_path}
2. The hparams.yml file to: {config_path}

You can obtain these files from the original repository."""
        
        # Load model and hyperparameters
        hparams = create_hparams(config_path)
        model = load_model(hparams)
        model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('cpu'))['state_dict'])
        model.eval()
        
        # Process text input
        sequence = np.array(text_to_sequence(text, ['burmese_cleaners']))[None, :]
        sequence = torch.autograd.Variable(torch.from_numpy(sequence)).cpu().long()
        
        # Generate mel spectrograms
        mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
        
        # Generate waveform
        with torch.no_grad():
            waveform = generate_speech(mel_outputs_postnet, hparams)
        
        # Save and return the audio
        output_path = "output.wav"
        scipy.io.wavfile.write(output_path, hparams.sampling_rate, waveform)
        
        return output_path, "Speech generated successfully!"
        
    except Exception as e:
        error_msg = str(e)
        detailed_msg = f"""Error: {error_msg}

Make sure you have:
1. Uploaded the model files to the 'trained_model' directory
2. The files are correctly named 'checkpoint_latest.pth.tar' and 'hparams.yml'

If you're still seeing this error, please check the repository for any specific setup instructions."""
        
        return None, detailed_msg

# Set up the environment
setup()

# Create Gradio interface
demo = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(
            lines=3, 
            placeholder="Enter Burmese text here...", 
            label="Text"
        )
    ],
    outputs=[
        gr.Audio(label="Generated Speech"),
        gr.Textbox(label="Status", max_lines=10)
    ],
    title="Myanmar (Burmese) Text-to-Speech",
    description="""
    This is a demo of the Myanmar Text-to-Speech system developed by hpbyte.
    Enter Burmese text in the box below and click 'Submit' to generate speech.
    
    **Important**: You need to upload the model files to the 'trained_model' directory:
    - checkpoint_latest.pth.tar (the model checkpoint)
    - hparams.yml (hyperparameters configuration)
    
    Source: [GitHub Repository](https://github.com/hpbyte/myanmar-tts)
    """,
    examples=[
        ["မင်္ဂလာပါ"],
        ["မြန်မာစကားပြောစနစ်ကို ကြိုဆိုပါတယ်"],
        ["ဒီစနစ်ဟာ မြန်မာစာကို အသံအဖြစ် ပြောင်းပေးနိုင်ပါတယ်"],
    ]
)

if __name__ == "__main__":
    demo.launch()