"""Hugging Face Space: high-quality multilingual text-to-speech with Coqui XTTS v2.

CPU-only by design (Spaces-safe). Users paste text or upload .txt/.docx,
optionally supply a short .wav reference for voice cloning, and get back a
single concatenated WAV file.
"""

import os
import re
import tempfile
import traceback
import uuid
from typing import List, Optional

# ---- Make Spaces happy: force CPU & avoid MPS/CUDA surprises ----
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
os.environ.setdefault("COQUI_TOS_AGREED", "1")  # auto-accept Coqui model license prompt

import numpy as np
import gradio as gr

# Lazily-initialized model handle and its output sample rate (see get_tts()).
_TTS = None
_SR = 24000  # XTTS v2 typical output rate

# ---------- Utilities ----------

# Split after sentence-ending punctuation (or newline) followed by whitespace.
_SENT_SPLIT = re.compile(r"(?<=[\.\!\?\:\;\n])\s+")


def chunk_text(text: str, max_len: int = 480) -> List[str]:
    """Split *text* into chunks of at most *max_len* characters.

    Prefers sentence boundaries; a single sentence longer than *max_len* is
    hard-split. Whitespace is normalized first. Returns [] for empty input.
    """
    text = re.sub(r"\s+", " ", text).strip()
    if not text:
        return []
    if len(text) <= max_len:
        return [text]

    sents = [s.strip() for s in _SENT_SPLIT.split(text) if s.strip()]
    chunks, buf = [], ""
    for s in sents:
        if len(buf) + 1 + len(s) <= max_len:
            # Sentence still fits in the current chunk (plus a joining space).
            buf = f"{buf} {s}".strip() if buf else s
        else:
            if buf:
                chunks.append(buf)
            if len(s) > max_len:
                # Very long single sentence: hard-split it.
                for i in range(0, len(s), max_len):
                    chunks.append(s[i:i + max_len])
                buf = ""
            else:
                buf = s
    if buf:
        chunks.append(buf)
    return chunks


def _file_path(file_obj) -> Optional[str]:
    """Return a filesystem path for a gr.File value, or None.

    Gradio v4 may deliver either a plain str path or a TempFile-like object
    exposing ``.name`` — accept both (the original only handled the latter,
    silently ignoring str paths).
    """
    if isinstance(file_obj, str):
        return file_obj
    return getattr(file_obj, "name", None)


def read_text_from_file(file_obj) -> str:
    """Extract text from an uploaded .txt or .docx file.

    Returns "" when no usable file was provided.
    Raises gr.Error for unsupported types or a missing docx parser.
    """
    if not file_obj:
        return ""
    path = _file_path(file_obj)
    if not path or not os.path.exists(path):
        return ""

    ext = os.path.splitext(path)[1].lower()
    if ext == ".txt":
        with open(path, "rb") as f:
            # Tolerate arbitrary encodings rather than crashing on bad bytes.
            return f.read().decode("utf-8", errors="ignore")
    elif ext == ".docx":
        try:
            import docx
        except Exception:
            raise gr.Error("python-docx not installed. Check requirements.txt")
        d = docx.Document(path)
        return "\n".join(p.text for p in d.paragraphs).strip()
    else:
        raise gr.Error("Unsupported file type. Please upload .txt or .docx")


def get_tts():
    """Return the lazily-initialized XTTS v2 model (CPU), caching it globally.

    Also records the model's output sample rate in the module-level _SR.
    """
    global _TTS, _SR
    if _TTS is None:
        try:
            from TTS.api import TTS
        except Exception as e:
            raise gr.Error(
                "Coqui TTS is not installed or failed to import. "
                "Make sure your Space installed requirements.txt.\n\n" + str(e)
            )
        # CPU-safe init
        _TTS = TTS(
            model_name="tts_models/multilingual/multi-dataset/xtts_v2",
            progress_bar=False,
            gpu=False,
        )
        # Use the model's sample rate if exposed; fall back to 24 kHz.
        _SR = int(getattr(_TTS, "output_sample_rate", 24000) or 24000)
    return _TTS


def safe_concat_wav(chunks_audio: List[np.ndarray], sr: int, out_path: str) -> str:
    """Concatenate audio chunks into a single mono 16-bit PCM WAV at *out_path*.

    Each chunk is flattened to float32, NaN/Inf-sanitized, and clamped to
    [-1, 1] before writing. Returns *out_path*.
    """
    import soundfile as sf

    with sf.SoundFile(out_path, mode="w", samplerate=sr, channels=1,
                      subtype="PCM_16") as f:
        for a in chunks_audio:
            a = np.asarray(a).flatten().astype("float32")
            # Guard against NaNs/Infs the model might emit.
            a = np.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0)
            # Clamp to valid PCM range.
            a = np.clip(a, -1.0, 1.0)
            f.write(a)
    return out_path


# ---------- Core pipeline ----------

def synthesize_pipeline(text_input, file_input, language, voice_ref):
    """Full text→speech pipeline: gather text, chunk, synthesize, write WAV.

    Returns the path of the generated WAV file.
    Raises gr.Error when no text could be collected.
    """
    # Gather text from the textbox and/or uploaded file.
    user = (text_input or "").strip()
    from_file = read_text_from_file(file_input) if file_input else ""
    final_text = (user + ("\n" if user and from_file else "") + from_file).strip()
    if not final_text:
        raise gr.Error("Please paste/type text or upload a .txt/.docx file.")

    # Limit very long inputs so Spaces don't OOM.
    if len(final_text) > 20000:
        final_text = final_text[:20000] + " ..."

    chunks = chunk_text(final_text, max_len=480)
    if not chunks:
        raise gr.Error("No readable text found.")

    tts = get_tts()

    # Optional voice clone: accept either a str path or a TempFile object.
    speaker_wav = _file_path(voice_ref) if voice_ref is not None else None

    # Synthesize chunk by chunk, then concatenate into one file.
    audios = []
    for ch in chunks:
        audio = tts.tts(text=ch, language=language, speaker_wav=speaker_wav)
        audios.append(audio)

    out_path = os.path.join(tempfile.gettempdir(), f"tts_{uuid.uuid4().hex}.wav")
    return safe_concat_wav(audios, _SR, out_path)


# ---------- Gradio UI ----------

LANG_OPTIONS = [
    ("English", "en"),
    ("Spanish", "es"),
    ("French", "fr"),
    ("German", "de"),
    ("Italian", "it"),
    ("Portuguese", "pt"),
    ("Polish", "pl"),
    ("Turkish", "tr"),
    ("Russian", "ru"),
    ("Dutch", "nl"),
    ("Chinese (Simplified)", "zh-cn"),
    ("Japanese", "ja"),
    ("Korean", "ko"),
    ("Arabic", "ar"),
]

with gr.Blocks(title="High-Quality TTS (XTTS v2)") as demo:
    gr.Markdown(
        """
        # 🔊 High-Quality Text-to-Speech (Coqui XTTS v2)
        - **Type/paste** text or **upload** `.docx` / `.txt`
        - Optional: upload a short **.wav** (10–30s) to clone voice
        - Click **Generate Audio**
        """
    )
    text_in = gr.Textbox(label="Type or paste text", lines=8,
                         placeholder="Paste text here…")
    file_in = gr.File(label="Drag & drop .docx / .txt (optional)",
                      file_types=[".docx", ".txt"])
    with gr.Row():
        voice_ref = gr.File(label="Optional voice reference (.wav, 10–30s)",
                            file_types=[".wav"])
        lang = gr.Dropdown(
            # (label, value) pairs: user sees "Chinese (Simplified)",
            # the callback still receives the code "zh-cn".
            choices=LANG_OPTIONS,
            value="en",
            label="Language",
        )
    run_btn = gr.Button("🎙️ Generate Audio", variant="primary")
    audio_out = gr.Audio(label="Result", type="filepath", autoplay=True)
    download = gr.File(label="Download WAV")
    err_box = gr.Markdown("", elem_id="error_box")

    def run(text_input, file_input, language, voice_ref_file):
        """Click handler: run the pipeline, surface errors in the UI box."""
        try:
            path = synthesize_pipeline(text_input, file_input, language,
                                       voice_ref_file)
            return path, path, ""  # clear errors
        except Exception as e:
            tb = traceback.format_exc()
            # Show a compact, readable error in the UI (last 1500 chars of tb).
            msg = f"**Error:** {e}\n\n```\n{tb[-1500:]}\n```"
            return None, None, msg

    run_btn.click(
        run,
        inputs=[text_in, file_in, lang, voice_ref],
        outputs=[audio_out, download, err_box],
    )

if __name__ == "__main__":
    demo.launch()