import json
import os
import subprocess

import docx2txt
import gradio as gr
import torch
from TTS.api import TTS

# Base directory that holds config.json, the "voices" reference clips and the
# generated "Result" tree.
folder = '.'

# Set before the model is instantiated below.
# NOTE(review): presumably pre-accepts the Coqui model licence so that loading
# XTTS does not stop at an interactive prompt - confirm against the TTS docs.
os.environ["COQUI_TOS_AGREED"] = "1"

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Multilingual XTTS v2 model shared by every synthesis request.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

# Settings read by generate_voiceover: config_settings["voices"] maps
# "<voice>.mp3" to the language passed to the model, and
# config_settings["speed"] is the tempo factor handed to `play`.
with open(f"{folder}/config.json", encoding="utf-8") as f:
    config_settings = json.load(f)

def generate_voiceover(text, voice, n, name):
    """Synthesize ``text`` with a reference voice and play the result back.

    The audio is written to ``{folder}/Result/{voice}/{name}_{n}.mp3`` using
    ``{folder}/voices/{voice}.mp3`` as the speaker reference. The language is
    looked up in ``config_settings["voices"]`` under the key ``"{voice}.mp3"``.

    Args:
        text: Text to synthesize.
        voice: Reference voice name (file name in ``voices`` without ``.mp3``).
        n: Sequence number used as the suffix of the output file name.
        name: Base name used as the prefix of the output file name.

    Returns:
        Path of the generated audio file.

    Raises:
        KeyError: If ``config_settings`` has no language entry for the voice
            or no ``"speed"`` entry.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(f"{folder}/Result/{voice}", exist_ok=True)
    file_path = f"{folder}/Result/{voice}/{name}_{n}.mp3"

    tts.tts_to_file(
        text=text,
        speaker_wav=f'{folder}/voices/{voice}.mp3',
        language=config_settings["voices"][f'{voice}.mp3'],
        file_path=file_path,
    )

    # NOTE(review): `play` looks like the SoX player, which would play the
    # file at the configured tempo but not rewrite file_path. If the intent is
    # to store a sped-up file, this needs `sox <in> <out> tempo <x>` - confirm.
    #
    # The command is passed as an argument list (no shell) because file_path
    # is derived from the uploaded file name and the selected voice; spaces or
    # shell metacharacters in them must not be interpreted by a shell.
    command = ["play", file_path, "tempo", str(config_settings["speed"])]
    try:
        # check=False keeps playback best-effort, like the original
        # os.system call whose exit status was ignored.
        subprocess.run(command, check=False)
    except OSError as exc:
        # Raised e.g. when the `play` executable is not installed; the audio
        # file itself has already been generated, so keep going.
        print(f"Could not run 'play' for {file_path}: {exc}")

    return file_path


def process_file(file, voice):
    """Generate one voice-over audio file per paragraph of an uploaded file.

    The document is split on blank lines (``"\\n\\n"``); every non-blank
    paragraph is passed to ``generate_voiceover`` with a running number that
    starts at 1.

    Args:
        file: The upload handed over by Gradio: either a path string or an
            object whose ``name`` attribute is the path. ``None`` when nothing
            was uploaded.
        voice: Name of the reference voice, forwarded to
            ``generate_voiceover``.

    Returns:
        A single string, matching the interface's single text output: the
        generated audio file paths joined by blank lines, or a human-readable
        error message when the upload is missing, unsupported or empty.
    """
    if file is None:
        return "Please upload a file."

    # Accept both a plain path and a file-like object exposing `.name`.
    path = getattr(file, "name", file)
    base_name = os.path.basename(path)
    # Compare the extension case-insensitively so "NOTES.TXT" is accepted too.
    extension = os.path.splitext(base_name)[1].lower()

    if extension == ".txt":
        with open(path, encoding="utf-8") as text_file:
            text = text_file.read()
    elif extension == ".docx":
        # docx2txt needs the document path; calling it without one raises.
        text = docx2txt.process(path)
    else:
        return "Unsupported file type."

    if not text.strip():
        return "File is empty. Please upload a file with content."

    paragraphs = text.split("\n\n")
    print(paragraphs)

    # Output files are named after the upload, up to its first ".".
    stem = base_name.split(".")[0]
    spoken = [paragraph for paragraph in paragraphs if paragraph.strip()]
    audio_outputs = [
        generate_voiceover(paragraph, voice, n, stem)
        for n, paragraph in enumerate(spoken, start=1)
    ]

    return "\n\n".join(audio_outputs)

# Names of all regular files in the "voices" folder, each cut at its first "."
# (e.g. "anna.mp3" -> "anna"); offered as the choices of the dropdown.
voices = [
    entry.name.partition(".")[0]
    for entry in os.scandir("voices")
    if entry.is_file()
]


def main():
    """Build the Gradio interface around ``process_file`` and launch it.

    The interface takes an uploaded .txt/.docx file and a choice from
    ``voices`` as inputs, shows the function's result in one text box, and is
    launched with ``debug=True``.
    """
    upload_input = gr.File(file_types=['.txt', '.docx'], label="Upload a file")
    voice_input = gr.Dropdown(voices, label="Select a language")
    result_output = gr.Textbox(label="Voiceover Text")

    app = gr.Interface(
        fn=process_file,
        inputs=[upload_input, voice_input],
        outputs=[result_output],
        title="Text to Speech App",
        description="Upload a .txt or .docx file, select a language, and generate voiceovers for the content.",
    )
    app.launch(debug=True)


if __name__ == "__main__":
    main()