import gc
import logging
import threading

import torch
from transformers import LlamaTokenizer

from modules import config
from modules.ChatTTS import ChatTTS
from modules.devices import devices

logger = logging.getLogger(__name__)

chat_tts = None
lock = threading.Lock()


def load_chat_tts_in_thread():
    global chat_tts
    if chat_tts:
        return

    logger.info("Loading ChatTTS models")
    chat_tts = ChatTTS.Chat()
    device = devices.get_device_for("chattts")
    dtype = devices.dtype
    chat_tts.load_models(
        compile=config.runtime_env_vars.compile,
        source="local",
        local_path="./models/ChatTTS",
        device=device,
        dtype=dtype,
        dtype_vocos=devices.dtype_vocos,
        dtype_dvae=devices.dtype_dvae,
        dtype_gpt=devices.dtype_gpt,
        dtype_decoder=devices.dtype_decoder,
    )

    # float16 on CPU is often unsupported or very slow; warn and suggest
    # falling back to float32 via the `--no_half` flag.
    if device == devices.cpu and dtype == torch.float16:
        logger.warning(
            "The device is CPU and dtype is float16, which may not work properly. "
            "It is recommended to use float32 by enabling the `--no_half` parameter."
        )

    devices.torch_gc()
    logger.info("ChatTTS models loaded")


def load_chat_tts():
    # Guard loading with a lock so concurrent callers trigger at most one load.
    with lock:
        if chat_tts is None:
            load_chat_tts_in_thread()
    if chat_tts is None:
        raise RuntimeError("Failed to load ChatTTS models")
    return chat_tts


def unload_chat_tts():
    logger.info("Unloading ChatTTS models")
    global chat_tts

    if chat_tts:
        # Move every torch module back to the CPU first; dropping the
        # `chat_tts` reference afterwards lets the GC reclaim the weights.
        for model in chat_tts.pretrain_models.values():
            if isinstance(model, torch.nn.Module):
                model.cpu()
        chat_tts = None
    devices.torch_gc()
    gc.collect()
    logger.info("ChatTTS models unloaded")


def reload_chat_tts():
    logger.info("Reloading ChatTTS models")
    unload_chat_tts()
    instance = load_chat_tts()
    logger.info("ChatTTS models reloaded")
    return instance


def get_tokenizer() -> LlamaTokenizer:
    # Use a local name to avoid shadowing the module-level `chat_tts` global.
    instance = load_chat_tts()
    return instance.pretrain_models["tokenizer"]
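

# A minimal usage sketch of the loader lifecycle, assuming the ChatTTS weights
# are already present under ./models/ChatTTS and that `modules.devices` has
# been configured by the application's startup code.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    tts = load_chat_tts()        # first call loads and caches the models
    tokenizer = get_tokenizer()  # returns the cached LlamaTokenizer
    logger.info("Tokenizer vocab size: %d", tokenizer.vocab_size)
    unload_chat_tts()            # move weights to CPU and release VRAM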