import base64
import io
import json
import os
import uuid
from typing import Optional

import gradio as gr
import pandas as pd
import requests
from pydub import AudioSegment

from TTSs.base_tts import Base_TTS


class avaliable_voice_type:
    """Plain record describing one selectable voice.

    Attributes are assigned one by one after construction. The optional
    fields have empty-string class-level defaults, so an instance that never
    sets them still reads as "not provided".
    """

    # Language of the voice (optional).
    语言: Optional[str] = ""
    # Usage scenario of the voice (optional).
    场景: Optional[str] = ""
    # Human-readable voice name; first component of the repr label.
    音色名称: str
    # Voice identifier string.
    voice_type: str
    # Timestamp-support flag.
    时间戳支持: bool = False
    # Supported emotion / style types (optional).
    支持情感与风格类型: Optional[str] = ""
    # Supported language types (optional).
    支持语言类型: Optional[str] = ""

    def __repr__(self):
        """Return the display label for this voice.

        The label is the voice name followed by every non-empty optional
        field (language, scenario, emotion/style types, language types), in
        that order, joined by "——".
        """
        # Read through getattr rather than self.__dict__: the instance dict
        # only holds attributes that were explicitly assigned, so fields left
        # at their class-level default would otherwise raise KeyError.
        parts = [f"{getattr(self, '音色名称', '')}"]
        for field in ("语言", "场景", "支持情感与风格类型", "支持语言类型"):
            value = getattr(self, field, "")
            if value:
                parts.append(f"{value}")

        return "——".join(parts)


class Volcengine_TTS(Base_TTS):
    """TTS backend that synthesizes speech through the Volcengine HTTP API.

    The available voices are loaded from a spreadsheet that sits next to this
    module; the Gradio config page lets the user pick a voice and tune speed,
    volume and pitch.
    """

    # Seconds to wait for the HTTP API. Without a timeout `requests` waits
    # indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 60

    def get_name(self):
        """Return the display name of this backend."""
        return '火山引擎'

    def __init__(self):
        # Maps the display label of each voice to its metadata record.
        self.useful_voice = self.get_data_map()

    def get_data_map(self, filename="voice_list.xlsx"):
        """Load the voice spreadsheet and index its rows by display label.

        Args:
            filename: Spreadsheet file name, resolved relative to the
                directory containing this module.

        Returns:
            dict mapping ``str(voice)`` to the ``avaliable_voice_type`` record
            built from the row. Rows that produce the same label overwrite
            earlier ones.
        """
        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
        df = pd.read_excel(path)
        # Empty cells become '' so the optional fields read as "not provided".
        df.fillna('', inplace=True)

        useful_voice = {}
        for _, row in df.iterrows():
            data = avaliable_voice_type()
            data.语言 = row['语言']
            data.场景 = row['场景']
            data.音色名称 = row['音色名称']
            data.voice_type = row['voice_type']
            data.时间戳支持 = row['时间戳']
            data.支持情感与风格类型 = row['支持情感/风格类型']
            data.支持语言类型 = row['支持语言类型']
            useful_voice[str(data)] = data

        return useful_voice

    def _get_config_page(self):
        """Build the (initially hidden) Gradio configuration group.

        Returns:
            Tuple ``(group, inputs)`` where ``inputs`` lists the components
            in the same order as the parameters of ``_generate`` after
            ``text``.
        """
        with gr.Group(visible=False) as config_volcengine:
            voices = list(self.useful_voice.keys())
            # An empty spreadsheet must not crash UI construction.
            default_voice = voices[0] if voices else None

            with gr.Row():
                volcengine_appid = gr.Textbox(label="volcengine的appid（默认为环境变量值）",
                                              placeholder="请输入volcengine的appid",
                                              type="password",
                                              interactive=True,
                                              value=os.environ.get('VOLCENGINE_APPID', ''))
                volcengine_access_token = gr.Textbox(label="volcengine的access_token（默认为环境变量值）",
                                                     placeholder="请输入volengine的access_token",
                                                     type="password",
                                                     interactive=True,
                                                     value=os.environ.get('VOLCENGINE_ACCESS_TOKEN', ''))

            voice_type = gr.Dropdown(choices=voices, value=default_voice, label="音色选择", interactive=True)

            with gr.Row():
                speed_ratio = gr.Slider(minimum=0.2, maximum=3, value=1, step=0.1, label="语速",
                                        interactive=True)
                volume_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音量",
                                         interactive=True)
                pitch_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音高",
                                        interactive=True)

            with gr.Row():
                # Shown in the UI but not wired into `inputs` yet (the labels
                # say "not adapted yet").
                emotion = gr.Textbox(label="情感/风格（还未适配）", placeholder="请输入情感", interactive=True)
                language = gr.Textbox(label="语言类型（还未适配）", placeholder="请输入语言", interactive=True)

        inputs = [
            volcengine_appid, volcengine_access_token, voice_type, speed_ratio,
            volume_ratio, pitch_ratio
        ]

        return config_volcengine, inputs

    def _generate(self, text, appid, access_token, voice, speed_ratio,
                  volume_ratio, pitch_ratio):
        """Synthesize ``text`` and return the decoded audio.

        Args:
            text: Plain text to synthesize.
            appid: Volcengine application id.
            access_token: Access token sent in the Authorization header.
            voice: Display label of the voice; must be a key of
                ``self.useful_voice``.
            speed_ratio: Speech speed multiplier.
            volume_ratio: Volume multiplier.
            pitch_ratio: Pitch multiplier.

        Returns:
            ``AudioSegment`` decoded from the base64 MP3 payload found in the
            ``data`` field of the response.

        Raises:
            Exception: The response JSON has no ``data`` field; the parsed
                response is attached as the exception argument.
            KeyError: ``voice`` is not a known voice label.
        """
        host = "openspeech.bytedance.com"
        api_url = f"https://{host}/api/v1/tts"

        # NOTE(review): the ';' after "Bearer" looks intentional (vendor
        # format) — confirm against the Volcengine API docs before changing.
        header = {"Authorization": f"Bearer;{access_token}"}

        request_json = {
            "app": {
                "appid": appid,
                # NOTE(review): literal placeholder string, not the real
                # token (which travels in the header) — confirm with vendor docs.
                "token": "access_token",
                "cluster": "volcano_tts"
            },
            "user": {
                "uid": "388808087185088"
            },
            "audio": {
                "voice_type": self.useful_voice[voice].voice_type,
                "encoding": "mp3",
                "speed_ratio": speed_ratio,
                "volume_ratio": volume_ratio,
                "pitch_ratio": pitch_ratio,
            },
            "request": {
                # Fresh id per request.
                "reqid": str(uuid.uuid4()),
                "text": text,
                "text_type": "plain",
                "operation": "query",
                "with_frontend": 1,
                "frontend_type": "unitTson"

            }
        }

        resp = requests.post(api_url, data=json.dumps(request_json), headers=header,
                             timeout=self.REQUEST_TIMEOUT)

        # Parse the body once instead of re-decoding it for every access.
        payload = resp.json()
        if "data" not in payload:
            raise Exception(payload)

        mp3_file = base64.b64decode(payload["data"])

        original_audio = AudioSegment.from_mp3(io.BytesIO(mp3_file))

        return original_audio