import os
import io
import wave

import numpy as np
import gradio as gr

from openai import OpenAI
import google.generativeai as genai

from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

from match_info_crawler import get_matches_info


USE_LOCAL_ASR_PIPELINE = True


# used for chat, if provided
GOOGLE_API_KEY = "" #if 'GOOGLE_API_KEY' not in os.environ else os.environ['GOOGLE_API_KEY']

# used for chat (2nd option) and for text-to-speech
OPENAI_API_KEY = "" if 'OPENAI_API_KEY' not in os.environ else os.environ['OPENAI_API_KEY']

# used for speech recognition, if USE_LOCAL_ASR_PIPELINE is true
assert 'HUGGINGFACE_API_KEY' in os.environ, "Hugging Face API key not found in environment variables"

USE_OPENAI_FOR_CHAT = (GOOGLE_API_KEY == "")

OPENAI_CLIENT = None
if OPENAI_API_KEY != "":
    OPENAI_CLIENT = OpenAI(api_key=OPENAI_API_KEY)

if GOOGLE_API_KEY != "":
    genai.configure(api_key=GOOGLE_API_KEY)

GOOGLE_GEN_CONFIG = genai.types.GenerationConfig(
            candidate_count=1,
            temperature=0.5)

AUDIO_OUT_FILE_PREFIX = "output"   # prefixo do nome do arquivo de áudio .wav

TEMPLATE_SYSTEM_MESSAGE = """Você é assistente virtual com a função é entreter uma criança de idade entre 6 e 8 anos que adora futebol. Diretrizes para a conversa: 
- Você é {GENRE}, seu nome é {NAME}.
- {PERSONALITY}
- Pergunte o nome da criança.
- Fale sobre futebol, times, jogadores, seleções e grandes jogos.
- Tente focar em Brasil, Inglaterra e Espanha.
- Você também pode informar os resultados de jogos de ontem, e jogos que ocorrerão hoje ou amanhã.
- Fale, no máximo, três frases por mensagem.
"""

# Mapeia a personalidade no template e na temperatura
PERSONALITIES = {
    "nova": ("Sua personalidade é bastante amigável e alegre, e um tanto infantil. Tente iniciar novos assuntos, quando a conversa estiver repetitiva. Conte piadas de futebol, de vez em quando.", 0.8, "F"),
    "echo": ("Sua personalidade é amigável, mas objetivo. Tente manter-se no mesmo assunto. Conte alguma curiosidade sobre um grande craque, de vez em quando.", 0.2, "M")
}

INITIAL_PERSON = "nova"


# Função para converter o histórico de chat para o formato esperado pela API do OpenAI
def to_openai_chat_history(system_prompt, chat_history, curr_message):
    prompt = [ { 'role': 'system', 'content': system_prompt } ]
    if len(chat_history) > 10:
        chat_history = chat_history[0:3] + chat_history[-5:]
    for turn in chat_history:
        user_message, bot_message = turn
        prompt.append( {'role': 'user', 'content': user_message} )
        prompt.append( {'role': 'assistant', 'content': bot_message} )
    prompt.append( {'role': 'user', 'content': curr_message } )
    return prompt


# Função para converter o histórico de chat para o formato esperado pela API do Google AI
def to_google_history(chat_history, curr_user_message=None):
    prompt = []
    for turn in chat_history:
        user_message, bot_message = turn
        prompt.append( {'role':'user', 'parts': [user_message]} )
        prompt.append( {'role': 'model', 'parts': [bot_message]} )
    
    if curr_user_message is not None:
        prompt.append( {'role': 'user', 'parts': [curr_user_message]} )
    
    return prompt


import json

TOOLS_SPECIFICATION_OPENAI = [
    {
        "type": "function",
        "function": {
            "name": "get_matches_info",
            "description": "Use this function to retrieve information about football (soccer) matches from the most important leagues. Time of the matches is given in Brazilian timezone.",
                           #+ "Returns a string with one matche per line; or empty string if the service is not available now.",
            "parameters": {
                "type": "object",
                "properties": {
                    "date_str": {
                        "type": "string",
                        "description": "Must be one of these: 'yesterday', 'today' or 'tomorrow'. No other option is valid."
                    }
                },
                "required": ["date_str"],
            },
        }
    }
]


def process_wave(audio_bytes):
    audio_file = io.BytesIO(audio_bytes)

    # Read the wave file using the wave module
    wave_file = wave.open(audio_file)

    # Get audio parameters
    #num_channels = wave_file.getnchannels()
    frame_rate = wave_file.getframerate()
    #sample_width = wave_file.getsampwidth()
    num_frames = wave_file.getnframes()

    # Read the audio data as a NumPy array
    audio_array = np.frombuffer(wave_file.readframes(num_frames), dtype=np.int16)

    return (frame_rate, audio_array)


def respond(system_prompt, user_message, chat_history, temperature, persona="echo"):
        if USE_OPENAI_FOR_CHAT:
            openai_history = to_openai_chat_history(system_prompt, chat_history, user_message)

            bot_response = OPENAI_CLIENT.chat.completions.create(messages=openai_history,
                                                                 temperature=temperature,
                                                                 tools=TOOLS_SPECIFICATION_OPENAI,
                                                                 model="gpt-3.5-turbo-0125")
            bot_response = bot_response.choices[0].message

            if bot_response.tool_calls:
                assert bot_response.tool_calls[0].function.name == "get_matches_info", "Invalid tool call in response."
                print("Processing tool call...")

                date_str = json.loads(bot_response.tool_calls[0].function.arguments)["date_str"]
                results = get_matches_info(date_str)
                openai_history.append({"role": "function", "tool_call_id": bot_response.tool_calls[0].id, "name": bot_response.tool_calls[0].function.name, "content": results})
                
                # nesta chamada, não passo o tools, para economizar tokens
                bot_response = OPENAI_CLIENT.chat.completions.create(messages=openai_history,
                                                                     temperature=temperature,
                                                                     model="gpt-3.5-turbo-0125")
                bot_response = bot_response.choices[0].message

            assistant_msg = bot_response.content
        
        else:
            GOOGLE_GEN_CONFIG.temperature = temperature
            model = genai.GenerativeModel('gemini-1.5-pro-latest', 
                               system_instruction=system_prompt,
                               tools=[get_matches_info],
                               generation_config=GOOGLE_GEN_CONFIG)

            google_history = to_google_history(chat_history)
            chat = model.start_chat(history=google_history, 
                                    enable_automatic_function_calling=True)
            bot_response = chat.send_message(user_message)

            assistant_msg = bot_response.text

        # salva o audio 
        response = OPENAI_CLIENT.audio.speech.create(
            model="tts-1",
            voice=persona,
            input=assistant_msg,
            response_format='wav'  # se for salvar em arquivo, (acho) pode usar 'mp3'
        )

        # adiciona ao chat, com o tipo de dado esperado pelo Gradio
        chat_history.append( (user_message, assistant_msg) )
        
        return "", chat_history, process_wave(response.content)


def reset_and_apply(voice):
    return [("", "Olá, vamos falar de futebol?")], AUDIO_OUT_FILE_PREFIX + f"-001-{voice}.wav"

def reset_openai_client(openai_key):
    global USE_OPENAI_FOR_CHAT, OPENAI_CLIENT, OPENAI_API_KEY
    USE_OPENAI_FOR_CHAT = (GOOGLE_API_KEY == "")
    OPENAI_API_KEY = openai_key
    if OPENAI_API_KEY != "":
        OPENAI_CLIENT = OpenAI(api_key=OPENAI_API_KEY)

def reset_google_client(google_key):
    global GOOGLE_API_KEY, USE_OPENAI_FOR_CHAT
    USE_OPENAI_FOR_CHAT = (google_key == "")
    GOOGLE_API_KEY = google_key
    if GOOGLE_API_KEY != "":
        genai.configure(api_key=GOOGLE_API_KEY)


def on_voice_change(voice):
    persona_description, persona_temperature, sex = PERSONALITIES[voice]
    genre = "menina" if sex=="F" else "menino"
    return TEMPLATE_SYSTEM_MESSAGE.format(NAME=voice.upper(), PERSONALITY=persona_description, GENRE=genre), persona_temperature


# With pipeline (downloaded model)
if USE_LOCAL_ASR_PIPELINE:
    from transformers import pipeline
    import numpy as np

    global ASR_PIPELINE
    ASR_PIPELINE = pipeline(task="automatic-speech-recognition", 
                            #model="openai/whisper-large-v3")
                            model="openai/whisper-small")
else:
    import requests
    global ASR_API_URL, ASR_API_HEADERS
    
    HF_KEY = os.environ['HUGGINGFACE_API_KEY']

    # Serverless API endpoint for OpenAI's Whisper model
    #ASR_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
    ASR_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-small"
    ASR_API_HEADERS = {"Authorization": f"Bearer {HF_KEY}"}


def transcribe(audio_file):
    if USE_LOCAL_ASR_PIPELINE:
        response = ASR_PIPELINE(audio_file)
        text = response["text"]

    else:
        # using serverless API
        with open(audio_file, "rb") as f:
            data = f.read()
        response = requests.post(ASR_API_URL, headers=ASR_API_HEADERS, data=data)
        text = response.json()["text"]
    
    return text


def transcribe_and_respond(audio_in, system_txtbox, user_msg_txb, *args):
    transcribed_user_msg = transcribe(audio_in)
    outputs = respond(system_txtbox, transcribed_user_msg, *args)
    return outputs


with gr.Blocks() as demo:
    # aqui, é resetado e instanciado o cliente
    initial_chat_history, initial_audio = reset_and_apply(INITIAL_PERSON)

    chatbot_area = gr.Chatbot(value=initial_chat_history)
    audio_out = gr.Audio(label="Escute a última mensagem", value=initial_audio, autoplay=True, interactive=False)
    
    user_msg_txb = gr.Textbox(label="Mensagem")

    audio_in = gr.Audio(label="Mensagem de Áudio", sources=['microphone'], interactive=True, type='filepath')
    
    submit_btn = gr.Button("Enviar")
    
    #clear_btn = gr.ClearButton(components=[user_msg, chatbot], value="Clear console")
    reset_btn = gr.Button("Reiniciar")
    
    with gr.Accordion(label="Configurações",open=False):
        openai_key = gr.Textbox(label="OpenAI API Key (GPT e vozes)", value="", placeholder="Insira a chave aqui")
        openai_key.change(reset_openai_client, inputs=[openai_key])

        #openai_key = gr.Textbox(label="Google API Key (Gemini 1.5)", value="", placeholder="Insira a chave aqui")
        #openai_key.change(reset_google_client, inputs=[openai_key])

        # opções de vozes e personalidades
        voice_ddown = gr.Dropdown(label="Personalidade (muda os dois abaixo)", choices=["nova", "echo"], value=INITIAL_PERSON)

        initial_system_message, initial_temperature = on_voice_change(INITIAL_PERSON)        
        temperature_sldr = gr.Slider(label="Diversidade de respostas", minimum=0.0, maximum=1.0, value=initial_temperature, step=0.1)
        
        with gr.Accordion(label="Avançado",open=False):
            # o valor inicial é dado pela system message com o nome e personalidade dados pelos controles acima
            system_txtbox = gr.Textbox(label="System message", lines=3, value=initial_system_message)

        voice_ddown.change(on_voice_change, inputs=[voice_ddown], outputs=[system_txtbox, temperature_sldr])
        #gr.Markdown("*Clique em 'Reiniciar' para aplicar as (a maior parte das) configurações.*")

    reset_btn.click(reset_and_apply, inputs=[voice_ddown], outputs=[chatbot_area, audio_out])

    audio_in.stop_recording( transcribe_and_respond, inputs=[audio_in, system_txtbox, user_msg_txb, chatbot_area, temperature_sldr, voice_ddown], outputs=[user_msg_txb, chatbot_area, audio_out] )
    submit_btn.click(respond, inputs=[system_txtbox, user_msg_txb, chatbot_area, temperature_sldr, voice_ddown], outputs=[user_msg_txb, chatbot_area, audio_out])    # Click on the button
    user_msg_txb.submit(respond, inputs=[system_txtbox, user_msg_txb, chatbot_area, temperature_sldr, voice_ddown], outputs=[user_msg_txb, chatbot_area, audio_out]) # Press enter to submit - same effect


demo.queue().launch(share=False)