import os
from time import time

import gradio as gr
import requests

from languages import LANGUAGES

# API key taken from the environment; this is None when the variable is unset.
GLADIA_API_KEY = os.getenv("GLADIA_API_KEY")

# HTTP headers passed along with the transcription request.
headers = {"accept": "application/json", "x-gladia-key": GLADIA_API_KEY}

# Values used for the "language_behaviour" form field; transcribe() selects
# one of them by index.
ACCEPTED_LANGUAGE_BEHAVIOUR = [
    "manual",
    "automatic single language",
    "automatic multiple languages",
]


def _segments_to_html(segments) -> str:
    """Join transcription segments into one HTML string, emitting a bold speaker header each time the speaker changes."""
    parts = []
    current_speaker = ""
    for segment in segments:
        speaker = segment["speaker"]
        # Segments tagged "unknown" never open a new header; their text is
        # simply appended to whatever came before.
        if speaker != current_speaker and speaker != "unknown":
            current_speaker = speaker
            parts.append(
                "<br/><br/><b> Speaker:"
                + str(speaker)
                + ":</b> "
                + segment["transcription"]
            )
        else:
            parts.append(" " + segment["transcription"])
    return "".join(parts)


def transcribe(
    audio: str = None,
) -> tuple:
    """
    Transcribe an audio file to text using the Gladia API.

    The file is uploaded as multipart form data together with the
    transcription options (language behaviour, diarization, ...), and the
    returned segments are rendered as HTML grouped by speaker.
    Get your api key at gladia.io !

    Parameters:
    audio (str): The path to the audio file to transcribe. When empty or
        None, the request is sent without an audio part.

    Returns:
    tuple: ``(html, prediction_raw)`` where ``html`` is the transcription
        rendered as an HTML string and ``prediction_raw`` is the
        "prediction_raw" entry of the API's JSON response. When the API
        answers with a non-200 status, ``(error_message, None)`` is returned
        so that both outputs of the interface still receive a value.
    """
    DEFAULT_MANUAL_LANGUAGE = "english"

    language_behaviour = ACCEPTED_LANGUAGE_BEHAVIOUR[2]

    # Every option is sent as a multipart form field; a (None, value) tuple
    # is a field without a filename.
    files = {
        "language_behaviour": (None, language_behaviour),
        "noise_reduction": (None, "false"),
        "output_format": (None, "json"),
        "toggle_diarization": (None, "true"),
        "diarization_max_speakers": (None, "2"),
    }

    if audio:
        # Read the payload inside a context manager so the file handle is
        # closed deterministically instead of being leaked.
        with open(audio, "rb") as audio_file:
            files["audio"] = (audio, audio_file.read(), "audio/wav")

    # The language field only matters in manual mode; per the original
    # author's note it would be ignored for the automatic behaviours anyway.
    if language_behaviour == "manual":
        files["language"] = (None, DEFAULT_MANUAL_LANGUAGE)

    response = requests.post(
        "https://api.gladia.io/audio/text/audio-transcription/",
        headers=headers,
        files=files,
    )

    if response.status_code != 200:
        print(response.content, response.status_code)

        # Return one value per declared output (html, json).
        return "Sorry, an error occured with your request :/", None

    # The response carries two outputs: "prediction" (the segments) and
    # "prediction_raw", which has more details about the processing and
    # other debugging elements. Parse the body once and reuse it.
    payload = response.json()

    return _segments_to_html(payload["prediction"]), payload["prediction_raw"]


# Text passed as the interface description (contains Markdown links and
# inline HTML line breaks).
_DESCRIPTION = """Gladia.io Whisper large-v2 fast audio transcription API
    is able to perform fast audio transcriptions for any audio / video (less than a minute per hour) .<br/>For more details and a benchmark ran on multiple Speech-To-Text providers, please visit
    [our post](https://medium.com/@gladia.io/gladia-alpha-launch-redefining-what-s-possible-with-speech-to-text-ai-686dd4312a86) on Medium.
    <br/><br/>
    You are more than welcome to join us on [Slack](https://gladia-io.slack.com)
    and don't forget to get your own API key on [Gladia.io](https://gladia.io/) during the free alpha !
    """

# Wire transcribe() to a single uploaded-audio input (handed over as a file
# path) and two outputs: the HTML transcript and the raw JSON prediction.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(label="Audio file", source="upload", type="filepath"),
    ],
    outputs=["html", "json"],
    title="Gladia.io fast audio transcription",
    description=_DESCRIPTION,
    # One single-element row per example, matching the single audio input.
    examples=[
        [clip]
        for clip in (
            "examples/good.will.hunting.wav",
            "examples/wolf.of.wall.street.wav",
        )
    ],
)

# Enable the interface's queue, then start the app.
iface.queue()
iface.launch()