import os
import io
import time
import json
import math
import base64
from uuid import uuid4

from PIL import Image as PILImage

# DeepFace downloads its model weights into DEEPFACE_HOME; keep them local.
os.environ["DEEPFACE_HOME"] = "."

import pyzipper
import numpy as np
import gradio as gr
from annoy import AnnoyIndex
from deepface import DeepFace

# Annoy index over 512-dimensional Facenet512 embeddings.
index = AnnoyIndex(512, "euclidean")
index.load("face.db")

# Maps Annoy item positions back to performer ids.
ANNOY_INDEX = json.load(open("face.json"))

# Performer metadata ships in an AES-encrypted zip; the key comes from the
# VISAGE_KEY environment variable.
with pyzipper.AESZipFile('persons.zip') as zf:
    password = os.getenv("VISAGE_KEY", "").encode('ascii')
    zf.setpassword(password)
    PERFORMER_DB = json.loads(zf.read('performers.json'))


## Prediction functions

def image_search_performer(image, threshold=20.0, results=3):
    """Search for a performer in an image.

    Returns a list of performers with at least the following keys:
    - id: the performer's id
    - distance: the distance between the face in the image and the performer's face
    - confidence: a confidence score between 0 and 100
    - hits: the number of times the performer was found in our database
    """
    image_array = np.array(image)
    face = DeepFace.represent(
        img_path=image_array,
        detector_backend='retinaface',
        model_name='Facenet512',
        normalization="Facenet2018",
    )[0]['embedding']
    return search_performer(face, threshold, results)


def image_search_performers(image, threshold=20.0, results=3):
    image_array = np.array(image)
    response = []
    t = time.time()
    try:
        faces = DeepFace.represent(
            img_path=image_array,
            detector_backend='retinaface',
            model_name='Facenet512',
            normalization="Facenet2018",
        )
        # Alternative detector backends, kept for reference:
        # faces = DeepFace.represent(img_path=image_array, detector_backend='yolov8', model_name='Facenet512', normalization="Facenet2018")
        # faces = DeepFace.represent(img_path=image_array, detector_backend='mtcnn', model_name='Facenet512', normalization="Facenet2018")
    except ValueError as e:
        print(e)
        raise gr.Error("No faces found in the image")
    print(f"Time to find faces: {time.time() - t}")

    for face in faces:
        embedding = face['embedding']
        area = face['facial_area']
        confidence = face['face_confidence']

        # Crop the detected face and return it base64-encoded, so the caller
        # can see which face each list of matches belongs to.
        cimage = image.crop((area['x'], area['y'], area['x'] + area['w'], area['y'] + area['h']))
        buf = io.BytesIO()
        cimage.save(buf, format='JPEG')
        im_b64 = base64.b64encode(buf.getvalue()).decode('ascii')

        response.append({
            'image': im_b64,
            'confidence': confidence,
            'performers': search_performer(embedding, threshold, results),
        })
    return response
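
# A minimal sketch of calling the search pipeline outside Gradio. It is not
# invoked anywhere in this app, and "sample.jpg" is a hypothetical local file.
def _example_cli_search(path="sample.jpg"):
    """Hypothetical helper: print the top matches for a single-face image."""
    img = PILImage.open(path)
    for match in image_search_performer(img, threshold=20.0, results=3):
        print(match["id"], match["distance"], match["confidence"], match["hits"])
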
def vector_search_performer(vector_json, threshold=20.0, results=3):
    """Search for a performer from a vector.

    The vector should be created with DeepFace and must be 512-dimensional.
    For best results use the following settings:
    - detector_backend: retinaface
    - model: Facenet512
    - normalization: Facenet2018

    Returns a list of performers with at least the following keys:
    - id: the performer's id
    - distance: the distance between the face in the image and the performer's face
    - confidence: a confidence score between 0 and 100
    - hits: the number of times the performer was found in our database
    """
    vector = np.array(json.loads(vector_json))
    return search_performer(vector, threshold, results)


def search_performer(vector, threshold=20.0, results=3):
    threshold = threshold or 20.0
    results = results or 3

    ids, distances = index.get_nns_by_vector(
        vector, 50, search_k=100000, include_distances=True
    )

    persons = {}
    for p, distance in zip(ids, distances):
        person_id = ANNOY_INDEX[p]
        if person_id in persons:
            # Repeated hits for the same performer make the match more
            # credible: count them and nudge the distance down.
            persons[person_id]["hits"] += 1
            persons[person_id]["distance"] -= 0.5
            persons[person_id]["confidence"] = normalize_confidence_from_distance(persons[person_id]["distance"], threshold)
            continue

        persons[person_id] = {
            "id": person_id,
            "distance": round(distance, 2),
            "confidence": normalize_confidence_from_distance(distance, threshold),
            "hits": 1,
        }

        if person_id in PERFORMER_DB:
            persons[person_id].update(PERFORMER_DB.get(person_id))

    persons = sorted(persons.values(), key=lambda x: x["distance"])
    # persons = [p for p in persons if p["distance"] < threshold]
    return persons[:results]


def normalize_confidence_from_distance(distance, threshold=20.0):
    """Normalize confidence to a 0-100 scale."""
    confidence = face_distance_to_conf(distance, threshold)
    # Map [0.0, 1.0] onto [0, 100].
    return int(confidence * 100.0)


def face_distance_to_conf(face_distance, face_match_threshold=20.0):
    """Using a face distance, calculate a similarity confidence value in [0, 1]."""
    if face_distance > face_match_threshold:
        # The face is far away, so give it a low confidence that decays
        # linearly past the threshold, clamped so it never goes negative.
        linear_val = 1.0 - (face_distance / (face_match_threshold * 2.0))
        return max(linear_val, 0.0)
    else:
        # The face is close, so give it a high confidence.
        linear_val = 1.0 - (face_distance / (face_match_threshold * 2.0))
        # But adjust this value by a curve so that we don't get a linear
        # transition from close to far. We want it to be more confident
        # the closer it is.
        return linear_val + ((1.0 - linear_val) * math.pow((linear_val - 0.5) * 2, 0.2))
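
# Illustrative sanity check of the distance-to-confidence mapping above (not
# called by the app): distances well under the threshold approach 100, the
# threshold itself maps to 50, and distances past it decay toward 0.
def _example_confidence_curve():
    """Hypothetical helper: print confidence for a few sample distances."""
    for d in (5.0, 10.0, 20.0, 25.0, 40.0):
        print(d, normalize_confidence_from_distance(d, threshold=20.0))
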
def find_faces_in_sprite(image, vtt):
    vtt = base64.b64decode(vtt.replace("data:text/vtt;base64,", ""))
    sprite = PILImage.fromarray(image)

    results = []
    for i, (x, y, w, h, time_seconds) in enumerate(getVTToffsets(vtt)):
        cut_frame = sprite.crop((x, y, x + w, y + h))
        faces = DeepFace.extract_faces(
            np.asarray(cut_frame),
            detector_backend="mediapipe",
            enforce_detection=False,
            align=False,
        )
        faces = [face for face in faces if face['confidence'] > 0.6]
        if faces:
            size = faces[0]['facial_area']['w'] * faces[0]['facial_area']['h']
            data = {
                'id': str(uuid4()),
                'offset': (x, y, w, h),
                'frame': i,
                'time': time_seconds,
                'size': size,
            }
            results.append(data)

    return results


def getVTToffsets(vtt):
    """Yield (x, y, w, h, start_seconds) for every cue in a sprite VTT."""
    time_seconds = 0
    x = y = w = h = None
    for line in vtt.decode("utf-8").split("\n"):
        line = line.strip()

        if "-->" in line:
            # grab the start time, e.g. 00:00:00.000 --> 00:00:41.000
            start = line.split("-->")[0].strip().split(":")
            # convert to seconds
            time_seconds = (
                int(start[0]) * 3600 + int(start[1]) * 60 + float(start[2])
            )
            x = y = w = h = None
        elif "xywh=" in line:
            x, y, w, h = line.split("xywh=")[-1].split(",")
            x, y, w, h = int(x), int(y), int(w), int(h)
        else:
            continue

        # Only yield once the cue has a region; compare against None rather
        # than truthiness so tiles at x == 0 are not skipped.
        if x is None:
            continue

        yield x, y, w, h, time_seconds


image_search = gr.Interface(
    fn=image_search_performer,
    inputs=[
        gr.components.Image(),
        gr.components.Slider(label="threshold", minimum=0.0, maximum=30.0, value=20.0),
        gr.components.Slider(label="results", minimum=0, maximum=50, value=3, step=1),
    ],
    outputs=gr.JSON(label=""),
    title="Who is in the photo?",
    description="Upload an image of a person and we'll tell you who it is.",
)

image_search_multiple = gr.Interface(
    fn=image_search_performers,
    inputs=[
        gr.components.Image(type='pil'),
        gr.components.Slider(label="threshold", minimum=0.0, maximum=30.0, value=20.0),
        gr.components.Slider(label="results", minimum=0, maximum=50, value=3, step=1),
    ],
    outputs=gr.JSON(label=""),
    title="Who is in the photo?",
    description="Upload an image with one or more people and we'll tell you who they are.",
)

vector_search = gr.Interface(
    fn=vector_search_performer,
    inputs=[
        gr.components.Textbox(),
        gr.components.Slider(label="threshold", minimum=0.0, maximum=30.0, value=20.0),
        gr.components.Slider(label="results", minimum=0, maximum=50, value=3, step=1),
    ],
    outputs=gr.JSON(label=""),
    title="Who is in the photo?",
    description="Paste a 512-dimensional DeepFace vector of a person and we'll tell you who it is.",
)

faces_in_sprite = gr.Interface(
    fn=find_faces_in_sprite,
    inputs=[
        gr.Image(),
        gr.Textbox(label="VTT file"),
    ],
    outputs=gr.JSON(label=""),
)

gr.TabbedInterface(
    [image_search, image_search_multiple, vector_search, faces_in_sprite]
).queue().launch(server_name="0.0.0.0")
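
# Illustrative WebVTT fragment of the shape getVTToffsets expects (hypothetical
# values): each cue carries a start time and an xywh media fragment locating
# one thumbnail inside the sprite sheet.
#
#   WEBVTT
#
#   00:00:00.000 --> 00:00:05.000
#   sprite.jpg#xywh=0,0,160,90
#
#   00:00:05.000 --> 00:00:10.000
#   sprite.jpg#xywh=160,0,160,90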