# --- Project dependencies ---
import os
import io
import base64
import requests
import json
import gradio as gr
from PIL import Image
from dotenv import load_dotenv, find_dotenv


# --- Load environment variables ---
# Load variables from the .env file located by find_dotenv() so that the
# os.environ lookup below can see them.
_ = load_dotenv(find_dotenv())  # read local .env file
# Hugging Face API token; get_completion sends it as the Bearer credential.
# Indexing os.environ raises KeyError at import time if HF_API_KEY is not set.
hf_api_key = os.environ["HF_API_KEY"]


# --- URLs and Endpoints ---
# Base URL for model pages; captioner appends a model ID to build Markdown links.
hf_base_url = "https://huggingface.co/"
# Base URL for inference requests; get_completion appends a model ID and POSTs to it.
hf_inference_base_url = "https://api-inference.huggingface.co/models/"

# Model IDs ("owner/name") that get_completion queries, in this order, for
# every request.
endpoints = [
    "Salesforce/blip-image-captioning-large",
    "Salesforce/blip-image-captioning-base",
    "nlpconnect/vit-gpt2-image-captioning",
    "microsoft/git-base",
    "microsoft/git-large-textcaps",
    "microsoft/git-large-r-coco",
]


# --- Define helper functions ---


# Image-to-text completion
def get_completion(inputs, parameters=None, *, timeout=60):
    """Send one inference request to every configured endpoint.

    Args:
        inputs: JSON-serializable payload placed under the "inputs" key of the
            request body (captioner passes a base64-encoded PNG string).
        parameters: Optional JSON-serializable value sent under the
            "parameters" key; left out of the body when None.
        timeout: Seconds to wait on each request, passed straight to
            requests.post. Pass None to wait indefinitely.

    Returns:
        A dict keyed by endpoint name, in the order of ``endpoints``. Each
        value is the decoded JSON response on success, or
        ``{"error": <message>}`` when the request failed, timed out, or the
        response body was not valid UTF-8 JSON.
    """
    headers = {
        "Authorization": f"Bearer {hf_api_key}",
        "Content-Type": "application/json",
    }
    data = {"inputs": inputs}
    if parameters is not None:
        data["parameters"] = parameters
    # Serialize once: the same body is sent to every endpoint.
    body = json.dumps(data)

    results = {}
    for endpoint in endpoints:
        try:
            response = requests.post(
                hf_inference_base_url + endpoint,
                headers=headers,
                data=body,
                # Without a timeout a single stalled endpoint would block the
                # whole comparison forever.
                timeout=timeout,
            )
            response.raise_for_status()
            results[endpoint] = json.loads(response.content.decode("utf-8"))
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so
            # a malformed body from one endpoint cannot abort the others.
            print(f"Request to {endpoint} failed: {e}")
            results[endpoint] = {"error": str(e)}

    return results


# Format image as base64 string
def image_to_base64_str(pil_image):
    """Return the image's PNG encoding as a base64 text string.

    Args:
        pil_image: Object exposing ``save(file_obj, format=...)``, such as a
            PIL image.

    Returns:
        The base64 encoding of the PNG bytes, as a ``str``.
    """
    with io.BytesIO() as buffer:
        pil_image.save(buffer, format="PNG")
        png_bytes = buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")


# Define captioner function
def captioner(image):
    """Caption an image with every configured model and format the result.

    Args:
        image: The image object passed in by the UI (anything accepted by
            image_to_base64_str), or None when nothing was uploaded.

    Returns:
        A Markdown string with one entry per endpoint: a link to the model
        page followed by the generated caption or an "Error - ..." message.
        When ``image`` is None, a short prompt asking for an upload.
    """
    # Submitting the form with no image yields None; return a hint instead of
    # failing on image.save().
    if image is None:
        return "Please upload an image first."

    results = get_completion(image_to_base64_str(image))
    captions = []
    for endpoint, result in results.items():
        # Plain link text rather than a Markdown heading keeps regular text size.
        header = f"[{endpoint}]({hf_base_url+endpoint}):"
        if isinstance(result, dict) and "error" in result:
            caption = f"Error - {result['error']}"
        else:
            try:
                # Success responses are expected to look like
                # [{"generated_text": "..."}].
                caption = result[0]["generated_text"]
            except (IndexError, KeyError, TypeError):
                # One oddly shaped response must not discard the other models'
                # captions.
                caption = f"Error - unexpected response: {result!r}"
        captions.append(f"{header}\n{caption} \n\n")
    # Entries are separated by blank lines; strip the trailing whitespace left
    # by the last one.
    return "\n".join(captions).strip()


# --- Launch the Gradio App ---
# One image input, one Markdown output: captioner returns all captions as a
# single Markdown string.
demo = gr.Interface(
    fn=captioner,
    # type="pil": captioner hands the value to image_to_base64_str, which
    # calls .save(..., format="PNG") on it.
    inputs=[gr.Image(label="Upload image", type="pil")],
    outputs=gr.Markdown(label="Captions"),  # Use a single Markdown output
    title="Image Captioning Model Comparison",
    description="Upload an image and see how different models describe it!",
    allow_flagging="never",
    # Example images offered in the UI. These are relative paths, presumably
    # resolved against the working directory, so the files must exist there
    # (TODO confirm).
    examples=[
        "example_1.jpg",
        "example_2.jpg",
        "example_3.jpg",
        "example_4.png",
        "example_5.png",
    ],
)

# NOTE(review): this runs at import time (there is no __main__ guard), and
# share=True asks Gradio for a public share link. Confirm both are intended.
demo.launch(share=True, debug=True)


# --- Close all connections ---
# Runs only after launch() above has returned.
gr.close_all()