# Reference: https://huggingface.co/spaces/haotiz/glip-zeroshot-demo/blob/main/app.py 

import requests
import os
from io import BytesIO
from PIL import Image
import numpy as np
from pathlib import Path
import gradio as gr

import warnings

# Process-wide: suppress every warning from here on, including those raised
# while the libraries below are imported.
warnings.filterwarnings("ignore")

# Build/install the project in "develop" mode from the current working
# directory before importing it. The exit status of the command is ignored,
# so a failed build only surfaces as an import error on the lines below.
# NOTE(review): "python" is resolved through PATH and may not be the
# interpreter running this script -- confirm for the deployment environment.
os.system("python setup.py build develop --user")

# These imports intentionally come after the build step above; presumably the
# package is not importable until that step has run -- do not hoist them.
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.engine.predictor_glip import GLIPDemo

# NOTE: `deepcopy` is also used by predict() further down in this file, so it
# must stay importable at module level.
from copy import deepcopy


def _build_demo(demo_cfg, config_path, weights_path):
    """Merge one model's settings into ``demo_cfg`` and build its predictor.

    ``demo_cfg`` is modified in place: the YAML file at ``config_path`` is
    merged first, then ``MODEL.WEIGHT`` is pointed at ``weights_path`` and
    ``MODEL.DEVICE`` is forced to ``"cuda"``.

    Returns the ``GLIPDemo`` created from the resulting config.
    """
    demo_cfg.merge_from_file(config_path)
    demo_cfg.merge_from_list(["MODEL.WEIGHT", weights_path])
    demo_cfg.merge_from_list(["MODEL.DEVICE", "cuda"])
    return GLIPDemo(
        demo_cfg,
        min_image_size=800,
        confidence_threshold=0.7,
        show_mask_heatmaps=False,
    )


# Manual overrides applied to the imported (shared) config object before any
# config file is merged into it.
cfg.local_rank = 0
cfg.num_gpus = 1

# First predictor: configured directly on the imported `cfg` object, so that
# object itself carries the DesCo-GLIP settings afterwards.
config_file = "configs/pretrain_new/desco_glip.yaml"
weight_file = "MODEL/desco_glip_tiny.pth"
glip_demo = _build_demo(cfg, config_file, weight_file)

# Every later predictor works on a deep copy of the *previous* predictor's
# config, so the merges below never touch a config that was already handed
# to an earlier GLIPDemo.
weight_file = "MODEL/local1.pth"
cfg = deepcopy(cfg)
local_demo1 = _build_demo(cfg, config_file, weight_file)

weight_file = "MODEL/local2.pth"
cfg = deepcopy(cfg)
local_demo2 = _build_demo(cfg, config_file, weight_file)

# The FIBER config is merged on top of a copy that already contains the GLIP
# settings; any key the FIBER YAML does not set keeps its GLIP value.
config_file = "configs/pretrain_new/desco_fiber.yaml"
weight_file = "MODEL/desco_fiber_base.pth"
cfg = deepcopy(cfg)
fiber_demo = _build_demo(cfg, config_file, weight_file)

# Default drawing options; predict() forwards them as keyword arguments to
# each predictor's run_on_web_image().
athetics_params = dict(
    # whether we overlay the phrase over the box
    skip_name=False,
    override_color=(0, 90, 190),
    text_size=1.0,
    text_pixel=3,
    box_alpha=1.0,
    box_pixel=5,
    # distance between text and box
    text_offset_original=8,
)

def predict(image, text, ground_tokens=""):
    """Run all four predictors on one image and return their annotated outputs.

    Args:
        image: Array-like image indexed as ``image[row, col, channel]`` with
            three channels; only ``shape`` and fancy indexing are used here.
        text: Caption/prompt passed unchanged to every predictor.
        ground_tokens: Optional ``;``-separated list of phrases. Blank (or
            ``None``) means "no explicit tokens" and ``None`` is forwarded.

    Returns:
        A 4-tuple of annotated images in the order DesCo-GLIP, DesCo-FIBER,
        local1, local2, each with its channel order swapped back to match
        the input's.
    """
    import math
    from copy import deepcopy

    # Scale the label text with the shorter image side (per 1000 px).
    shorter_side = min(image.shape[:2])
    scale = shorter_side / 1000

    # Build a per-call copy instead of writing into the module-level dict:
    # the shared defaults stay intact and overlapping requests cannot see
    # each other's text sizes.
    overlay_params = {
        **athetics_params,
        "text_size": math.ceil(scale),
        "text_pixel": math.ceil(scale * 3),
    }

    stripped_tokens = (ground_tokens or "").strip()
    ground_tokens = stripped_tokens.split(";") if stripped_tokens else None

    # Reverse the channel order once (presumably RGB -> BGR for the
    # predictors -- confirm); each model still gets its own private copy.
    swapped = image[:, :, [2, 1, 0]]

    annotated_outputs = []
    for demo in (glip_demo, fiber_demo, local_demo1, local_demo2):
        annotated, _ = demo.run_on_web_image(
            deepcopy(swapped), text, 0.5, ground_tokens, **overlay_params
        )
        # Swap the channels back before handing the image to the UI.
        annotated_outputs.append(annotated[:, :, [2, 1, 0]])
    return tuple(annotated_outputs)


# Stand-alone input component; the Interface below refers to its inputs by
# the string shortcuts "image"/"text" and does not use this object.
image = gr.inputs.Image()


# One output panel per predictor, in the same order predict() returns them.
_OUTPUT_LABELS = ("DesCo-GLIP", "DesCo-FIBER", "local1", "local2")
_output_panels = [
    gr.outputs.Image(type="pil", label=panel_label)
    for panel_label in _OUTPUT_LABELS
]

# Each example row is: image path, caption text, ground tokens.
_example_rows = [
    ["./1.jpg", "A clown making a balloon animal for a pretty lady.", "clown"],
    ["./1.jpg", "A clown kicking a soccer ball for a pretty lady.", "clown"],
    ["./2.jpg", "A kind of tool, wooden handle with a round head.", "tool"],
    ["./3.jpg", "Bumblebee, yellow with black accents.", "Bumblebee"],
]

# Build the web UI (the article text is read from docs/intro.md) and start
# serving it immediately.
_interface = gr.Interface(
    description="Object Recognition with DesCo (https://github.com/liunian-harold-li/DesCo)",
    fn=predict,
    inputs=["image", "text", "text"],
    outputs=_output_panels,
    examples=_example_rows,
    article=Path("docs/intro.md").read_text(),
)
_interface.launch()