In [None]:
# Guard flag read by the next cell so the working-directory change happens only once.
# Kept in its own cell: re-running this cell resets the flag and re-arms the chdir.
HAS_CD_TO_ROOT = False

In [None]:
import sys
import os

# Move two directory levels up exactly once per kernel; the flag prevents a second
# chdir when this cell is re-run. Presumably this lands on the repository root so that
# the `src.*` imports and the relative `tmp/` / `amlt/` paths below resolve — confirm.
if HAS_CD_TO_ROOT is False:
    os.chdir("../../")
    HAS_CD_TO_ROOT = True

import logging
import os
from typing import Optional, Dict

import hydra
import torch
from hydra.utils import instantiate
from datasets import DatasetDict, load_dataset, IterableDatasetDict
from omegaconf import DictConfig, OmegaConf
from src.data.transforms import SamCaptionerDataTransform
from src.data.collator import SamCaptionerDataCollator
from src.arguments import Arguments, global_setup, SAMCaptionerModelArguments, SCAModelArguments, SCAModelBaseArguments
from src.models.sam_captioner import SAMCaptionerConfig, SAMCaptionerModel, SAMCaptionerProcessor
from src.models.sca import ScaProcessor

from transformers.trainer_utils import get_last_checkpoint
from transformers import set_seed, Trainer
import gradio as gr
from dataclasses import dataclass
import numpy as np
from functools import partial
import pandas as pd
from src.train import prepare_datasets, prepare_data_transform, prepare_processor
import pycocotools.mask
from PIL import Image

from hydra import initialize, compose
import json
import tqdm
import hashlib
import glob
import cv2  
import numpy as np  
from PIL import Image, ImageDraw, ImageFont  
import random
import pycocotools.mask
import sqlite3
from contextlib import closing
import dotenv

# Cell output: show the working directory so the effect of the chdir above can be checked.
os.getcwd()

In [None]:
# Name of the data config used for both the train and the eval override below.
DATASET='vg-densecap-local'
# Compose the Hydra config "conf" with the dataset/model overrides used by this notebook.
# NOTE(review): config_path is kept as "../../src/conf" even after the chdir above;
# presumably Hydra resolves it against the notebook's own location — confirm.
with initialize(version_base="1.3", config_path="../../src/conf"):
    args = compose(
        config_name="conf",
        overrides=[
            f"train_data=[{DATASET}]",
            f"eval_data=[{DATASET}]",
            "+model=base_sam_captioner",
            # No trailing comma on the next line: add one before enabling the
            # commented-out overrides, otherwise the strings are concatenated.
            "training.output_dir=tmp/visualization"
            # "training.do_train=True",
            # "training.do_eval=True",
        ],
    )

In [None]:
# Split the composed config into (args, training_args, model_args) via the project helper.
# Note that `args` is rebound here, so this cell is not idempotent on its own.
args, training_args, model_args = global_setup(args)
# All artifacts written by this notebook go under training_args.output_dir.
os.makedirs(training_args.output_dir, exist_ok=True)

In [None]:
# Initialize our dataset and prepare it
with initialize(version_base="1.3", config_path="../../src/conf"):
    train_dataset, eval_dataset = prepare_datasets(args)

# NOTE(xiaoke): load variables from .env for huggingface model downloading.
# Only USE_AUTH_TOKEN is read here. os.getenv returns a string when the variable is set,
# so any non-empty value (including "False") is a non-empty string — verify how
# prepare_processor interprets it.
dotenv.load_dotenv('.env')
use_auth_token = os.getenv("USE_AUTH_TOKEN", False)

processor = prepare_processor(model_args, use_auth_token)

# Rebinds train_dataset / eval_dataset to their transformed versions.
train_dataset, eval_dataset = prepare_data_transform(
    training_args, model_args, train_dataset, eval_dataset, processor
)


# [NOTE] Used to restore the image tensor after transformed
# Use global to avoid passing too many arguments
# (At module level the `global` statement has no effect; the names are module globals anyway.)
global image_mean, image_std
image_mean, image_std = (
    processor.sam_processor.image_processor.image_mean,
    processor.sam_processor.image_processor.image_std,
)

In [None]:
# Set to True to ignore a cached mapping file and rebuild the mapping from the datasets.
REWRITE_MAPPING = False
# Base path of the image_id -> dataset index cache. The file actually written carries an
# "-<md5>" suffix before the extension (see save_dict_to_json_with_md5 below).
image_id_to_dataset_id_mapping_file = os.path.join(training_args.output_dir, "image_id_to_dataset_id_mapping.json")

def find_json_file_with_md5(json_file):
    """List existing files named ``<stem>-<anything><ext>`` for the given base path.

    Args:
        json_file: Base path such as ``dir/mapping.json``; the base file itself is
            never matched because the pattern requires a ``-`` after the stem.

    Returns:
        The list returned by ``glob.glob`` for ``<stem>-*<ext>`` (possibly empty).
    """
    stem, extension = os.path.splitext(json_file)
    return glob.glob("{}-*{}".format(stem, extension))

def get_md5_from_json(json_file):
    """Return the hex MD5 digest of a text file's content.

    The file is read in text mode and the resulting string is re-encoded with
    ``str.encode()`` (UTF-8) before hashing, mirroring how the digest is computed
    when the file is written.
    """
    with open(json_file, "r") as handle:
        payload = handle.read().encode()
    return hashlib.md5(payload).hexdigest()

def get_md5_from_pyobj(pyobj):
    """Return the hex MD5 digest of ``pyobj.encode()``.

    Despite the name, ``pyobj`` must provide ``.encode()`` (in practice a ``str``);
    it is not a general object hasher.
    """
    return hashlib.md5(pyobj.encode()).hexdigest()

def save_dict_to_json_with_md5(json_file, dict_data):
    """Serialize ``dict_data`` to JSON and write it to a content-addressed file.

    Args:
        json_file: Base path, e.g. ``dir/name.json``. Nothing is written to this exact path.
        dict_data: JSON-serializable object.

    Returns:
        The path actually written: ``<stem>-<md5 of the JSON text><ext>``.
    """
    serialized = json.dumps(dict_data)
    digest = get_md5_from_pyobj(serialized)
    stem, extension = os.path.splitext(json_file)
    target_path = f"{stem}-{digest}{extension}"
    with open(target_path, 'w') as out:
        out.write(serialized)
    return target_path

# Build a second copy of the datasets with `data.with_image=False` for both train and
# eval. These copies are only iterated below to read each sample's "image_id".
# Presumably this skips image loading so the scan is cheap — confirm in prepare_datasets.
with initialize(version_base="1.3", config_path="../../src/conf"):
    args_no_image = compose(
        config_name="conf",
        overrides=[
            f"train_data=[{DATASET}]",
            f"eval_data=[{DATASET}]",
            "+model=base_sam_captioner",
            # No trailing comma on the next line: add one before enabling the
            # commented-out overrides, otherwise the strings are concatenated.
            "training.output_dir=tmp/visualization"
            # "training.do_train=True",
            # "training.do_eval=True",
        ],
    )
    args_no_image.train_data_overrides = ['data.with_image=False']
    args_no_image.eval_data_overrides = ['data.with_image=False']
    train_dataset_no_image, eval_dataset_no_image = prepare_datasets(args_no_image)

# Load the cached image_id -> dataset index mapping if exactly one md5-suffixed cache
# file exists (and REWRITE_MAPPING is off); otherwise rebuild it by scanning the datasets.
json_file_with_md5_ls = find_json_file_with_md5(image_id_to_dataset_id_mapping_file)
if len(json_file_with_md5_ls) > 1:
    raise ValueError(f"find more than one json file with md5, {json_file_with_md5_ls}")
if REWRITE_MAPPING is False and len(json_file_with_md5_ls) == 1:
    image_id_to_dataset_id_mapping_file = json_file_with_md5_ls[0]
    # Integrity check: the digest embedded in the file name must match the content.
    md5_in_name = os.path.splitext(image_id_to_dataset_id_mapping_file)[0].split("-")[-1]
    assert md5_in_name == get_md5_from_json(image_id_to_dataset_id_mapping_file), f"md5 not match for {image_id_to_dataset_id_mapping_file}"

    with open(image_id_to_dataset_id_mapping_file, "r") as f:
        image_id_to_dataset_id_mapping = json.load(f)
    print(f"Load mapping from {image_id_to_dataset_id_mapping_file}")
else:
    image_id_to_dataset_id_mapping = {
        "train": dict(),
        **{k: dict() for k in eval_dataset_no_image.keys()},
    }
    # Keys are stored as str(image_id): JSON object keys are always strings, so a mapping
    # loaded from the cache has string keys, and later cells look up str(image_id).
    # Using strings here makes a freshly built mapping behave like a cached one.
    for sample_cnt, sample in enumerate(tqdm.tqdm(train_dataset_no_image)):
        image_id_to_dataset_id_mapping["train"][str(sample["image_id"])] = sample_cnt
    for eval_dataset_name, eval_dataset_  in eval_dataset_no_image.items():
        for sample_cnt, sample in enumerate(tqdm.tqdm(eval_dataset_)):
            image_id_to_dataset_id_mapping[eval_dataset_name][str(sample["image_id"])] = sample_cnt
    image_id_to_dataset_id_mapping_file = save_dict_to_json_with_md5(image_id_to_dataset_id_mapping_file, image_id_to_dataset_id_mapping)
    print(f"save mapping to {image_id_to_dataset_id_mapping_file}")
    # When rebuilding over an existing cache whose content differs, drop the old file;
    # otherwise the next run finds two candidates and raises the ValueError above.
    for stale_mapping_file in json_file_with_md5_ls:
        if os.path.abspath(stale_mapping_file) != os.path.abspath(image_id_to_dataset_id_mapping_file):
            os.remove(stale_mapping_file)
            print(f"remove stale mapping {stale_mapping_file}")


In [None]:
# Load the infer json
# Maps a short job name to the post-processed inference JSON of that run.
# NOTE(review): the first entry is an absolute, user-specific path while the others are
# relative to the working directory — consider making them consistent.
infer_json_path_dict = {
    "vg-gpt2l-bs_32-lsj": "/home/t-yutonglin/xiaoke/segment-caption-anything-v2/amlt/111423.exp-only_vg-finetune_vg/111323.infer-train-sca-ablat-lsj-scale_lr-110423.4x8_fin-16x4_unfin.pre/best-gpt2-large-lsj-1xlr.110423.octo-4x8-v100-16g-no_pre/vg-densecap-region_descriptions/infer-post_processed/infer-visual_genome-region_descriptions_v1.2.0-test.json",
    "vg-ollm3bv2-bs_32-lsj": "amlt/110723.exp.ablat-lsj-scale_lr-running-2/infer-train-sca-ablat-lsj-scale_lr-110423-110723.running-2/best-fp16-ollm3bv2-large-lsj-1xlr.110423.octo-4x8-v100-16g-no_pre/vg-densecap-region_descriptions/infer-post_processed/infer-visual_genome-region_descriptions_v1.2.0-test.json",
    "o365_vg-gpt2l-bs_64-lsj": "amlt/111423.exp-only_vg-finetune_vg/111323.infer-train-sca.finetune_lsj_scale_lr-o365_1e_4_1xlr_lsj.111023.4x8_fin-16x4_unfin.pre/best-111223.rr1-4x8-v100-32g-pre.fintune-gpt2_large-lr_1e_4-1xlr-lsj-bs_2-o365_1e_4_no_lsj_bs_64/vg-densecap-region_descriptions/infer-post_processed/infer-visual_genome-region_descriptions_v1.2.0-test.json",
}

# Fail fast if any of the result files is missing.
for job_name, json_path in infer_json_path_dict.items():
    print(f"job_name: {job_name}")
    print(f"is exists: {os.path.exists(json_path)}")
    assert os.path.exists(json_path), f"{json_path} not exists"

# Only the first job's predictions are loaded for the single-model demo cells below.
infer_json_path = infer_json_path_dict["vg-gpt2l-bs_32-lsj"]
with open(infer_json_path, "r") as f:
    infer_json = json.load(f)

In [None]:
import colorsys
# Candidate pastel palette (RGB, 0-255) used to highlight captions.
colors = [
    (235, 206, 135),  # Soft Yellow
    (176, 224, 230),  # Powder Blue
    (240, 230, 140),  # Khaki
    (244, 164, 96),   # Sandy Brown
    (144, 238, 144),  # Light Green
    (221, 160, 221),  # Plum
    (255, 182, 193),  # Light Pink
    (173, 216, 230),  # Light Blue
    (255, 235, 205),  # Blanched Almond
    (245, 255, 250),  # Mint Cream
]

# Convert RGB to HSV and keep track of original index
colors_hsv = [(colorsys.rgb_to_hsv(color[0]/255, color[1]/255, color[2]/255), index) for index, color in enumerate(colors)]

# Sort by hue
# (tuples compare lexicographically: hue first, then saturation, value, original index)
colors_hsv.sort()

# Convert back to RGB
harmonious_colors = [colors[index] for hsv, index in colors_hsv]

# Your selected colors
selected_colors = harmonious_colors
# Calculate height of each color strip
# (integer division: leftover rows at the bottom of the 256px image stay black)
height = 256 // len(selected_colors)

# Create a new image with RGB mode
img = Image.new('RGB', (256, 256))

draw = ImageDraw.Draw(img)

for i, color in enumerate(selected_colors):
    # Calculate the start and end positions of the color strip
    start_pos = i * height
    end_pos = start_pos + height

    # Draw the color strip
    draw.rectangle([(0, start_pos), (256, end_pos)], fill=color)
# Cell output: the palette preview image.
img

In [None]:
def hex_to_rgb(hex_color):
    """Convert a ``#RRGGBB`` string to an ``(r, g, b)`` tuple of ints.

    The first character is skipped without being checked, and only characters 1-6 are
    read, so any trailing characters (e.g. an alpha byte) are ignored.
    """
    red, green, blue = (int(hex_color[start:start + 2], 16) for start in (1, 3, 5))
    return (red, green, blue)
  
# Green-to-teal gradient given as #RRGGBBAA hex strings.
hex_colors = ["#B0F2BCFF", "#89E8ACFF", "#67DBA5FF", "#4CC8A3FF", "#38B2A3FF", "#2C98A0FF", "#257D98FF"]

# '[:-2]' drops the trailing 'FF', i.e. the alpha byte of the #RRGGBBAA notation.
rgb_colors = [hex_to_rgb(color[:-2]) for color in hex_colors]
# This palette replaces the one from the previous cell for all later cells.
harmonious_colors = rgb_colors

# Recompute the strip height for THIS palette. Previously the value left over from the
# previous cell (sized for a different number of colours) was reused, so the preview
# depended on hidden state and did not fill the image.
height = 256 // len(rgb_colors)

# Create a new image with RGB mode
img = Image.new('RGB', (256, 256))

draw = ImageDraw.Draw(img)

print(rgb_colors)
for i, color in enumerate(rgb_colors):
    # Calculate the start and end positions of the color strip
    start_pos = i * height
    end_pos = start_pos + height

    # Draw the color strip
    print(color)
    draw.rectangle([(0, start_pos), (256, end_pos)], fill=color)
# Cell output: the palette preview image.
img

In [None]:
# Key of the eval split, used both for the mapping and for eval_dataset.
EVAL_DATASET_SPLIT = 'visual_genome-densecap-local-densecap-test'
# Pick one prediction record (index 3) as the running example for the cells below.
first_sample = infer_json[3]
references = first_sample["references"]
candidates = first_sample["candidates"]

image_id = first_sample["metadata"]["metadata_image_id"]
region_id = first_sample["metadata"]["metadata_region_id"]
input_boxes = first_sample["metadata"]["metadata_input_boxes"]

# The mapping is looked up with a string key (JSON object keys are strings).
sample_cnt = image_id_to_dataset_id_mapping[EVAL_DATASET_SPLIT][str(image_id)]
sample = eval_dataset[EVAL_DATASET_SPLIT][sample_cnt]
image = sample["image"]

In [None]:
from PIL import Image, ImageDraw, ImageFont  
import cv2  
import numpy as np  

# TrueType font used for caption rendering, relative to the working directory.
FONT_PATH = "tmp/Arial.ttf"
# NOTE(review): FONT is not referenced by the helpers defined in this cell
# (draw_captions loads its own font from font_path) — possibly unused.
FONT = ImageFont.truetype(FONT_PATH, 20)

def draw_bbox(pil_image, bbox, color=(30, 144, 255), thickness=1):
    """Return a copy of ``pil_image`` with a rectangle outline drawn on it.

    Args:
        pil_image: Source PIL image; it is copied into a NumPy array and not modified.
        bbox: Sequence whose first four items are ``x1, y1, x2, y2`` in pixels.
            Float coordinates (e.g. read from JSON) are rounded to the nearest
            integer, because OpenCV's drawing functions only accept integer points.
        color: Outline colour, in the channel order of the image array.
        thickness: Outline thickness in pixels.

    Returns:
        A new PIL image.
    """
    cv_image = np.array(pil_image)
    x1, y1, x2, y2 = (int(round(float(bbox[i]))) for i in range(4))
    cv2.rectangle(cv_image, (x1, y1), (x2, y2), color, thickness)
    return Image.fromarray(cv_image)

def draw_mask(pil_image, mask_array, color=(30, 144, 255), alpha=0.5):
    """Return a copy of ``pil_image`` with ``color`` alpha-blended over the mask.

    Pixels where ``mask_array == 1`` become ``pixel * (1 - alpha) + color * alpha``;
    the result is written back into the image array's dtype. All other pixels are
    left unchanged.
    """
    canvas = np.array(pil_image)
    selected = mask_array == 1
    tint = np.array(color) * alpha
    canvas[selected] = canvas[selected] * (1 - alpha) + tint
    return Image.fromarray(canvas)

def draw_mask_boundary(pil_image, mask_array, color=(30, 144, 255), thickness=1):
    """Return a copy of ``pil_image`` with the outer contours of a mask drawn on it.

    Args:
        pil_image: Source PIL image; it is copied and not modified.
        mask_array: 2-D binary mask. It is converted to a C-contiguous ``uint8`` array
            first, so boolean masks and array slices are accepted as well.
        color: Contour colour, in the channel order of the image array.
        thickness: Contour thickness in pixels.

    Returns:
        A new PIL image.
    """
    cv_image = np.array(pil_image)
    binary_mask = np.ascontiguousarray(mask_array, dtype=np.uint8)
    # findContours returns (contours, hierarchy) in OpenCV 4 but
    # (image, contours, hierarchy) in OpenCV 3; [-2] is the contours in both.
    contours = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    cv2.drawContours(cv_image, contours, -1, color, thickness)
    return Image.fromarray(cv_image)

def resize_image(image, height=None, width=None):
    """
    Resize ``image`` to the requested height and/or width.

    - Neither given: the image is returned unchanged (same object).
    - Both given: the image is resized to exactly ``(width, height)``.
    - Only one given: the other side is scaled proportionally and truncated to an int.
    """
    if height is None and width is None:
        return image

    src_w, src_h = image.size

    if height is None:
        # Only the width was requested.
        target = (width, int(src_h * width / src_w))
    elif width is None:
        # Only the height was requested.
        target = (int(src_w * height / src_h), height)
    else:
        target = (width, height)

    return image.resize(target)



def draw_captions(pil_image, captions, font_path, font_size=20, font_color=(0, 0, 0), bg_color=(255, 255, 255), margin_size=10, captions_color=None):
    """Return a new image with ``captions`` stacked below ``pil_image``.

    Args:
        pil_image: Image pasted at the top-left of the output.
        captions: Strings to render, one per line, in order.
        font_path: Path of a TrueType font file.
        font_size: Font size passed to ``ImageFont.truetype``.
        font_color: Text colour.
        bg_color: Background colour of the added caption area.
        margin_size: Extra vertical space added after each caption.
        captions_color: Optional per-caption highlight colours, indexed like
            ``captions``; when given, a filled rectangle is drawn behind each caption.

    Returns:
        A new RGB image as wide as ``pil_image`` and tall enough for all captions.
    """
    font = ImageFont.truetype(font_path, font_size)
    # getbbox returns (left, top, right, bottom); right/bottom are used as the extent.
    extents = [font.getbbox(text)[2:] for text in captions]
    padding = sum(bottom + margin_size for _, bottom in extents)

    canvas = Image.new('RGB', (pil_image.width, pil_image.height + padding), bg_color)
    canvas.paste(pil_image, (0, 0))
    painter = ImageDraw.Draw(canvas)

    cursor_y = pil_image.height
    for idx, (text, (right, bottom)) in enumerate(zip(captions, extents)):
        if captions_color is not None:
            highlight_box = (0, cursor_y, right, cursor_y + bottom)
            painter.rectangle(highlight_box, fill=captions_color[idx], width=0)
        painter.text((0, cursor_y), text, fill=font_color, font=font)
        cursor_y += bottom + margin_size

    return canvas
  
def plot_bbox_and_captions(pil_image, bbox=None, captions=None, mask=None, font_path='tmp/Arial.ttf', font_size=20, font_color=(0, 0, 0), bg_color=(255, 255, 255), margin_size=0, captions_color=None):
    """Compose the full visualization for one region.

    Steps, in order: draw ``bbox`` (if given), draw the boundary of ``mask`` (if given),
    resize the image to a height of 512, then append ``captions`` (if given) below it.
    Box and mask are drawn at the original resolution, before resizing.
    The font/colour/margin arguments are forwarded to ``draw_captions``.

    Note the parameter order: ``mask`` is the fourth positional parameter, so pass
    ``font_path`` and later arguments by keyword.
    """
    canvas = pil_image
    if bbox is not None:
        canvas = draw_bbox(canvas, bbox)
    if mask is not None:
        canvas = draw_mask_boundary(canvas, mask)
    canvas = resize_image(canvas, height=512)
    if captions is None:
        return canvas
    return draw_captions(canvas, captions, font_path, font_size, font_color, bg_color, margin_size, captions_color=captions_color)


# NOTE(review): font_path is assigned here but not passed to plot_bbox_and_captions
# below, which falls back to its own default value.
font_path = 'tmp/Arial.ttf'
# Predictions first, ground-truth references last.
captions = candidates + references

import random
# Calculate the number of colors
num_colors = len(harmonious_colors)
# Generate a random start index
# (unseeded, so the colours differ from run to run)
start_index = random.randint(0, num_colors - 1)
# Select colors in a round-robin way
selected_colors = [harmonious_colors[(start_index + i) % num_colors] for i in range(len(captions))]
captions_color = selected_colors

pil_img_with_bbox_and_captions = plot_bbox_and_captions(image, bbox=input_boxes, captions=captions, captions_color=captions_color)
# Cell output: the rendered image.
pil_img_with_bbox_and_captions

In [None]:
# SQLite database holding the masks (stored as JSON-encoded RLE) for each region.
mask_db_file = 'tmp/sam_mask_db/visual_genome-densecap-local-densecap-test/results.db'
# sqlite3.connect() silently creates an empty database when the file is missing, which
# would surface later as a confusing "no such table" error, so check explicitly first.
if not os.path.exists(mask_db_file):
    raise FileNotFoundError(f"{mask_db_file} not exists")
with closing(sqlite3.connect(mask_db_file)) as conn:
    cursor = conn.cursor()
    # region_cnt = 3 selects the same region as `infer_json[3]` used for the example sample.
    cursor.execute(
        """
        SELECT region_cnt, image_cnt, region_id, image_id, masks, scores, input_box, gt_caption
        FROM results where region_cnt = ?
    """, (3,)
    )
    results = cursor.fetchall()
if not results:
    raise ValueError(f"no row with region_cnt = 3 in {mask_db_file}")
# Print only the identifying columns; the masks column is a large RLE blob.
print([row[:4] for row in results])
rle_masks = results[0][4]
scores = results[0][5]
rle_masks = json.loads(rle_masks)
scores = json.loads(scores)
masks = pycocotools.mask.decode(rle_masks)

# Draw the boundary of the last mask along the final axis of the decoded array.
pil_img_with_bbox_and_captions = plot_bbox_and_captions(image, bbox=input_boxes, mask=masks[..., -1], captions=captions, captions_color=captions_color)
# Cell output: the rendered image.
pil_img_with_bbox_and_captions

In [None]:
# Load the infer json
# Job name -> post-processed inference JSON, now for all compared models.
# This rebinds infer_json_path_dict from the earlier single-model cell.
# NOTE(review): several entries are absolute, user-specific paths — consider making
# them relative to a configurable root.
infer_json_path_dict = {
    "sam_cap-git_large": "amlt/111523.exp.sam_captioner/infer_sam_captioner_region_chunkify/microsoft/git-large/infer-post_processed/infer-visual_genome-densecap-local-densecap-test.json.post.json",
    "sam_cap-blip_large": "amlt/111523.exp.sam_captioner/infer-sam_captioner-region_chunkify-eval_suite/Salesforce/blip-image-captioning-large/vg-densecap-region_descriptions/infer-post_processed/infer-visual_genome-region_descriptions_v1.2.0-test.json.post.json",
    "sam_cap-blip2_opt_2_7b": "amlt/111523.exp.sam_captioner/infer-sam_captioner-region_chunkify-eval_suite/Salesforce/blip2-opt-2.7b/infer-post_processed/infer-visual_genome-densecap-local-densecap-test.json.post.json",
    "grit": "amlt/111523.exp.grit/infer-promptable-grit/infer-post_processed/infer-visual_genome-densecap-local-densecap-test.json.post.json", 
    "vg-gpt2l-bs_32-lsj": "/home/t-yutonglin/xiaoke/segment-caption-anything-v2/amlt/111423.exp-only_vg-finetune_vg/111323.infer-train-sca-ablat-lsj-scale_lr-110423.4x8_fin-16x4_unfin.pre/best-gpt2-large-lsj-1xlr.110423.octo-4x8-v100-16g-no_pre/vg-densecap-region_descriptions/infer-post_processed/infer-visual_genome-region_descriptions_v1.2.0-test.json",
    "vg-ollm3bv2-bs_32-lsj": "/home/t-yutonglin/xiaoke/segment-caption-anything-v2/amlt/110723.exp.ablat-lsj-scale_lr-running-2/infer-train-sca-ablat-lsj-scale_lr-110423-110723.running-2/best-fp16-ollm3bv2-large-lsj-1xlr.110423.octo-4x8-v100-16g-no_pre/vg-densecap-region_descriptions/infer-post_processed/infer-visual_genome-region_descriptions_v1.2.0-test.json",
    "o365_vg-gpt2l-bs_64-lsj": "/home/t-yutonglin/xiaoke/segment-caption-anything-v2/amlt/111423.exp-only_vg-finetune_vg/111323.infer-train-sca.finetune_lsj_scale_lr-o365_1e_4_1xlr_lsj.111023.4x8_fin-16x4_unfin.pre/best-111223.rr1-4x8-v100-32g-pre.fintune-gpt2_large-lr_1e_4-1xlr-lsj-bs_2-o365_1e_4_no_lsj_bs_64/vg-densecap-region_descriptions/infer-post_processed/infer-visual_genome-region_descriptions_v1.2.0-test.json",
}

# Fail fast if any of the result files is missing.
for job_name, json_path in infer_json_path_dict.items():
    print(f"job_name: {job_name}")
    print(f"is exists: {os.path.exists(json_path)}")
    assert os.path.exists(json_path), f"{json_path} not exists"

class MultiInferJson(torch.utils.data.Dataset):
    """Index-aligned view over the inference JSON files of several jobs.

    Every JSON file is loaded fully into memory and must contain the same number of
    records. Item ``idx`` is a dict mapping each job name to that job's ``idx``-th record.
    """

    def __init__(self, infer_json_path_dict):
        """Load every file of ``infer_json_path_dict`` (job name -> JSON path).

        Raises:
            AssertionError: if the files do not all have the same number of records.
        """
        self.infer_json_path_dict = infer_json_path_dict
        self.infer_json_dict = dict()
        progress = tqdm.tqdm(self.infer_json_path_dict.items(), desc="Load json")
        for name, path in progress:
            with open(path, "r") as handle:
                self.infer_json_dict[name] = json.load(handle)

        # All jobs are compared against the first one's record count.
        reference_key = next(iter(self.infer_json_dict))
        expected_len = len(self.infer_json_dict[reference_key])
        for name, records in self.infer_json_dict.items():
            assert len(records) == expected_len, f"length not match for {name}"
        self._len = expected_len

    def __len__(self):
        """Number of records per job."""
        return self._len

    def __getitem__(self, idx):
        """Return ``{job_name: record}`` for position ``idx`` across all jobs."""
        item = {}
        for name, records in self.infer_json_dict.items():
            item[name] = records[idx]
        return item

# Load all jobs' predictions into memory (one JSON file per job).
infer_json_dataset = MultiInferJson(infer_json_path_dict)

def check_region_id_image_id(infer_json_dataset):
    """Assert that all jobs describe the same region at every index.

    For each item (a dict of job name -> record) the first job's
    ``metadata_image_id`` and ``metadata_region_id`` are the reference; every job's
    record must carry the same two values.

    Raises:
        AssertionError: on the first mismatch; the message names the offending job.
    """
    for per_job in tqdm.tqdm(infer_json_dataset, desc="Check region_id and image_id"):
        reference_meta = per_job[next(iter(per_job))]["metadata"]
        image_id = reference_meta["metadata_image_id"]
        region_id = reference_meta["metadata_region_id"]
        for job_name, region_pred in per_job.items():
            job_meta = region_pred["metadata"]
            assert image_id == job_meta["metadata_image_id"], f"image_id not match for {job_name}"
            assert region_id == job_meta["metadata_region_id"], f"region_id not match for {job_name}"

# Validate the alignment of all jobs once, up front (iterates the whole dataset).
check_region_id_image_id(infer_json_dataset)
# NOTE(review): this iterator is not used by the cells shown here — possibly leftover.
infer_json_dataset_iter = iter(infer_json_dataset)

In [None]:
def plot_one_region(infer_json_dataset, region_cnt):
    """Render one region with its box and every model's captions.

    Reads the notebook globals ``image_id_to_dataset_id_mapping``, ``eval_dataset``,
    ``harmonious_colors`` and ``training_args``. On first use it also writes a colour
    legend ("model_color_fig.png") to ``training_args.output_dir``.

    Args:
        infer_json_dataset: Indexable collection; ``infer_json_dataset[region_cnt]`` must
            be a dict mapping job name -> prediction record with the keys
            "metadata", "candidates" and "references".
        region_cnt: Index of the region to render.

    Returns:
        ``(image, file_name)``: the rendered PIL image and a suggested file name of the
        form "{region_cnt}-{sample_cnt}-{region_id}-{image_id}.png".
    """
    samples = infer_json_dataset[region_cnt]
    first_key = next(iter(samples))
    EVAL_DATASET_SPLIT = 'visual_genome-densecap-local-densecap-test'

    # Image, box and references are identical across jobs, so read them from the first.
    first_sample = samples[first_key]

    image_id = first_sample["metadata"]["metadata_image_id"]
    region_id = first_sample["metadata"]["metadata_region_id"]
    input_boxes = first_sample["metadata"]["metadata_input_boxes"]

    sample_cnt = image_id_to_dataset_id_mapping[EVAL_DATASET_SPLIT][str(image_id)]
    sample = eval_dataset[EVAL_DATASET_SPLIT][sample_cnt]
    image = sample["image"]

    references = first_sample["references"]

    font_path = 'tmp/Arial.ttf'

    # One colour per model, assigned round-robin from a fixed start index so the colours
    # are stable across calls and match the saved legend.
    num_colors = len(harmonious_colors)
    start_index = 4
    model_names = list(samples.keys())
    model_colors = [harmonious_colors[(start_index + i) % num_colors] for i in range(len(model_names))]

    # Each candidate caption gets the colour of the model that produced it. With one
    # candidate per model this equals the former per-candidate round-robin; with
    # several candidates per model the legend stays valid.
    candidates = []
    candidate_colors = []
    for model_color, region_pred in zip(model_colors, samples.values()):
        model_candidates = region_pred["candidates"]
        candidates.extend(model_candidates)
        candidate_colors.extend([model_color] * len(model_candidates))

    # References are shown on white; one entry per reference so that several references
    # do not run past the end of the colour list.
    captions_color = candidate_colors + [(255, 255, 255)] * len(references)
    captions = candidates + references

    model_color_fig_path = os.path.join(training_args.output_dir, "model_color_fig.png")
    if not os.path.exists(model_color_fig_path):
        model_color_fig = draw_captions(Image.new('RGB', (256, 0)), model_names, font_path, captions_color=model_colors)
        model_color_fig.save(model_color_fig_path)
        print(f"save model_color_fig to {model_color_fig_path}")

    # Keyword arguments are required here: the fourth positional parameter of
    # plot_bbox_and_captions is `mask`, so passing font_path positionally would be
    # treated as a mask.
    pil_img_with_bbox_and_captions = plot_bbox_and_captions(
        image,
        bbox=input_boxes,
        captions=captions,
        font_path=font_path,
        captions_color=captions_color,
        margin_size=5,
    )
    return pil_img_with_bbox_and_captions, f"{region_cnt}-{sample_cnt}-{region_id}-{image_id}.png"

# Smoke test: render region 0 and save it under the output directory.
region_cnt = 0
pil_img_with_bbox_and_captions, pil_img_with_bbox_and_captions_path = plot_one_region(infer_json_dataset, region_cnt)
pil_img_with_bbox_and_captions.save(os.path.join(training_args.output_dir, pil_img_with_bbox_and_captions_path))
# Cell output: the rendered image.
pil_img_with_bbox_and_captions

In [None]:
def _add_prefix_suffix_to_path(path: str, prefix: str, suffix: str) -> str:
    base_dir, filename = os.path.split(path)
    return os.path.join(base_dir, prefix + filename + suffix)

# Job name -> path of the per-sample score file that sits next to each inference JSON.
score_json_path_dict = {}
# Example of a resulting file name:
# CIDEr-D-scores.infer-visual_genome-region_descriptions_v1.2.0-test.json.json
SCORE_PREFIX = "CIDEr-D-scores."
SCORE_SUFFIX = ".json"

for k, v in infer_json_path_dict.items():
    score_json_path_dict[k] = _add_prefix_suffix_to_path(v, SCORE_PREFIX, SCORE_SUFFIX)
# Fail fast if any of the score files is missing.
for job_name, score_json_path in score_json_path_dict.items():
    print(f"job_name: {job_name}")
    print(f"is exists: {os.path.exists(score_json_path)}")
    assert os.path.exists(score_json_path), f"{score_json_path} not exists"

In [None]:
import pandas as pd
import json

# Load each job's score JSON, keyed by job name.
score_json_dict = {}
for k, v in score_json_path_dict.items():
    with open(v, "r") as f:
        score_json_dict[k] = json.load(f)
def build_score_df(score_json_dict):
    """Build a DataFrame with one column per job from ``{job_name: scores}``.

    A shallow copy of the mapping is passed to ``pd.DataFrame.from_dict``; the input
    dict itself is not modified.
    """
    per_job_scores = dict(score_json_dict.items())
    return pd.DataFrame.from_dict(per_job_scores)
# One column per job; rows follow the order of the loaded score JSONs.
score_df= build_score_df(score_json_dict)
# Cell output: the score table.
score_df

In [None]:
# plot the dist of scores for different column
import matplotlib.pyplot as plt
def plot_score_desc(score_df):
    """Plot every column of ``score_df`` sorted in descending order.

    Each column becomes one line labelled with the column name, so the x axis is the
    rank of a sample within that column, not a shared sample index.

    Returns:
        ``(fig, ax)`` of the created matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(5, 3))
    for col_name in score_df.columns:
        descending_scores = score_df[col_name].sort_values(ascending=False).values
        ax.plot(descending_scores, label=col_name)
    ax.legend()
    ax.set_xlabel("samples")
    ax.set_ylabel("score")
    return fig, ax
# Draw the per-model score curves; fig/ax are kept for further tweaking.
fig, ax = plot_score_desc(score_df)

In [None]:
# Rank all regions by the score of one model, best first. The original row labels are
# kept as the index, so a row's `.name` is still its label in score_df.
sorted_score_df = score_df.sort_values(by="o365_vg-gpt2l-bs_64-lsj", ascending=False)
# Cell output: the sorted table.
sorted_score_df

In [None]:
num_regions = len(infer_json_dataset)

# Pick the region at a fixed relative rank in the sorted table (0 = best score,
# 1 = worst). 0.98521 is a hand-picked position near the low-score end.
sorted_score_seris = sorted_score_df.iloc[int(num_regions * 0.98521)]
# `.name` is the row's index label in score_df.
region_cnt = sorted_score_seris.name

# region_cnt  = random.randint(0, num_regions-1)

# NOTE(review): region_cnt is a label but is used positionally here; the two coincide
# only if score_df has a default 0..n-1 index — confirm.
score_seris = score_df.iloc[region_cnt]
pil_img_with_bbox_and_captions, pil_img_with_bbox_and_captions_path = plot_one_region(infer_json_dataset, region_cnt)
# pil_img_with_bbox_and_captions.save(pil_img_with_bbox_and_captions_path)
# The printed "save to" name is only a suggestion; saving is commented out above.
print(f"region_cnt: {region_cnt}\nscores: {score_seris}\nsave to: {pil_img_with_bbox_and_captions_path}")
# Cell output: the rendered image.
pil_img_with_bbox_and_captions

In [None]:
import base64  
from PIL import Image  
import io
from IPython.display import display, HTML  
  
def visualize_images_html(infer_json_dataset, num_images, images_per_row=5):
    """Display a grid of randomly chosen regions as an HTML table in the notebook.

    Args:
        infer_json_dataset: Dataset passed through to ``plot_one_region``.
        num_images: Number of regions to sample (with replacement, unseeded).
        images_per_row: Number of table cells per row; must be positive.

    Raises:
        ValueError: if ``images_per_row`` is not positive.
    """
    if images_per_row <= 0:
        raise ValueError(f"images_per_row must be positive, got {images_per_row}")

    region_cnt_random_list = np.random.randint(0, len(infer_json_dataset), num_images)
    print(f"The region cnt random list: {region_cnt_random_list}")

    cells = []
    for region_cnt in region_cnt_random_list:
        pil_img, pil_img_note = plot_one_region(infer_json_dataset, region_cnt)

        # Encode the image as an inline base64 PNG so the HTML is self-contained.
        buf = io.BytesIO()
        pil_img.save(buf, format='PNG')
        img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')

        cells.append('<td><img src="data:image/png;base64,{}"  height="500"><br>{}</td>'.format(img_base64, pil_img_note))

    # Rows are formed from the position in the list. Previously the random region index
    # decided where <tr>/</tr> were emitted, which produced unbalanced rows.
    rows = [
        "<tr>" + "".join(cells[start:start + images_per_row]) + "</tr>"
        for start in range(0, len(cells), images_per_row)
    ]
    images_html = "<table>" + "".join(rows) + "</table>"
    print(f"html is ready!")
    display(HTML(images_html))

# Show 10 randomly sampled regions, using the default number of images per row.
visualize_images_html(infer_json_dataset, 10)

In [None]:
from flask import Flask, render_template  
from PIL import Image  
import io  
import base64  
  
# Minimal Flask app that serves a page with the first few rendered regions.
app = Flask(__name__)

@app.route('/')
def home():
    """Render the first ``num_images`` regions and pass them to the HTML template.

    Each entry of ``images`` is ``(base64_png, note)`` where ``note`` is the file-name
    string returned by ``plot_one_region``.
    """
    num_images = 10
    images_per_row = 5
    images = []
    # Use the loop index as the region index. Previously the notebook-global
    # `region_cnt` was used, so the same region was rendered num_images times.
    for i in range(min(num_images, len(infer_json_dataset))):
        pil_img, pil_img_note = plot_one_region(infer_json_dataset, i)
        buf = io.BytesIO()
        pil_img.save(buf, format='PNG')
        img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
        images.append((img_base64, pil_img_note))
    # NOTE(review): the template name is resolved against the app's template folder
    # (Flask's default is "templates/") — confirm 'tmp/home.html' exists there.
    return render_template('tmp/home.html', images=images, images_per_row=images_per_row)

if __name__ == '__main__':
    # debug=True normally enables the auto-reloader, which restarts the process and
    # does not work inside a notebook kernel, so the reloader is disabled explicitly.
    # The interactive debugger stays on: do not expose this server beyond localhost.
    app.run(debug=True, use_reloader=False)