import json
import os
import random

import numpy as np
import torch

from PIL import Image
from torch.utils.data import Dataset

from xtuner.registry import BUILDER

from pycocotools.coco import COCO

from projects.lisa.datasets.utils import SEG_QUESTIONS, ANSWER_LIST, DEFAULT_IMAGE_TOKEN

class ADE20kSemanticSegDataset(Dataset):

    def __init__(self,
                 data_path,
                 processor=None,
                 extra_image_processor=None,
                 image_folder=None,
                 num_classes_per_sample=3,
                 ):
        self.begin_str = f'{DEFAULT_IMAGE_TOKEN}\n'
        if processor:
            self.processor = BUILDER.build(processor)
        if extra_image_processor is not None:
            self.extra_image_processor = BUILDER.build(extra_image_processor)

        self.image_folder = image_folder
        self.num_classes_per_sample = num_classes_per_sample

        self.data = self._load_annotations(data_path, image_folder)
        self._max_refetch = 1000

    def decode_mask(self, label_path):
        label = np.array(Image.open(label_path))
        
        # ade20k
        label = np.where(label == 0, 255, label - 1)
        unique_labels = [lbl for lbl in np.unique(label) if lbl != 255]
        if not unique_labels:
            return None, None

        selected_labels = np.random.choice(unique_labels, min(
            len(unique_labels), self.num_classes_per_sample), replace=False)
        label = torch.from_numpy(label).long()
        masks = torch.stack(
            [label == class_id for class_id in selected_labels], dim=0)
        return masks, selected_labels

    def _load_annotations(self, data_path, image_folder=None):
        with open(data_path, 'r') as file:
            ade20k_classes = json.load(file)
        ade20k_image_dir = image_folder
        ade20k_images = [os.path.join(ade20k_image_dir, img) for img in os.listdir(
            ade20k_image_dir) if img.endswith('.jpg')]
        ade20k_labels = [img.replace(".jpg", ".png").replace(
            "images", "annotations") for img in ade20k_images]
        self.classes = np.array(ade20k_classes)

        ret = []
        for image, label in zip(ade20k_images, ade20k_labels):
            ret.append({"image": image, "label": label})
        return ret

    def __getitem__(self, index):
        for _ in range(self._max_refetch + 1):
            data = self.prepare_data(index)
            # Broken images may cause the returned data to be None
            if data is None:
                index = self._rand_another()
                continue
            return data

    def __len__(self):
        return len(self.data)

    @property
    def modality_length(self):
        self.group_length = []
        for data_dict in self.data:
            self.group_length.append(100)
        return self.group_length

    @property
    def length(self):
        group_length = np.array(self.group_length)
        group_length = np.abs(group_length).tolist()
        return group_length

    def _parse_annotations(self, ann_info):
        assert 'label' in ann_info
        masks, class_id = self.decode_mask(ann_info['label'])
        ann_info['masks'] = masks
        if class_id is None:
            return None
        conversation = []
        for i, c_id in enumerate(class_id):
            question = random.choice(SEG_QUESTIONS).format(
                class_name=self.classes[c_id].lower())
            answer = random.choice(ANSWER_LIST)
            if i == 0:
                question = self.begin_str + question
            conversation.append({'from': 'human', 'value': question})
            conversation.append({'from': 'gpt', 'value': answer})
        ann_info['conversations'] = conversation
        return ann_info
    
    def prepare_data(self, index):
        data_dict: dict = self.data[index]
        data_dict = self._parse_annotations(data_dict)
        if data_dict is None:
            return None
        out_data_dict = self.processor(data_dict)
        if 'masks' in data_dict:
            out_data_dict['masks'] = data_dict['masks']
        if data_dict.get('image', None) and hasattr(self, 'extra_image_processor'):
            image_file = data_dict['image']
            try:
                image = Image.open(image_file).convert('RGB')
            except Exception as e:
                return None
            g_image = np.array(image)  # for grounding
            g_image = self.extra_image_processor.apply_image(g_image)
            g_pixel_values = torch.from_numpy(g_image).permute(2, 0, 1).contiguous()
            out_data_dict['g_pixel_values'] = g_pixel_values
        return out_data_dict

    def _rand_another(self) -> int:
        return np.random.randint(0, len(self.data))


class COCOStuffSemanticSegDataset(ADE20kSemanticSegDataset):
    def __init__(self,
                 image_folder,
                 data_path=None,
                 num_classes_per_sample=3,
                 processor=None,
                 extra_image_processor=None,):
         
        super().__init__(
            image_folder=image_folder,
            data_path=data_path,
            num_classes_per_sample=num_classes_per_sample,
            processor=processor,
            extra_image_processor=extra_image_processor,
        )
        self.cocostuff_class2index = {c: i for i, c in enumerate(self.classes)}

    def _load_annotations(self, data_path, image_folder):
        # coco stuff
        with open(data_path, 'r') as file:
            cocostuff_classes = [line.strip().split(": ")[-1] for line in file.readlines()[1:]]
        files = os.listdir(image_folder)
        coco_stuff_images = [os.path.join('./data/coco/train2017/', img.replace('png', 'jpg')) for img in files]
        coco_stuff_labels = [os.path.join(image_folder, img) for img in files]

        self.classes = np.array(cocostuff_classes)

        ret = []
        for image, label in zip(coco_stuff_images, coco_stuff_labels):
            ret.append({"image": image, "label": label})
        return ret

    def decode_mask(self, label_path):
        label = np.array(Image.open(label_path))
        ignored_classes = [index for class_name,
                           index in self.cocostuff_class2index.items() if "-" in class_name]
        label = np.where(np.isin(label, ignored_classes), 255, label)

        unique_labels = [lbl for lbl in np.unique(label) if lbl != 255]
        if not unique_labels:
            print("No valid label !!!")
            return None, None

        selected_labels = np.random.choice(unique_labels, min(
            len(unique_labels), self.num_classes_per_sample), replace=False)

        label = torch.from_numpy(label).long()
        masks = torch.stack(
            [label == class_id for class_id in selected_labels], dim=0)
        return masks, selected_labels

class PascalPartSemanticSegDataset(ADE20kSemanticSegDataset):

    def _load_annotations(self, data_path, image_folder):
        self.coco_api = COCO(data_path)
        img_ids = self.coco_api.getImgIds()
        all_classes = self.coco_api.loadCats(self.coco_api.getCatIds())
        class_map_pascal_part = {}
        for cat in all_classes:
            cat_main, cat_part = cat["name"].strip().split(":")
            name = (cat_main, cat_part)
            class_map_pascal_part[cat["id"]] = name
        self.classes = class_map_pascal_part
        return img_ids

    def decode_mask(self, img_id):
        annotation_ids = self.coco_api.getAnnIds(imgIds=img_id)
        annotations = self.coco_api.loadAnns(annotation_ids)
        sampled_anns = np.random.choice(annotations, min(
            len(annotations), self.num_classes_per_sample), replace=False)
        masks = [self.coco_api.annToMask(ann) for ann in sampled_anns]
        masks = np.stack(masks, axis=0)
        masks = torch.from_numpy(masks)
        return masks, [ann['category_id'] for ann in sampled_anns]

    def _parse_annotations(self, img_id):
        masks, class_id = self.decode_mask(img_id)
        img_info = self.coco_api.loadImgs(img_id)[0]
        ann_info = {'masks': masks, 'image': os.path.join(self.image_folder, img_info['file_name'])}
        if class_id is None:
            return None
        conversation = []
        for i, c_id in enumerate(class_id):
            sampled_cls = self.classes[c_id]
            if isinstance(sampled_cls, tuple):
                obj, part = sampled_cls
                name = f"{obj} {part}" if random.random() < 0.5 else f"the {part} of the {obj}"
            else:
                name = sampled_cls
            question = random.choice(SEG_QUESTIONS).format(class_name=name)
            answer = random.choice(ANSWER_LIST)
            if i == 0:
                question = self.begin_str + question
            conversation.append({'from': 'human', 'value': question})
            conversation.append({'from': 'gpt', 'value': answer})
        ann_info['conversations'] = conversation
        return ann_info

class PacoSemanticSegDataset(PascalPartSemanticSegDataset):
    def _load_annotations(self, data_path, image_folder):
        self.coco_api = COCO(data_path)
        all_classes = self.coco_api.loadCats(self.coco_api.getCatIds())
        class_map_paco = {}
        for cat in all_classes:
            cat_split = cat["name"].strip().split(":")
            if len(cat_split) == 1:
                name = cat_split[0].split("_(")[0]
            else:
                assert len(cat_split) == 2
                obj, part = cat_split
                obj = obj.split("_(")[0]
                part = part.split("_(")[0]
                name = (obj, part)
            class_map_paco[cat["id"]] = name
        self.classes = class_map_paco
        return self.coco_api.getImgIds()

class MapillarySemanticSegDataset(ADE20kSemanticSegDataset):
    def _load_annotations(self, data_path, image_folder=None):
        mapillary_classes = [cls["readable"].lower() for cls in json.load(open(data_path))["labels"]]
        mapillary_images = [os.path.join(image_folder, img) for img in os.listdir(image_folder)]
        mapillary_labels = [img.replace(".jpg", ".png").replace("images", "v2.0/labels") for img in mapillary_images]
        self.classes = np.array(mapillary_classes)
        ret = []
        for image, label in zip(mapillary_images, mapillary_labels):
            ret.append({"image": image, "label": label})
        return ret

    def decode_mask(self, label_path):
        label = np.array(Image.open(label_path))
        unique_labels = [lbl for lbl in np.unique(label) if lbl != 255]
        if not unique_labels:
            return None, None
        selected_labels = np.random.choice(unique_labels, min(
            len(unique_labels), self.num_classes_per_sample), replace=False)
        label = torch.from_numpy(label).long()
        masks = torch.stack(
            [label == class_id for class_id in selected_labels], dim=0)
        return masks, selected_labels

class PartimagenetSemanticSegDataset(ADE20kSemanticSegDataset):
    pass

if __name__ == '__main__':
    from third_parts.segment_anything.utils.transforms import ResizeLongestSide
    from projects.lisa.processor.internvl_processor import InternVLProcessor
    processor = dict(
        type=InternVLProcessor,
        pretrained_model_name_or_path='OpenGVLab/InternVL2-4B'
    )
    extra_image_processor=dict(
        type=ResizeLongestSide,
        target_length=1024,
    )
    dataset = ADE20kSemanticSegDataset(
        data_path='projects/omg_llava/dataset/utils/ade20k_classes.json',
        image_folder='./data/ade20k/images/training/',
        extra_image_processor=extra_image_processor,
        processor=processor,
    )
    for i in range(len(dataset)):
        data = dataset[i]