Modify layout and add TPH-YOLOv5 model

#1
Files changed (3)
  1. README.md +3 -3
  2. app.py +235 -107
  3. requirements.txt +6 -1
README.md CHANGED
@@ -1,8 +1,8 @@
---
title: XAITK-Gradio
- emoji: 🐢
- colorFrom: yellow
- colorTo: green
+ emoji: 🕵️‍♂️
+ colorFrom: purple
+ colorTo: blue
sdk: gradio
sdk_version: 4.7.1
app_file: app.py
app.py CHANGED
@@ -3,10 +3,12 @@
# This app makes use of the saliency generation example found in the base ``xaitk-saliency`` repo [here](https://github.com/XAITK/xaitk-saliency/blob/master/examples/OcclusionSaliency.ipynb), and explores integrating ``xaitk-saliency`` with ``Gradio`` to create an interactive interface for computing saliency maps.

import os
+ import sys
import PIL.Image
import matplotlib.pyplot as plt # type: ignore
import urllib
import numpy as np
+ from git import Repo

import gradio as gr
from gradio import ( # type: ignore
@@ -49,6 +51,7 @@ import torch
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn.functional
+ from torch.utils.data import Dataset, DataLoader

from smqtk_detection.impls.detect_image_objects.resnet_frcnn import ResNetFRCNN
from xaitk_saliency.impls.gen_image_classifier_blackbox_sal.slidingwindow import SlidingWindowStack
@@ -57,7 +60,9 @@ from xaitk_saliency.impls.gen_object_detector_blackbox_sal.drise import RandomGr
from xaitk_saliency.interfaces.gen_object_detector_blackbox_sal import GenerateObjectDetectorBlackboxSaliency
from smqtk_detection.interfaces.detect_image_objects import DetectImageObjects
from smqtk_classifier.interfaces.classify_image import ClassifyImage
+ from smqtk_image_io import AxisAlignedBoundingBox

+ from typing import Iterable, Dict, Hashable, Tuple

os.makedirs('data', exist_ok=True)
test_image_filename = 'data/catdog.jpg'
@@ -72,7 +77,7 @@ model_input_size = (224, 224)
model_mean = [0.485, 0.456, 0.406]
model_loader = transforms.Compose([
    transforms.ToPILImage(),
-     transforms.Resize(model_input_size),
+     transforms.Resize(model_input_size),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=model_mean,
@@ -84,32 +89,32 @@ def get_sal_labels(classes_file, custom_categories_list=None):
    if not os.path.isfile(classes_file):
        url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
        _ = urllib.request.urlretrieve(url, classes_file)
-
+
    f = open(classes_file, "r")
    categories = [s.strip() for s in f.readlines()]
-
+
    if not custom_categories_list == None:
        sal_class_labels = custom_categories_list
    else:
        sal_class_labels = categories
-
+
    sal_class_idxs = [categories.index(lbl) for lbl in sal_class_labels]
-
+
    return sal_class_labels, sal_class_idxs

def get_det_sal_labels(classes_file, custom_categories_list=None):
    if not os.path.isfile(classes_file):
        url = "https://raw.githubusercontent.com/matlab-deep-learning/Object-Detection-Using-Pretrained-YOLO-v2/main/%2Bhelper/coco-classes.txt"
        _ = urllib.request.urlretrieve(url, classes_file)
-
+
    f = open(classes_file, "r")
    categories = [s.strip() for s in f.readlines()]
-
+
    if not custom_categories_list == None:
        sal_obj_labels = custom_categories_list
    else:
        sal_obj_labels = categories
-
+
    sal_obj_idxs = [categories.index(lbl) for lbl in sal_obj_labels]

    return sal_obj_labels, sal_obj_idxs
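
A usage sketch (not part of the diff) for the two label helpers above; the file path and custom labels are illustrative. Each helper returns the chosen label subset together with the indices of those labels in the downloaded category file:

    sal_class_labels, sal_class_idxs = get_sal_labels(
        'data/imagenet_classes.txt',
        custom_categories_list=['boxer', 'tiger cat'],
    )
    # sal_class_idxs holds each label's position in imagenet_classes.txt,
    # which classify_images() later uses to slice the softmax output.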
@@ -131,9 +136,160 @@ def get_detection_model(model_choice):
        blackbox_detector = ResNetFRCNN(
            box_thresh=0.05,
            img_batch_size=1,
-             use_cuda=False
+             use_cuda=CUDA_AVAILABLE
+         )
+     elif model_choice == "TPH-YOLOv5":
+         dest = os.path.join(data_path, 'tph-yolov5')
+         if not os.path.isdir(dest):
+             Repo.clone_from("https://github.com/cv516Buaa/tph-yolov5.git", dest)
+         sys.path.insert(1, dest)
+
+         # imports from the TPH-YOLOv5 GitHub repo
+         from utils.augmentations import letterbox
+         from models.experimental import attempt_load
+         from utils.datasets import LoadImages
+         from utils.general import non_max_suppression, scale_coords
+
+         class YOLOVisdrone(DetectImageObjects):
+             def __init__(
+                 self,
+                 weights,
+                 img_size=(640, 640),
+                 batch_size=1,
+                 conf_thresh=0.5,
+                 iou_thresh=0.5,
+                 use_cuda=False,
+                 num_workers=4
+             ):
+                 """
+                 img_size: size of the image input to the model
+                 batch_size: number of images to input at once
+                 conf_thresh: confidence threshold for detection results
+                 iou_thresh: IoU threshold for NMS
+                 use_cuda: use a CUDA device to compute detections
+                 num_workers: number of worker processes to use for data loading
+                 """
+
+                 self.img_size = np.array(img_size)
+
+                 if use_cuda:
+                     self.device = torch.device('cuda:0')
+                 else:
+                     self.device = torch.device('cpu')
+
+                 self.model = attempt_load(weights).to(self.device)
+                 self.model = self.model.eval()
+
+                 self.conf_thresh = conf_thresh
+                 self.iou_thresh = iou_thresh
+
+                 self.batch_size = batch_size
+                 self.num_workers = num_workers
+
+                 with torch.no_grad():
+                     _ = self.model(torch.zeros(1, 3, *self.img_size).to(self.device))  # warm up
+
+             def detect_objects(
+                 self,
+                 imgIter: Iterable[np.ndarray]
+             ) -> Iterable[Iterable[Tuple[AxisAlignedBoundingBox, Dict[Hashable, float]]]]:
+
+                 # PyTorch DataLoader for the passed images
+                 dataset = DataLoader(
+                     pytorchDataset(
+                         imgIter,
+                         img_size=self.img_size,
+                     ),
+                     batch_size=self.batch_size,
+                     num_workers=self.num_workers
+                 )
+
+                 # list of AxisAlignedBoundingBox detections to return
+                 preds = []
+                 for i, (img_batch, hs, ws) in enumerate(dataset):
+                     # load batch and normalize
+                     img_batch = img_batch.to(self.device)
+                     img_batch = img_batch.float()
+                     img_batch /= 255
+
+                     # pass through the model
+                     with torch.no_grad():
+                         pred_batch = self.model(img_batch)[0]
+
+                     # perform NMS and scale detections to the original image dimensions
+                     for img_pred, h, w in zip(pred_batch, hs, ws):
+                         img_pred = non_max_suppression(
+                             img_pred[None], conf_thres=self.conf_thresh, iou_thres=self.iou_thresh)[0]
+                         img_pred[:, :4] = scale_coords(
+                             img_batch.shape[2:], img_pred[:, :4], (h, w))
+                         img_pred = img_pred.cpu().numpy()
+
+                         preds.append(pred_mat_to_list(img_pred))
+
+                 return preds
+
+             # required by the interface
+             def get_config(self):
+                 return {}
+
+
+         class pytorchDataset(Dataset):
+             """
+             PyTorch Dataset for images. Resizes each image to the model input
+             size and returns the original height and width as well.
+             """
+
+             def __init__(self, imgs, img_size=[640, 640]):
+                 self.imgs = list(imgs)
+                 self.img_size = img_size
+
+             def __getitem__(self, idx):
+                 img = self.imgs[idx]
+                 h = img.shape[0]
+                 w = img.shape[1]
+
+                 img = letterbox(img, new_shape=self.img_size, auto=True)[0]
+                 img = img.transpose((2, 0, 1))
+                 img = np.ascontiguousarray(img)
+
+                 return img, h, w
+
+             def __len__(self):
+                 return len(self.imgs)
+
+
+         def pred_mat_to_list(preds):
+             """
+             Convert the prediction matrix output by the model to the
+             AxisAlignedBoundingBox format.
+             """
+             pred_list = []
+
+             for pred in preds:
+                 bbox = AxisAlignedBoundingBox(pred[0:2], pred[2:4])
+
+                 CLASS_NAMES = ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck',
+                                'tricycle', 'awning-tricycle', 'bus', 'motor']
+                 score_dict = dict.fromkeys(CLASS_NAMES, 0)
+                 score_dict[CLASS_NAMES[int(pred[5])]] = pred[4]
+
+                 pred_list.append((bbox, score_dict))
+
+             return pred_list
+
+         model_file = os.path.join(data_path, 'tph-yolov5.pth')
+         if not os.path.isfile(model_file):
+             urllib.request.urlretrieve('https://data.kitware.com/api/v1/item/623880d04acac99f429fe3bf/download', model_file)
+
+         blackbox_detector = YOLOVisdrone(
+             weights=model_file,
+             img_size=(1536,1536),
+             batch_size=1,
+             use_cuda=CUDA_AVAILABLE,
+             num_workers=4,
+             conf_thresh=0.1,
+             iou_thresh=0.5
        )
-
    else:
        raise Exception("Unknown Input")

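For reviewers, a minimal sketch (not part of the diff) of how the new detector branch is exercised, assuming the app's existing globals (data_path, CUDA_AVAILABLE) are initialized; the sample image path is hypothetical:

    import numpy as np
    import PIL.Image

    img = np.asarray(PIL.Image.open('data/sample.jpg'))  # hypothetical image

    # The first call clones tph-yolov5 and downloads the weights.
    detector = get_detection_model("TPH-YOLOv5")

    # Same access pattern run_detect uses: one iterable of
    # (AxisAlignedBoundingBox, {class: score}) pairs per input image.
    dets = list(list(detector([img]))[0])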
 
@@ -142,21 +298,21 @@ def get_saliency_algo(sal_choice):
def get_saliency_algo(sal_choice):
    if sal_choice == "RISE":
        gen_sal = RISEStack(
-             n=num_masks_state[-1],
-             s=spatial_res_state[-1],
-             p1=p1_state[-1],
-             seed=seed_state[-1],
-             threads=threads_state[-1],
+             n=num_masks_state[-1],
+             s=spatial_res_state[-1],
+             p1=p1_state[-1],
+             seed=seed_state[-1],
+             threads=threads_state[-1],
            debiased=debiased_state[-1]
        )
-
+
    elif sal_choice == "SlidingWindowStack":
        gen_sal = SlidingWindowStack(
            window_size=eval(window_size_state[-1]),
            stride=eval(stride_state[-1]),
            threads=threads_state[-1]
        )
-
+
    else:
        raise Exception("Unknown Input")

@@ -168,22 +324,22 @@ def get_detection_saliency_algo(sal_choice):
            n=num_masks_state[-1],
            s=eval(occlusion_grid_state[-1]),
            p1=p1_state[-1],
-             threads=threads_state[-1],
-             seed=seed_state[-1],
+             threads=threads_state[-1],
+             seed=seed_state[-1],
        )
-
+
    elif sal_choice == "DRISE":
        gen_sal = DRISEStack(
-             n=num_masks_state[-1],
-             s=spatial_res_state[-1],
-             p1=p1_state[-1],
-             seed=seed_state[-1],
+             n=num_masks_state[-1],
+             s=spatial_res_state[-1],
+             p1=p1_state[-1],
+             seed=seed_state[-1],
            threads=threads_state[-1]
        )
-
+
    else:
        raise Exception("Unknown Input")
-
+
    return gen_sal


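The parameter churn in these two hunks follows one pattern: each `*_state` list holds the most recent UI value at index -1. A sketch with hypothetical state values, using the same DRISEStack constructor arguments as the diff:

    from xaitk_saliency.impls.gen_object_detector_blackbox_sal.drise import DRISEStack

    num_masks_state = [200]      # hypothetical UI state values
    spatial_res_state = [8]
    p1_state = [0.5]
    seed_state = [0]
    threads_state = [4]

    gen_sal = DRISEStack(
        n=num_masks_state[-1],
        s=spatial_res_state[-1],
        p1=p1_state[-1],
        seed=seed_state[-1],
        threads=threads_state[-1]
    )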
 
@@ -202,27 +358,27 @@ class TorchResnet (ClassifyImage):

    def get_labels(self):
        return self.modified_class_labels
-
+
    def set_labels(self, class_labels):
        self.modified_class_labels = [lbl for lbl in class_labels]
-
+
    @torch.no_grad()
    def classify_images(self, image_iter):
        # Input may either be an NDArray, or some arbitrary iterable of NDArray images.
-
+
        model = get_model(img_cls_model_name[-1])
-
+
        for img in image_iter:
            image_tensor = model_loader(img).unsqueeze(0)
            if CUDA_AVAILABLE:
                image_tensor = image_tensor.cuda()
-
+
            feature_vec = model(image_tensor)
            # Converting feature extractor output to probabilities.
            class_conf = torch.nn.functional.softmax(feature_vec, dim=1).cpu().detach().numpy().squeeze()
            # Only return the confidences for the focus classes
            yield dict(zip(sal_class_labels, class_conf[sal_class_idxs]))
-
+
    def get_config(self):
        # Required by a parent class.
        return {}
@@ -256,7 +412,7 @@ def show_slider_parameters(choice):
        return Slider(visible=True), Slider(visible=False)
    else:
        raise Exception("Unknown Input")
-
+
# Modify checkbox parameters based on chosen saliency algorithm
def show_debiased_checkbox(choice):
    if choice == 'RISE':
@@ -268,7 +424,7 @@ def show_debiased_checkbox(choice):

# Function that is called after clicking the "Classify" button in the demo
def predict(x,top_n_classes):
-
+
    image_tensor = model_loader(x).unsqueeze(0)
    if CUDA_AVAILABLE:
        image_tensor = image_tensor.cuda()
@@ -277,18 +433,18 @@
    class_conf = torch.nn.functional.softmax(feature_vec, dim=1).cpu().detach().numpy().squeeze()
    labels = list(zip(sal_class_labels, class_conf[sal_class_idxs].tolist()))
    final_labels = dict(sorted(labels, key=lambda t: t[1],reverse=True)[:top_n_classes])
-
+
    return final_labels, Dropdown(choices=list(final_labels))

# Interpretation function for image classification that implements the selected saliency algorithm and generates the class-wise saliency map visualizations
- def interpretation_function(image: np.ndarray,
+ def interpretation_function(image: np.ndarray,
                            labels: dict,
-                             nth_class: str,
+                             nth_class: str,
                            img_alpha,
                            sal_alpha,
                            sal_range_min,
                            sal_range_max):
-
+
    sal_generator = get_saliency_algo(img_cls_saliency_algo_name[-1])
    sal_generator.fill = blackbox_fill
    labels_list = labels.keys()
@@ -301,10 +457,10 @@ def interpretation_function(image: np.ndarray,
                                  sal_alpha,
                                  sal_range_min,
                                  sal_range_max)
-
+
    return fig

- def visualize_saliency_plot(image: np.ndarray,
+ def visualize_saliency_plot(image: np.ndarray,
                            class_sal_map: np.ndarray,
                            img_alpha,
                            sal_alpha,
@@ -352,20 +508,20 @@ def run_detect(input_img: np.ndarray, num_detections: int):
        conf_score = str(round(score_list[int(max_scores_index[i,0])],4))
        label_with_score = str(i) + " : "+ label_name + " - " + conf_score
        final_label.append(label_with_score)
-
+
    bboxes_list = bboxes[:,:].astype(int).tolist()

    return (input_img, list(zip([f for f in bboxes_list], [l for l in final_label]))[:num_detections]), Dropdown(choices=[l for l in final_label][:num_detections])

# Run saliency algorithm on the object detect predictions and generate corresponding visualizations
- def run_detect_saliency(input_img: np.ndarray,
+ def run_detect_saliency(input_img: np.ndarray,
                        num_predictions,
-                         obj_label,
+                         obj_label,
                        img_alpha,
                        sal_alpha,
                        sal_range_min,
                        sal_range_max):
-
+
    detect_model = get_detection_model(obj_det_model_name[-1])
    img_preds = list(list(detect_model([input_img]))[0])
    ref_preds = img_preds[:int(num_predictions)]
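
The label strings built in run_detect double as a transport format: run_detect_saliency later recovers the detection index from the "<index> : <name> - <score>" prefix. A small sketch of that round trip (values hypothetical):

    label_with_score = "0 : car - 0.9132"                    # built by run_detect
    nth_class_index = int(label_with_score.split(' : ')[0])  # -> 0, used to index sal_maps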
@@ -383,15 +539,11 @@ def run_detect_saliency(input_img: np.ndarray,

    ref_bboxes = np.array(ref_bboxes)
    ref_scores = np.array(ref_scores)
-
-     print(f"Ref bboxes: {ref_bboxes.shape}")
-     print(f"Ref scores: {ref_scores.shape}")
-
+
    sal_generator = get_detection_saliency_algo(obj_det_saliency_algo_name[-1])
    sal_generator.fill = blackbox_fill
-
+
    sal_maps = gen_det_saliency(input_img, detect_model, sal_generator,ref_bboxes,ref_scores)
-     print(f"Saliency maps: {sal_maps.shape}")

    nth_class_index = int(obj_label.split(' : ')[0])
    scores = sal_maps[nth_class_index,:,:]
@@ -401,7 +553,7 @@
                                  sal_alpha,
                                  sal_range_min,
                                  sal_range_max)
-
+
    scores = np.clip(scores, sal_range_min, sal_range_max)

    return fig
@@ -421,99 +573,74 @@ def gen_det_saliency(input_img: np.ndarray,

    return sal_maps

- with gr.Blocks() as demo:
+ with gr.Blocks() as xaitk_demo:
    with Tab("Image Classification"):
        with Row():
-             with Column(scale=0.5):
+             with Column():
                drop_list = Dropdown(value=img_cls_model_name[-1],choices=["ResNet-18","ResNet-50"],label="Choose Model",interactive="True")
+                 input_img = Image(label="Input Image")
+                 num_classes = Slider(value=2,label="Top-N Class Labels", interactive=True,visible=True)
+                 classify = Button("Classify")
+                 class_label = Label(label="Predictions")
+                 class_name = Dropdown(label="Class to Compute Saliency",interactive=True,visible=True)
-             with Column(scale=0.5):
+             with Column():
                drop_list_sal = Dropdown(value=img_cls_saliency_algo_name[-1],choices=["SlidingWindowStack","RISE"],label="Choose Saliency Algorithm",interactive="True")
-         with Row():
-             with Column(scale=0.33):
                window_size = Textbox(value=window_size_state[-1],label="Tuple of window size values (Press Enter to submit the input)",interactive=True,visible=False)
                masks = Number(value=num_masks_state[-1],label="Number of Random Masks (Press Enter to submit the input)",interactive=True,visible=True,precision=0)
-             with Column(scale=0.33):
                stride = Textbox(value=stride_state[-1],label="Tuple of stride values (Press Enter to submit the input)" ,interactive=True,visible=False)
                spatial_res = Number(value=spatial_res_state[-1],label="Spatial Resolution of Masking Grid (Press Enter to submit the input)" ,interactive=True,visible=True,precision=0)
-             with Column(scale=0.33):
-                 threads = Slider(value=threads_state[-1],label="Threads",interactive=True,visible=True)
-         with Row():
-             with Column(scale=0.33):
+                 debiased = Checkbox(value=debiased_state[-1],label="Debiased", interactive=True, visible=True)
                seed = Number(value=seed_state[-1],label="Seed (Press Enter to submit the input)",interactive=True,visible=True,precision=0)
-             with Column(scale=0.33):
                p1 = Slider(value=p1_state[-1],label="P1",interactive=True,visible=True, minimum=0,maximum=1,step=0.1)
-             with Column(scale=0.33):
-                 debiased = Checkbox(value=debiased_state[-1],label="Debiased", interactive=True, visible=True)
-         with Row():
-             with Column():
-                 input_img = Image(label="Saliency Map Generation", width=640, height=480)
-                 num_classes = Slider(value=2,label="Top-N class labels", interactive=True,visible=True)
-                 classify = Button("Classify")
-             with Column():
-                 class_label = Label(label="Predicted Class")
-             with Column():
-                 with Row():
-                     class_name = Dropdown(label="Class to compute saliency",interactive=True,visible=True)
+                 threads = Slider(value=threads_state[-1],label="Threads",interactive=True,visible=True)
+                 with Tabs():
+                     with TabItem("Display Interpretation with Plot"):
+                         interpretation_plot = Plot()
                with Row():
                    img_alpha = Slider(value=0.7,label="Image Opacity",interactive=True,visible=True,minimum=0,maximum=1,step=0.1)
                    sal_alpha = Slider(value=0.3,label="Saliency Map Opacity",interactive=True,visible=True,minimum=0,maximum=1,step=0.1)
                with Row():
                    min_sal_range = Slider(value=0,label="Minimum Saliency Value",interactive=True,visible=True,minimum=-1,maximum=1,step=0.05)
                    max_sal_range = Slider(value=1,label="Maximum Saliency Value",interactive=True,visible=True,minimum=-1,maximum=1,step=0.05)
-         with Row():
-             generate_saliency = Button("Generate Saliency")
-         with Column():
-             with Tabs():
-                 with TabItem("Display interpretation with plot"):
-                     interpretation_plot = Plot()
+                 generate_saliency = Button("Generate Saliency")

    with Tab("Object Detection"):
        with Row():
-             with Column(scale=0.5):
-                 drop_list_detect_model = Dropdown(value=obj_det_model_name[-1],choices=["Faster-RCNN"],label="Choose Model",interactive="True")
-             with Column(scale=0.5):
+             with Column():
+                 drop_list_detect_model = Dropdown(value=obj_det_model_name[-1],choices=["Faster-RCNN", "TPH-YOLOv5"],label="Choose Model",interactive="True")
+                 input_img_detect = Image(label="Input Image")
+                 num_detections = Slider(value=2,label="Top-N Detections", interactive=True,visible=True)
+                 detection = Button("Run Detection Algorithm")
+                 detect_label = AnnotatedImage(label="Detections")
+                 class_name_det = Dropdown(label="Detection to Compute Saliency",interactive=True,visible=True)
+
+             with Column():
                drop_list_detect_sal = Dropdown(value=obj_det_saliency_algo_name[-1],choices=["RandomGridStack","DRISE"],label="Choose Saliency Algorithm",interactive="True")
-         with Row():
-             with Column(scale=0.33):
                masks_detect = Number(value=num_masks_state[-1],label="Number of Random Masks (Press Enter to submit the input)",interactive=True,visible=True,precision=0)
                occlusion_grid_size = Textbox(value=occlusion_grid_state[-1],label="Tuple of occlusion grid size values (Press Enter to submit the input)",interactive=True,visible=False)
                spatial_res_detect = Number(value=spatial_res_state[-1],label="Spatial Resolution of Masking Grid (Press Enter to submit the input)" ,interactive=True,visible=True,precision=0)
-             with Column(scale=0.33):
                seed_detect = Number(value=seed_state[-1],label="Seed (Press Enter to submit the input)",interactive=True,visible=True,precision=0)
                p1_detect = Slider(value=p1_state[-1],label="P1",interactive=True,visible=True, minimum=0,maximum=1,step=0.1)
-             with Column(scale=0.33):
                threads_detect = Slider(value=threads_state[-1],label="Threads",interactive=True,visible=True)
-         with Row():
-             with Column():
-                 input_img_detect = Image(label="Saliency Map Generation", width=640, height=480)
-                 num_detections = Slider(value=2,label="Top-N detections", interactive=True,visible=True)
-                 detection = Button("Run Detection Algorithm")
-             with Column():
-                 detect_label = AnnotatedImage(label="Detections")
-             with Column():
-                 with Row():
-                     class_name_det = Dropdown(label="Detection to compute saliency",interactive=True,visible=True)
+                 with Tabs():
+                     with TabItem("Display saliency map plot"):
+                         det_saliency_plot = Plot()
                with Row():
                    img_alpha_det = Slider(value=0.7,label="Image Opacity",interactive=True,visible=True,minimum=0,maximum=1,step=0.1)
                    sal_alpha_det = Slider(value=0.3,label="Saliency Map Opacity",interactive=True,visible=True,minimum=0,maximum=1,step=0.1)
                with Row():
                    min_sal_range_det = Slider(value=0.95,label="Minimum Saliency Value",interactive=True,visible=True,minimum=0.80,maximum=1,step=0.05)
                    max_sal_range_det = Slider(value=1,label="Maximum Saliency Value",interactive=True,visible=True,minimum=0.80,maximum=1,step=0.05)
-         with Row():
-             generate_det_saliency = Button("Generate Saliency")
-             with Column():
-                 with Tabs():
-                     with TabItem("Display saliency map plot"):
-                         det_saliency_plot = Plot()
+                 generate_det_saliency = Button("Generate Saliency")

-     # Image Classification dropdown list event listeners
+     # Image Classification dropdown list event listeners
    drop_list.select(select_img_cls_model,drop_list,drop_list)
    drop_list_sal.select(select_img_cls_saliency_algo,drop_list_sal,drop_list_sal)
    drop_list_sal.change(show_textbox_parameters,drop_list_sal,[window_size,stride,masks,spatial_res,seed])
    drop_list_sal.change(show_slider_parameters,drop_list_sal,[threads,p1])
    drop_list_sal.change(show_debiased_checkbox,drop_list_sal,debiased)

-     # Image Classification textbox, slider and checkbox event listeners
+     # Image Classification textbox, slider and checkbox event listeners
    window_size.submit(enter_window_size,window_size,window_size)
    masks.submit(enter_num_masks,masks,masks)
    stride.submit(enter_stride, stride, stride)
@@ -533,7 +660,7 @@ with gr.Blocks() as demo:
    drop_list_detect_sal.change(show_slider_parameters,drop_list_detect_sal,[threads_detect,p1_detect])
    drop_list_detect_sal.change(show_textbox_parameters,drop_list_detect_sal,[masks_detect,spatial_res_detect,seed_detect,occlusion_grid_size])

-     # Object detection textbox and slider event listeners
+     # Object detection textbox and slider event listeners
    masks_detect.submit(enter_num_masks,masks_detect,masks_detect)
    occlusion_grid_size.submit(enter_occlusion_grid_size,occlusion_grid_size,occlusion_grid_size)
    spatial_res_detect.submit(enter_spatial_res, spatial_res_detect, spatial_res_detect)
@@ -545,4 +672,5 @@
    detection.click(run_detect, [input_img_detect, num_detections], [detect_label,class_name_det])
    generate_det_saliency.click(run_detect_saliency,[input_img_detect, num_detections, class_name_det, img_alpha_det, sal_alpha_det, min_sal_range_det, max_sal_range_det],det_saliency_plot)

- demo.launch(share=True)
+
+ xaitk_demo.launch(show_error=True)
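Note: the new launch call drops share=True, which is redundant on a hosted Space (the app is already served publicly), and adds show_error=True so Python tracebacks from callbacks surface in the browser.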
 
 
requirements.txt CHANGED
@@ -2,4 +2,9 @@ xaitk-saliency
torch
torchvision
urllib3
- Pillow
+ Pillow
+ gitpython
+
+ # tph-yolov5
+ opencv-python
+ seaborn
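
A quick import check (a sketch, not part of the diff) that the new dependencies resolve: gitpython provides the `git` module used by Repo.clone_from in app.py, while opencv-python and seaborn are imported by the cloned tph-yolov5 code itself.

    import cv2       # provided by opencv-python
    import seaborn   # plotting dependency of the YOLOv5 codebase
    import git       # provided by gitpython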