InPeerReview committed on
Commit 032c113 · verified · 1 parent: b0481db

Upload 9 files

Files changed (9)
  1. eval_tables_only.py +351 -0
  2. five_connect.py +61 -0
  3. grad_cam_CNN.py +74 -0
  4. mask_connect_test.py +54 -0
  5. params_flops.py +53 -0
  6. requirements.txt +13 -0
  7. test.py +128 -0
  8. train.py +389 -0
  9. utilss.py +249 -0
eval_tables_only.py ADDED
@@ -0,0 +1,351 @@
import torch
from torch import nn
from tqdm import tqdm
import prettytable
import time
import os
import multiprocessing.pool as mpp
import multiprocessing as mp

from train import *

import argparse
from utils.config import Config
from tools.mask_convert import mask_save
import numpy as np  # [PR] for histogram-based PR accumulation
import csv

# =========================== [PR] Utilities BEGIN ===========================
class PRHistogram:
    # Memory-friendly PR accumulator. Call update(probs, mask) repeatedly inside
    # your test loop, then call export_csv(path) after the loop.
    # - probs: torch.Tensor in [0,1], shape [B,H,W], "change" probability
    # - mask: torch.Tensor of 0/1 (or 0/255), shape [B,H,W]
    def __init__(self, nbins: int = 1000):
        import numpy as _np
        self.nbins = int(nbins)
        self.pos_hist = _np.zeros(self.nbins, dtype=_np.int64)
        self.neg_hist = _np.zeros(self.nbins, dtype=_np.int64)
        self.bin_edges = _np.linspace(0.0, 1.0, self.nbins + 1)

    def update(self, probs, mask):
        import numpy as _np
        p = probs.detach().float().cpu().numpy().ravel()
        g = (mask.detach().cpu().numpy().ravel() > 0).astype(_np.uint8)
        pos_counts, _ = _np.histogram(p[g == 1], bins=self.bin_edges)
        neg_counts, _ = _np.histogram(p[g == 0], bins=self.bin_edges)
        self.pos_hist += pos_counts
        self.neg_hist += neg_counts

    def compute_curve(self):
        import numpy as _np
        # Cumulative sums give TP/FP from the highest threshold down to the lowest
        pos_cum = _np.cumsum(self.pos_hist[::-1])
        neg_cum = _np.cumsum(self.neg_hist[::-1])
        TP = pos_cum
        FP = neg_cum
        FN = self.pos_hist.sum() - TP
        TN = None  # TN is not needed for the curve

        denom_prec = _np.maximum(TP + FP, 1)
        denom_rec = _np.maximum(TP + FN, 1)
        precision = TP / denom_prec
        recall = TP / denom_rec

        # F1 = 2PR / (P + R)
        denom_f1 = _np.maximum(precision + recall, 1e-12)
        f1 = 2.0 * precision * recall / denom_f1

        # IoU = TP / (TP + FP + FN)
        denom_iou = _np.maximum(TP + FP + FN, 1)
        iou = TP / denom_iou

        thresholds = self.bin_edges[::-1][1:]  # thresholds in the same (descending) order as the cumulative sums
        return thresholds, precision, recall, f1, iou, TP, FP, FN

    def export_csv(self, save_path: str):
        thresholds, precision, recall, f1, iou, TP, FP, FN = self.compute_curve()
        import numpy as _np, os as _os
        _os.makedirs(_os.path.dirname(save_path), exist_ok=True)
        _np.savetxt(
            save_path,
            _np.column_stack([thresholds, precision, recall, f1, iou, TP, FP, FN]),
            delimiter=",",
            header="threshold,precision,recall,f1,iou,TP,FP,FN",
            comments=""
        )
        return save_path

# Global PR object (created on demand)
_PR = None

def pr_init(nbins: int = 1000):
    global _PR
    if _PR is None:
        _PR = PRHistogram(nbins=nbins)
    return _PR

def pr_update_from_outputs(raw_predictions, mask, cfg):
    # Try to derive probs ∈ [0,1] from the various model outputs in this repo.
    # This covers:
    #   - cfg.argmax=True: 2-channel logits -> softmax class-1 prob
    #   - single-channel logits -> sigmoid
    #   - net == 'maskcd' (list/tuple outputs)
    # Modify here if your network has a special head.
    import torch
    global _PR
    if _PR is None:
        _PR = PRHistogram(nbins=1000)

    if getattr(cfg, 'argmax', False):
        logits = raw_predictions
        if logits.dim() == 4 and logits.size(1) >= 2:
            probs = torch.softmax(logits, dim=1)[:, 1, :, :]
        else:
            probs = torch.sigmoid(logits.squeeze(1))
    else:
        if getattr(cfg, 'net', '') == 'maskcd':
            if isinstance(raw_predictions, (list, tuple)):
                logits = raw_predictions[0]
            else:
                logits = raw_predictions
            probs = torch.sigmoid(logits).squeeze(1)
        else:
            logits = raw_predictions
            if logits.dim() == 4 and logits.size(1) == 1:
                logits = logits.squeeze(1)
            probs = torch.sigmoid(logits)

    if mask.dim() == 4 and mask.size(1) == 1:
        mask_ = mask.squeeze(1)
    else:
        mask_ = mask
    _PR.update(probs, (mask_ > 0).to(probs.dtype))

def pr_export(base_dir: str, cfg):
    # Export the PR CSV to base_dir/pr_<net>.csv
    import os
    global _PR
    if _PR is None:
        return None
    save_path = os.path.join(base_dir, f"pr_{getattr(cfg, 'net', 'model')}.csv")
    out = _PR.export_csv(save_path)
    print(f"[PR] saved: {out}")
    return out
# ============================ [PR] Utilities END ============================

# -------------------- [Per-Image] per-image metric helpers --------------------
def _safe_div(a, b, eps=1e-12):
    return a / max(b, eps)

def per_image_stats(pred_np: np.ndarray, gt_np: np.ndarray):
    """
    pred_np, gt_np: binary 0/1 numpy arrays, shape [H,W]
    Returns: dict with TP/FP/TN/FN and the derived metrics
    """
    pred_bin = (pred_np > 0).astype(np.uint8)
    gt_bin = (gt_np > 0).astype(np.uint8)

    TP = int(((pred_bin == 1) & (gt_bin == 1)).sum())
    FP = int(((pred_bin == 1) & (gt_bin == 0)).sum())
    TN = int(((pred_bin == 0) & (gt_bin == 0)).sum())
    FN = int(((pred_bin == 0) & (gt_bin == 1)).sum())

    precision = _safe_div(TP, (TP + FP))
    recall = _safe_div(TP, (TP + FN))
    f1 = _safe_div(2 * precision * recall, (precision + recall))
    iou = _safe_div(TP, (TP + FP + FN))
    oa = _safe_div(TP + TN, (TP + TN + FP + FN))

    return {
        "TP": TP, "FP": FP, "TN": TN, "FN": FN,
        "OA": oa, "Precision": precision, "Recall": recall, "F1": f1, "IoU": iou
    }
# --------------------------------------------------------------------

def get_args():
    parser = argparse.ArgumentParser(description='Change detection of remote sensing images')
    parser.add_argument("-c", "--config", type=str, default="configs/cdlama.py")
    parser.add_argument("--ckpt", type=str, default=None)
    parser.add_argument("--output_dir", type=str, default=None)
    # New: tables-only mode (no visualization images are exported)
    parser.add_argument("--tables-only", action="store_true",
                        help="Only generate tables and CSVs (overall table, per-image CSV, "
                             "per-image TXT, PR-curve CSV); do not render mask visualization images")
    return parser.parse_args()

if __name__ == "__main__":
    args = get_args()
    cfg = Config.fromfile(args.config)

    ckpt = args.ckpt
    if ckpt is None:
        ckpt = cfg.test_ckpt_path
    assert ckpt is not None

    if args.output_dir:
        base_dir = args.output_dir
    else:
        base_dir = os.path.dirname(ckpt)

    # Mask image output directory (only used when images are written)
    masks_output_dir = os.path.join(base_dir, "mask_rgb")
    # Table output directory (per-image .txt tables); kept under tables_only in --tables-only mode
    tables_output_dir = os.path.join(base_dir, "tables_only" if args.tables_only else "mask_rgb")
    os.makedirs(tables_output_dir, exist_ok=True)

    model = myTrain.load_from_checkpoint(ckpt, map_location={'cuda:1': 'cuda:0'}, cfg=cfg)
    model = model.to('cuda')
    model.eval()

    metric_cfg_1 = cfg.metric_cfg1
    metric_cfg_2 = cfg.metric_cfg2

    test_oa = torchmetrics.Accuracy(**metric_cfg_1).to('cuda')
    test_prec = torchmetrics.Precision(**metric_cfg_2).to('cuda')
    test_recall = torchmetrics.Recall(**metric_cfg_2).to('cuda')
    test_f1 = torchmetrics.F1Score(**metric_cfg_2).to('cuda')
    test_iou = torchmetrics.JaccardIndex(**metric_cfg_2).to('cuda')

    results = []         # only used when visualization images are generated
    per_image_rows = []  # [Per-Image] collects per-image metrics

    with torch.no_grad():
        test_loader = build_dataloader(cfg.dataset_config, mode='test')
        # === Call 1: initialize the PR accumulator ===
        pr_init(nbins=1000)

        for input in tqdm(test_loader):
            raw_predictions, mask, img_id = model(input[0].cuda(), input[1].cuda()), input[2].cuda(), input[3]
            # === Call 2: update the PR accumulator ===
            pr_update_from_outputs(raw_predictions, mask, cfg)

            if cfg.net == 'SARASNet':
                mask = Variable(resize_label(mask.data.cpu().numpy(),
                                size=raw_predictions.data.cpu().numpy().shape[2:]).to('cuda')).long()
                param = 1  # This parameter balances precision and recall to obtain a higher F1-score
                raw_predictions[:, 1, :, :] = raw_predictions[:, 1, :, :] + param

            if cfg.argmax:
                pred = raw_predictions.argmax(dim=1)
            else:
                if cfg.net == 'maskcd':
                    pred = raw_predictions[0]
                    pred = pred > 0.5
                    pred.squeeze_(1)
                else:
                    pred = raw_predictions.squeeze(1)
                    pred = pred > 0.5

            # ====== Accumulate overall test metrics ======
            test_oa(pred, mask)
            test_iou(pred, mask)
            test_prec(pred, mask)
            test_f1(pred, mask)
            test_recall(pred, mask)

            # ====== [Per-Image] compute and collect per-image metrics ======
            for i in range(pred.shape[0]):
                mask_real = mask[i].detach().cpu().numpy()
                mask_pred = pred[i].detach().cpu().numpy()
                mask_name = str(img_id[i])

                # per-image statistics
                stats = per_image_stats(mask_pred, mask_real)
                per_image_rows.append({
                    "img_id": mask_name,
                    "TP": stats["TP"], "FP": stats["FP"], "TN": stats["TN"], "FN": stats["FN"],
                    "OA": stats["OA"], "Precision": stats["Precision"],
                    "Recall": stats["Recall"], "F1": stats["F1"], "IoU": stats["IoU"]
                })

                # Collect image-writing tasks only when visualizations are requested
                if not args.tables_only:
                    results.append((mask_real, mask_pred, masks_output_dir, mask_name))

    # ====== Print overall metrics ======
    metrics = [test_prec.compute(),
               test_recall.compute(),
               test_f1.compute(),
               test_iou.compute()]

    total_metrics = [test_oa.compute().cpu().numpy(),
                     np.mean([item.cpu() for item in metrics[0]]),
                     np.mean([item.cpu() for item in metrics[1]]),
                     np.mean([item.cpu() for item in metrics[2]]),
                     np.mean([item.cpu() for item in metrics[3]])]

    result_table = prettytable.PrettyTable()
    result_table.field_names = ['Class', 'OA', 'Precision', 'Recall', 'F1_Score', 'IOU']

    for i in range(2):
        item = [i, '--']
        for j in range(len(metrics)):
            item.append(np.round(metrics[j][i].cpu().numpy(), 4))
        result_table.add_row(item)

    total = [np.round(v, 4) for v in total_metrics]
    total.insert(0, 'total')
    result_table.add_row(total)
    print(result_table)

    file_name = os.path.join(base_dir, "test_res.txt")
    with open(file_name, "a") as f:
        current_time = time.strftime('%Y_%m_%d %H:%M:%S {}'.format(cfg.net), time.localtime(time.time()))
        f.write(current_time + '\n')
        f.write(str(result_table) + '\n')

    # ====== Write visualization images or not, depending on the mode ======
    if not args.tables_only:
        if not os.path.exists(masks_output_dir):
            os.makedirs(masks_output_dir)
            print(masks_output_dir)

        # write images with a multiprocessing pool
        t0 = time.time()
        mpp.Pool(processes=mp.cpu_count()).map(mask_save, results)
        t1 = time.time()
        img_write_time = t1 - t0
        print('images writing spends: {} s'.format(img_write_time))
    else:
        print("[Mode] --tables-only: skipping visualization images; exporting tables/CSVs only.")

    # ====== [Per-Image] write all per-image metrics into one CSV ======
    per_image_csv = os.path.join(base_dir, f"per_image_metrics_{getattr(cfg, 'net', 'model')}.csv")
    with open(per_image_csv, "w", newline="") as wf:
        writer = csv.DictWriter(
            wf,
            fieldnames=["img_id", "TP", "FP", "TN", "FN", "OA", "Precision", "Recall", "F1", "IoU"]
        )
        writer.writeheader()
        for row in per_image_rows:
            row_out = dict(row)
            for k in ["OA", "Precision", "Recall", "F1", "IoU"]:
                row_out[k] = float(np.round(row_out[k], 6))
            writer.writerow(row_out)
    print(f"[Per-Image] saved CSV: {per_image_csv}")

    # ====== [Per-Image] write a small table (.txt) for each image ======
    for row in per_image_rows:
        txt_path = os.path.join(tables_output_dir, f"{row['img_id']}_metrics.txt")
        pt = prettytable.PrettyTable()
        pt.field_names = ["Metric", "Value"]
        # confusion-matrix entries first
        pt.add_row(["TP", row["TP"]])
        pt.add_row(["FP", row["FP"]])
        pt.add_row(["TN", row["TN"]])
        pt.add_row(["FN", row["FN"]])
        # then the ratio metrics
        pt.add_row(["OA", f"{row['OA']:.6f}"])
        pt.add_row(["Precision", f"{row['Precision']:.6f}"])
        pt.add_row(["Recall", f"{row['Recall']:.6f}"])
        pt.add_row(["F1", f"{row['F1']:.6f}"])
        pt.add_row(["IoU", f"{row['IoU']:.6f}"])
        with open(txt_path, "w") as wf:
            wf.write(str(pt))
    print(f"[Per-Image] per-image tables saved to: {tables_output_dir}")

    # ===== [PR] Export at program end =====
    try:
        pr_export(base_dir, cfg)
    except Exception as e:
        print(f"[PR] export skipped or failed: {e}")
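Usage note (editor's sketch, not part of the commit): the PRHistogram accumulator above can be exercised on its own with synthetic tensors. The tmp/ output path below is hypothetical, and the import assumes the repository's dependencies are installed (importing eval_tables_only pulls in train and its PyTorch Lightning stack):

import torch
from eval_tables_only import PRHistogram

pr = PRHistogram(nbins=100)
for _ in range(5):                       # stand-in for 5 test batches
    probs = torch.rand(2, 64, 64)        # [B,H,W] "change" probabilities in [0,1]
    mask = torch.rand(2, 64, 64) > 0.7   # [B,H,W] binary ground truth
    pr.update(probs, mask)

thresholds, precision, recall, f1, iou, TP, FP, FN = pr.compute_curve()
print(f"best F1 over all thresholds: {f1.max():.4f}")
pr.export_csv("tmp/pr_demo.csv")         # hypothetical path; columns: threshold,precision,recall,f1,iou,TP,FP,FN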
five_connect.py ADDED
@@ -0,0 +1,61 @@
import cv2
import os
import glob
import numpy as np

def concat_image_heatmap(img1_path, img2_path, label_path, mask_path, heatmap_path, output_path):
    img1 = cv2.imread(img1_path)
    img2 = cv2.imread(img2_path)
    mask = cv2.imread(mask_path)
    heatmap = cv2.imread(heatmap_path)
    label = cv2.imread(label_path) if label_path and os.path.exists(label_path) else None

    if img1 is None or img2 is None or mask is None or heatmap is None:
        print(f"❌ Missing image: {img1_path}, {img2_path}, {mask_path}, {heatmap_path}")
        return

    h, w = img1.shape[:2]
    img2 = cv2.resize(img2, (w, h))
    mask = cv2.resize(mask, (w, h))
    heatmap = cv2.resize(heatmap, (w, h))
    label = cv2.resize(label, (w, h)) if label is not None else np.zeros_like(img1)

    top_row = np.concatenate([img1, img2, label], axis=1)
    bottom_row = np.concatenate([mask, heatmap], axis=1)

    # pad the rows so both have the same width
    max_width = max(top_row.shape[1], bottom_row.shape[1])
    if top_row.shape[1] < max_width:
        pad = max_width - top_row.shape[1]
        top_row = cv2.copyMakeBorder(top_row, 0, 0, 0, pad, cv2.BORDER_CONSTANT, value=0)
    if bottom_row.shape[1] < max_width:
        pad = max_width - bottom_row.shape[1]
        bottom_row = cv2.copyMakeBorder(bottom_row, 0, 0, 0, pad, cv2.BORDER_CONSTANT, value=0)

    full_image = np.concatenate([top_row, bottom_row], axis=0)
    cv2.imwrite(output_path, full_image)
    print(f"✅ Saved: {output_path}")

def batch_process(img1_dir, img2_dir, label_dir, mask_dir, heatmap_dir, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    img1_paths = glob.glob(os.path.join(img1_dir, "*.png"))

    for img1_path in img1_paths:
        filename = os.path.basename(img1_path)
        img2_path = os.path.join(img2_dir, filename)
        label_path = os.path.join(label_dir, filename) if label_dir else None
        mask_path = os.path.join(mask_dir, filename)
        heatmap_path = os.path.join(heatmap_dir, filename)
        output_path = os.path.join(output_dir, filename.replace(".png", "_full.png"))

        concat_image_heatmap(img1_path, img2_path, label_path, mask_path, heatmap_path, output_path)

# path settings
img1_dir = "data/WHU_CD/test/image1"
img2_dir = "data/WHU_CD/test/image2"
label_dir = "data/WHU_CD/test/label"  # may be set to None
mask_dir = "mask_connect_test_dir/mask_rgb"
heatmap_dir = "mask_connect_test_dir/grad_cam/model.net.decoderhead.LHBlock2"
output_dir = "mask_heatmap_concat_dir"

batch_process(img1_dir, img2_dir, label_dir, mask_dir, heatmap_dir, output_dir)
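Editor's sketch (not in the commit): concat_image_heatmap also works on arbitrary PNGs, which makes it easy to smoke-test; the demo_inputs/ paths are hypothetical. Note that importing five_connect executes the batch_process call at the bottom of the file, which only creates the (empty) mask_heatmap_concat_dir output directory when the hard-coded WHU_CD paths are absent:

import os
import cv2
import numpy as np
from five_connect import concat_image_heatmap

os.makedirs("demo_inputs", exist_ok=True)            # hypothetical scratch directory
names = ["img1", "img2", "label", "mask", "heatmap"]
for n in names:                                      # fabricate five random 64x64 PNGs
    cv2.imwrite(f"demo_inputs/{n}.png",
                np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8))

paths = [f"demo_inputs/{n}.png" for n in names]
concat_image_heatmap(*paths, "demo_inputs/full.png")  # writes the two-row collage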
grad_cam_CNN.py ADDED
@@ -0,0 +1,74 @@
import os
import sys
sys.path.append('.')

import matplotlib.pyplot as plt
from utilss import GradCAM, show_cam_on_image, center_crop_img

import argparse
from utils.config import Config
from train import *

def get_args():
    parser = argparse.ArgumentParser(description='Change detection of remote sensing images')
    parser.add_argument("-c", "--config", type=str, default="configs/cdxformer.py")
    parser.add_argument("--output_dir", default=None)
    parser.add_argument("--layer", default=None)
    return parser.parse_args()

def main():
    args = get_args()

    if args.layer is None:
        raise NameError("Please ensure the parameter '--layer' is not None!\n"
                        " e.g. --layer=model.net.decoderhead.LHBlock2.mlp_l")

    cfg = Config.fromfile(args.config)

    model = myTrain.load_from_checkpoint(cfg.test_ckpt_path, cfg=cfg)
    model = model.to('cuda')

    # print(dict(model.named_modules()).keys())

    test_loader = build_dataloader(cfg.dataset_config, mode='test')

    if args.output_dir:
        base_dir = args.output_dir
    else:
        base_dir = os.path.dirname(cfg.test_ckpt_path)
    gradcam_output_dir = os.path.join(base_dir, "grad_cam", args.layer)
    if os.path.exists(gradcam_output_dir):
        raise NameError("Please ensure gradcam_output_dir does not exist!")

    os.makedirs(gradcam_output_dir)

    for input in tqdm(test_loader):
        target_layers = [eval(args.layer)]  # resolve the layer object from its dotted name
        mask, img_id = input[2].cuda(), input[3]

        cam = GradCAM(cfg, model=model.net, target_layers=target_layers, use_cuda=True)
        target_category = 1  # class 1 = "change"

        grayscale_cam_all = cam(input_tensor=(input[0], input[1]), target_category=target_category)

        for i in range(grayscale_cam_all.shape[0]):
            grayscale_cam = grayscale_cam_all[i, :]
            # img=0: visualize the heatmap alone, with no base image blended in
            visualization = show_cam_on_image(0,
                                              grayscale_cam,
                                              use_rgb=True)
            fig = plt.figure()
            ax = fig.add_subplot(111)
            ax.imshow(visualization)
            # ax = fig.add_subplot(122)
            # ax.imshow(mask[i].cpu().numpy())
            ax.set_xticks([])
            ax.set_yticks([])
            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.spines['bottom'].set_visible(False)
            ax.spines['left'].set_visible(False)
            plt.savefig(os.path.join(gradcam_output_dir, '{}.png'.format(img_id[i])))
            plt.close()


if __name__ == '__main__':
    main()
mask_connect_test.py ADDED
@@ -0,0 +1,54 @@
import cv2
import os
import glob
import numpy as np

def concat_change_detection_images(img1_path, img2_path, label_path, pred_path, output_path):
    img1 = cv2.imread(img1_path)
    img2 = cv2.imread(img2_path)
    label = cv2.imread(label_path) if label_path and os.path.exists(label_path) else None  # tolerate label_path=None
    pred = cv2.imread(pred_path)

    if img1 is None or img2 is None or pred is None:
        print(f"Missing or unreadable image: {img1_path}, {img2_path}, {pred_path}")
        return

    # resize all images to the same size (using img1 as the reference)
    h, w = img1.shape[:2]
    img2 = cv2.resize(img2, (w, h))
    pred = cv2.resize(pred, (w, h))
    if label is not None:
        label = cv2.resize(label, (w, h))

    # concatenate the images (the label panel is skipped when absent)
    if label is not None:
        concat = np.concatenate([img1, img2, label, pred], axis=1)
    else:
        concat = np.concatenate([img1, img2, pred], axis=1)

    cv2.imwrite(output_path, concat)

def batch_process(img1_dir, img2_dir, label_dir, pred_dir, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    img1_paths = glob.glob(os.path.join(img1_dir, "*.png"))
    for img1_path in img1_paths:
        filename = os.path.basename(img1_path)
        img2_path = os.path.join(img2_dir, filename)
        label_path = os.path.join(label_dir, filename) if label_dir else None
        pred_path = os.path.join(pred_dir, filename)
        output_path = os.path.join(output_dir, filename.replace(".png", "_concat.png"))

        print(f"[INFO] img1: {img1_path}, img2: {img2_path}")
        print(f"[INFO] label: {label_path}, pred: {pred_path}")

        concat_change_detection_images(img1_path, img2_path, label_path, pred_path, output_path)
        print(f"Saved: {output_path}")

# path settings
img1_dir = "data/WHU_CD/test/image1"
img2_dir = "data/WHU_CD/test/image2"
label_dir = "data/WHU_CD/test/label"  # set to None if there are no label images
pred_dir = "work_dirs/CLCD_BS4_epoch200/CDXFormer/version_0/ckpts/test/mask_rgb"
output_dir = "mask_connect_test_dir"

batch_process(img1_dir, img2_dir, label_dir, pred_dir, output_dir)
params_flops.py ADDED
@@ -0,0 +1,53 @@
import sys
sys.path.append('.')
from train import *
from fvcore.nn import FlopCountAnalysis, flop_count_table, flop_count, parameter_count
from rscd.models.backbones.lamba_util.csms6s import flops_selective_scan_fn, flops_selective_scan_ref, selective_scan_flop_jit

def parse_args():
    parser = argparse.ArgumentParser(description='count params and flops')
    parser.add_argument("-c", "--config", type=str, default="configs/cdlama.py")
    parser.add_argument("--size", type=int, default=256)
    args = parser.parse_args()
    return args

def flops_mamba(model, shape=(3, 224, 224)):
    # shape = self.__input_shape__[1:]
    supported_ops = {
        "aten::silu": None,  # as relu is in _IGNORED_OPS
        "aten::neg": None,   # as relu is in _IGNORED_OPS
        "aten::exp": None,   # as relu is in _IGNORED_OPS
        "aten::flip": None,  # as permute is in _IGNORED_OPS
        # "prim::PythonOp.CrossScan": None,
        # "prim::PythonOp.CrossMerge": None,
        "prim::PythonOp.SelectiveScanCuda": selective_scan_flop_jit,
        "prim::PythonOp.SelectiveScanMamba": selective_scan_flop_jit,
        "prim::PythonOp.SelectiveScanOflex": selective_scan_flop_jit,
        "prim::PythonOp.SelectiveScanCore": selective_scan_flop_jit,
        "prim::PythonOp.SelectiveScanNRow": selective_scan_flop_jit,
    }

    model.cuda().eval()

    input1 = torch.randn((1, *shape), device=next(model.parameters()).device)
    input2 = torch.randn((1, *shape), device=next(model.parameters()).device)
    params = parameter_count(model)[""]
    Gflops, unsupported = flop_count(model=model, inputs=(input1, input2), supported_ops=supported_ops)

    del model, input1, input2
    # return sum(Gflops.values()) * 1e9
    return f"params {params / 1e6} M, GFLOPs {sum(Gflops.values())}"

if __name__ == "__main__":
    args = parse_args()
    cfg = Config.fromfile(args.config)
    net = myTrain(cfg).net.cuda()

    size = args.size
    input = torch.rand((1, 3, size, size)).cuda()

    net.eval()
    flops = FlopCountAnalysis(net, (input, input))
    print(flop_count_table(flops, max_depth=2))

    print(flops_mamba(net, (3, size, size)))
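For a dependency-light check of the same fvcore pattern the script relies on, here is a self-contained sketch (editor's addition; ToyNet is a made-up stand-in for the repo's two-input networks) that runs on CPU:

import torch
from torch import nn
from fvcore.nn import FlopCountAnalysis, flop_count_table, parameter_count

class ToyNet(nn.Module):
    # toy two-input model mimicking the (x1, x2) change-detection interface
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 3, padding=1)
        self.head = nn.Conv2d(8, 2, 1)

    def forward(self, x1, x2):
        return self.head(self.conv(x1) - self.conv(x2))

net = ToyNet().eval()
x = torch.rand(1, 3, 256, 256)
flops = FlopCountAnalysis(net, (x, x))       # extra inputs are passed as a tuple
print(flop_count_table(flops, max_depth=2))  # per-module params / FLOPs table
print("params (M):", parameter_count(net)[""] / 1e6)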
requirements.txt ADDED
@@ -0,0 +1,13 @@
torchmetrics==0.11.4
pytorch-lightning==2.0.6
scikit-image==0.21.0

catalyst==20.9
albumentations==1.3.1
ttach==0.0.3
einops==0.6.1
timm==0.6.7
addict==2.4.0
soundfile==0.12.1
prettytable==3.8.0
fvcore
test.py ADDED
@@ -0,0 +1,128 @@
import torch
from torch import nn
from tqdm import tqdm
import prettytable
import time
import os
import multiprocessing.pool as mpp
import multiprocessing as mp

from train import *

import argparse
from utils.config import Config
from tools.mask_convert import mask_save

def get_args():
    parser = argparse.ArgumentParser(description='Change detection of remote sensing images')
    parser.add_argument("-c", "--config", type=str, default="configs/cdlama.py")
    parser.add_argument("--ckpt", type=str, default=None)
    parser.add_argument("--output_dir", type=str, default=None)
    return parser.parse_args()

if __name__ == "__main__":
    args = get_args()
    cfg = Config.fromfile(args.config)

    ckpt = args.ckpt
    if ckpt is None:
        ckpt = cfg.test_ckpt_path
    assert ckpt is not None

    if args.output_dir:
        base_dir = args.output_dir
    else:
        base_dir = os.path.dirname(ckpt)
    masks_output_dir = os.path.join(base_dir, "mask_rgb")

    model = myTrain.load_from_checkpoint(ckpt, map_location={'cuda:1': 'cuda:0'}, cfg=cfg)
    model = model.to('cuda')

    model.eval()

    metric_cfg_1 = cfg.metric_cfg1
    metric_cfg_2 = cfg.metric_cfg2

    test_oa = torchmetrics.Accuracy(**metric_cfg_1).to('cuda')
    test_prec = torchmetrics.Precision(**metric_cfg_2).to('cuda')
    test_recall = torchmetrics.Recall(**metric_cfg_2).to('cuda')
    test_f1 = torchmetrics.F1Score(**metric_cfg_2).to('cuda')
    test_iou = torchmetrics.JaccardIndex(**metric_cfg_2).to('cuda')

    results = []
    with torch.no_grad():
        test_loader = build_dataloader(cfg.dataset_config, mode='test')
        for input in tqdm(test_loader):

            raw_predictions, mask, img_id = model(input[0].cuda(), input[1].cuda()), input[2].cuda(), input[3]

            if cfg.net == 'SARASNet':
                mask = Variable(resize_label(mask.data.cpu().numpy(),
                                size=raw_predictions.data.cpu().numpy().shape[2:]).to('cuda')).long()
                param = 1  # This parameter balances precision and recall to obtain a higher F1-score
                raw_predictions[:, 1, :, :] = raw_predictions[:, 1, :, :] + param

            if cfg.argmax:
                pred = raw_predictions.argmax(dim=1)
            else:
                if cfg.net == 'maskcd':
                    pred = raw_predictions[0]
                    pred = pred > 0.5
                    pred.squeeze_(1)
                else:
                    pred = raw_predictions.squeeze(1)
                    pred = pred > 0.5

            test_oa(pred, mask)
            test_iou(pred, mask)
            test_prec(pred, mask)
            test_f1(pred, mask)
            test_recall(pred, mask)

            for i in range(pred.shape[0]):
                mask_real = mask[i].cpu().numpy()
                mask_pred = pred[i].cpu().numpy()
                mask_name = str(img_id[i])
                results.append((mask_real, mask_pred, masks_output_dir, mask_name))

    metrics = [test_prec.compute(),
               test_recall.compute(),
               test_f1.compute(),
               test_iou.compute()]

    total_metrics = [test_oa.compute().cpu().numpy(),
                     np.mean([item.cpu() for item in metrics[0]]),
                     np.mean([item.cpu() for item in metrics[1]]),
                     np.mean([item.cpu() for item in metrics[2]]),
                     np.mean([item.cpu() for item in metrics[3]])]

    result_table = prettytable.PrettyTable()
    result_table.field_names = ['Class', 'OA', 'Precision', 'Recall', 'F1_Score', 'IOU']

    for i in range(2):
        item = [i, '--']
        for j in range(len(metrics)):
            item.append(np.round(metrics[j][i].cpu().numpy(), 4))
        result_table.add_row(item)

    total = [np.round(v, 4) for v in total_metrics]
    total.insert(0, 'total')
    result_table.add_row(total)

    print(result_table)

    file_name = os.path.join(base_dir, "test_res.txt")
    with open(file_name, "a") as f:
        current_time = time.strftime('%Y_%m_%d %H:%M:%S {}'.format(cfg.net), time.localtime(time.time()))
        f.write(current_time + '\n')
        f.write(str(result_table) + '\n')

    if not os.path.exists(masks_output_dir):
        os.makedirs(masks_output_dir)
        print(masks_output_dir)

    t0 = time.time()
    mpp.Pool(processes=mp.cpu_count()).map(mask_save, results)
    t1 = time.time()
    img_write_time = t1 - t0
    print('images writing spends: {} s'.format(img_write_time))
train.py ADDED
@@ -0,0 +1,389 @@
import torch
import torch.nn as nn
from pytorch_lightning import LightningModule, Trainer, seed_everything
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint, TQDMProgressBar
from pytorch_lightning.loggers import TensorBoardLogger
import torchmetrics
from tqdm import tqdm

import prettytable
import numpy as np
import argparse
from rscd.models.build_model import build_model
from rscd.datasets import build_dataloader
from rscd.optimizers import build_optimizer
from rscd.losses import build_loss
from utils.config import Config

from torch.autograd import Variable

import sys
sys.path.append('rscd')

seed_everything(1234, workers=True)

import os
import time  # for timing

def resize_label(label, size):

    label = np.expand_dims(label, axis=0)
    label_resized = np.zeros((1, label.shape[1], size[0], size[1]))
    interp = nn.Upsample(size=(size[0], size[1]), mode='bilinear')

    labelVar = Variable(torch.from_numpy(label).float())
    label_resized[:, :, :, :] = interp(labelVar).data.numpy()
    label_resized = np.array(label_resized, dtype=np.int32)
    return torch.from_numpy(np.squeeze(label_resized, axis=0)).float()

def get_args():
    parser = argparse.ArgumentParser(description='Change detection of remote sensing images')
    parser.add_argument("-c", "--config", type=str, default="configs/cdlamba.py")
    return parser.parse_args()

class myTrain(LightningModule):
    def __init__(self, cfg, log_dir=None):
        super(myTrain, self).__init__()

        self.cfg = cfg
        self.log_dir = log_dir
        self.net = build_model(cfg.model_config)
        self.loss = build_loss(cfg.loss_config)

        self.loss.to('cuda:{}'.format(cfg.gpus[0]))

        metric_cfg1 = cfg.metric_cfg1
        metric_cfg2 = cfg.metric_cfg2

        self.tr_oa = torchmetrics.Accuracy(**metric_cfg1)
        self.tr_prec = torchmetrics.Precision(**metric_cfg2)
        self.tr_recall = torchmetrics.Recall(**metric_cfg2)
        self.tr_f1 = torchmetrics.F1Score(**metric_cfg2)
        self.tr_iou = torchmetrics.JaccardIndex(**metric_cfg2)

        self.val_oa = torchmetrics.Accuracy(**metric_cfg1)
        self.val_prec = torchmetrics.Precision(**metric_cfg2)
        self.val_recall = torchmetrics.Recall(**metric_cfg2)
        self.val_f1 = torchmetrics.F1Score(**metric_cfg2)
        self.val_iou = torchmetrics.JaccardIndex(**metric_cfg2)

        self.test_oa = torchmetrics.Accuracy(**metric_cfg1)
        self.test_prec = torchmetrics.Precision(**metric_cfg2)
        self.test_recall = torchmetrics.Recall(**metric_cfg2)
        self.test_f1 = torchmetrics.F1Score(**metric_cfg2)
        self.test_iou = torchmetrics.JaccardIndex(**metric_cfg2)

        self.test_max_f1 = [0 for _ in range(10)]

        self.test_loader = build_dataloader(cfg.dataset_config, mode='test')

    def forward(self, x1, x2):
        pred = self.net(x1, x2)
        return pred

    def configure_optimizers(self):
        optimizer, scheduler = build_optimizer(self.cfg.optimizer_config, self.net)
        return {'optimizer': optimizer, 'lr_scheduler': scheduler, 'monitor': self.cfg.monitor_val}

    def train_dataloader(self):
        loader = build_dataloader(self.cfg.dataset_config, mode='train')
        return loader

    def val_dataloader(self):
        loader = build_dataloader(self.cfg.dataset_config, mode='val')
        return loader

    def output(self, metrics, total_metrics, mode, test_idx=0, test_value=None):
        result_table = prettytable.PrettyTable()
        result_table.field_names = ['Class', 'OA', 'Precision', 'Recall', 'F1_Score', 'IOU']

        for i in range(len(metrics[0])):
            item = [i, '--']
            for j in range(len(metrics)):
                item.append(np.round(metrics[j][i].cpu().numpy(), 4))
            result_table.add_row(item)

        total = list(total_metrics.values())
        total = [np.round(v, 4) for v in total]
        total.insert(0, 'total')
        result_table.add_row(total)

        if mode == 'val' or mode == 'test':
            print(mode)
            print(result_table)

        if self.log_dir:
            base_dir = self.log_dir
        else:
            base_dir = os.path.join('work_dirs', self.cfg.exp_name)

        if mode == 'test':
            if self.cfg.argmax:
                file_name = os.path.join(base_dir, "test_metrics_{}.txt".format(test_idx))
                if metrics[2][1] > self.test_max_f1[test_idx]:
                    self.test_max_f1[test_idx] = metrics[2][1]
                    file_name = os.path.join(base_dir, "test_max_metrics_{}.txt".format(test_idx))
            else:
                file_name = os.path.join(base_dir, "test_metrics_{}_{}.txt".format(test_idx, str(test_value)))
                if metrics[2][1] > self.test_max_f1[test_idx]:
                    self.test_max_f1[test_idx] = metrics[2][1]
                    file_name = os.path.join(base_dir, "test_max_metrics_{}_{}.txt".format(test_idx, '%.1f' % test_value))
        else:
            file_name = os.path.join(base_dir, "train_metrics.txt")
        f = open(file_name, "a")
        f.write('epoch:{}/{} {}\n'.format(self.current_epoch, self.cfg.epoch, mode))
        f.write(str(result_table) + '\n')
        f.close()

    def training_step(self, batch, batch_idx):
        imgA, imgB, mask = batch[0], batch[1], batch[2]
        preds = self(imgA, imgB)

        if self.cfg.net == 'SARASNet':
            mask = Variable(resize_label(mask.data.cpu().numpy(),
                            size=preds.data.cpu().numpy().shape[2:]).to('cuda')).long()
            param = 1  # This parameter balances precision and recall to obtain a higher F1-score
            preds[:, 1, :, :] = preds[:, 1, :, :] + param

        if self.cfg.argmax:
            loss = self.loss(preds, mask)
            pred = preds.argmax(dim=1)
        else:
            if self.cfg.net == 'maskcd':
                loss = self.loss(preds[1], mask)
                pred = preds[0]
                pred = pred > 0.5
                pred.squeeze_(1)
            else:
                pred = preds.squeeze(1)
                loss = self.loss(pred, mask)
                pred = pred > 0.5

        self.tr_oa(pred, mask)
        self.tr_prec(pred, mask)
        self.tr_recall(pred, mask)
        self.tr_f1(pred, mask)
        self.tr_iou(pred, mask)

        self.log('tr_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def on_train_epoch_end(self):
        metrics = [self.tr_prec.compute(),
                   self.tr_recall.compute(),
                   self.tr_f1.compute(),
                   self.tr_iou.compute()]

        log = {'tr_oa': float(self.tr_oa.compute().cpu()),
               'tr_prec': np.mean([item.cpu() for item in metrics[0]]),
               'tr_recall': np.mean([item.cpu() for item in metrics[1]]),
               'tr_f1': np.mean([item.cpu() for item in metrics[2]]),
               'tr_miou': np.mean([item.cpu() for item in metrics[3]])}

        self.output(metrics, log, 'train')

        for key, value in log.items():
            self.log(key, value, on_step=False, on_epoch=True, prog_bar=True)
        self.log('tr_change_f1', metrics[2][1], on_step=False, on_epoch=True, prog_bar=True)

        self.tr_oa.reset()
        self.tr_prec.reset()
        self.tr_recall.reset()
        self.tr_f1.reset()
        self.tr_iou.reset()

    def validation_step(self, batch, batch_idx):
        imgA, imgB, mask = batch[0], batch[1], batch[2]
        preds = self(imgA, imgB)

        if self.cfg.net == 'SARASNet':
            mask = Variable(resize_label(mask.data.cpu().numpy(),
                            size=preds.data.cpu().numpy().shape[2:]).to('cuda')).long()
            param = 1  # This parameter balances precision and recall to obtain a higher F1-score
            preds[:, 1, :, :] = preds[:, 1, :, :] + param

        if self.cfg.argmax:
            loss = self.loss(preds, mask)
            pred = preds.argmax(dim=1)
        else:
            if self.cfg.net == 'maskcd':
                loss = self.loss(preds[1], mask)
                pred = preds[0]
                pred = pred > 0.5
                pred.squeeze_(1)
            else:
                pred = preds.squeeze(1)
                loss = self.loss(pred, mask)
                pred = pred > 0.5

        self.val_oa(pred, mask)
        self.val_prec(pred, mask)
        self.val_recall(pred, mask)
        self.val_f1(pred, mask)
        self.val_iou(pred, mask)

        self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def on_validation_epoch_end(self):
        metrics = [self.val_prec.compute(),
                   self.val_recall.compute(),
                   self.val_f1.compute(),
                   self.val_iou.compute()]

        log = {'val_oa': float(self.val_oa.compute().cpu()),
               'val_prec': np.mean([item.cpu() for item in metrics[0]]),
               'val_recall': np.mean([item.cpu() for item in metrics[1]]),
               'val_f1': np.mean([item.cpu() for item in metrics[2]]),
               'val_miou': np.mean([item.cpu() for item in metrics[3]])}

        self.output(metrics, log, 'val')

        for key, value in log.items():
            self.log(key, value, on_step=False, on_epoch=True, prog_bar=True)
        self.log('val_change_f1', metrics[2][1], on_step=False, on_epoch=True, prog_bar=True)

        self.val_oa.reset()
        self.val_prec.reset()
        self.val_recall.reset()
        self.val_f1.reset()
        self.val_iou.reset()

        for idx in range(len(self.cfg.monitor_test)):
            if self.cfg.argmax:
                self.log(self.cfg.monitor_test[idx], self.test(idx), on_step=False, on_epoch=True, prog_bar=True)
            else:
                t = 0.2 + 0.1 * idx
                self.log(self.cfg.monitor_test[idx], self.test(idx, t), on_step=False, on_epoch=True, prog_bar=True)

    def test(self, idx, value=None):
        for input in tqdm(self.test_loader):
            raw_predictions, mask_test = self(input[0].cuda(self.cfg.gpus[0]), input[1].cuda(self.cfg.gpus[0])), input[2].cuda(self.cfg.gpus[0])

            if self.cfg.net == 'SARASNet':
                mask_test = Variable(resize_label(mask_test.data.cpu().numpy(),
                                     size=raw_predictions.data.cpu().numpy().shape[2:]).to('cuda')).long()
                param = 1  # This parameter balances precision and recall to obtain a higher F1-score
                raw_predictions[:, 1, :, :] = raw_predictions[:, 1, :, :] + param

            if self.cfg.argmax:
                pred_test = raw_predictions.argmax(dim=1)
            else:
                if self.cfg.net == 'maskcd':
                    raw_prediction = raw_predictions[0]
                    pred_test = raw_prediction > value
                    pred_test.squeeze_(1)
                else:
                    pred_test = raw_predictions.squeeze(1)
                    pred_test = pred_test > 0.5

            self.test_oa(pred_test, mask_test)
            self.test_iou(pred_test, mask_test)
            self.test_prec(pred_test, mask_test)
            self.test_f1(pred_test, mask_test)
            self.test_recall(pred_test, mask_test)

        metrics_test = [self.test_prec.compute(),
                        self.test_recall.compute(),
                        self.test_f1.compute(),
                        self.test_iou.compute()]

        log = {'test_oa': float(self.test_oa.compute().cpu()),
               'test_prec': np.mean([item.cpu() for item in metrics_test[0]]),
               'test_recall': np.mean([item.cpu() for item in metrics_test[1]]),
               'test_f1': np.mean([item.cpu() for item in metrics_test[2]]),
               'test_miou': np.mean([item.cpu() for item in metrics_test[3]])}

        self.output(metrics_test, log, 'test', idx, value)

        self.test_oa.reset()
        self.test_prec.reset()
        self.test_recall.reset()
        self.test_f1.reset()
        self.test_iou.reset()

        return metrics_test[2][1]

if __name__ == "__main__":
    args = get_args()
    cfg = Config.fromfile(args.config)
    logger = TensorBoardLogger(save_dir="work_dirs",
                               sub_dir='log',
                               name=cfg.exp_name,
                               default_hp_metric=False)

    log_dir = os.path.dirname(logger.log_dir)

    model = myTrain(cfg, log_dir)
    # —— Inference-FPS benchmark inserted here —— #
    device = torch.device(f'cuda:{cfg.gpus[0]}' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    # take one batch from the validation dataloader
    val_loader = model.val_dataloader()
    batch_iter = iter(val_loader)
    try:
        batch = next(batch_iter)
        imgA_batch = batch[0]
        imgB_batch = batch[1]
    except StopIteration:
        raise RuntimeError("The validation dataloader is empty; please check the dataset configuration.")

    # move the inputs to the same device
    imgA_batch = imgA_batch.to(device)
    imgB_batch = imgB_batch.to(device)

    # warm up with 10 inference passes
    with torch.no_grad():
        for _ in range(10):
            _ = model(imgA_batch, imgB_batch)

    # time N inference passes
    N = 100
    if device.type == 'cuda':
        torch.cuda.synchronize(device)
    start_time = time.time()
    with torch.no_grad():
        for _ in range(N):
            _ = model(imgA_batch, imgB_batch)
    if device.type == 'cuda':
        torch.cuda.synchronize(device)
    elapsed = time.time() - start_time
    fps = N / elapsed
    print(f"[Inference FPS] input resolution = {imgA_batch.shape[2]}×{imgA_batch.shape[3]}, "
          f"batch size = {imgA_batch.shape[0]}, {N} passes took {elapsed:.4f} s, FPS = {fps:.2f}")
    # —— end of the inserted benchmark —— #

    pbar = TQDMProgressBar(refresh_rate=1)
    lr_monitor = LearningRateMonitor(logging_interval=cfg.logging_interval)
    callbacks = [pbar, lr_monitor]

    ckpt_cb = ModelCheckpoint(dirpath=f'{log_dir}/ckpts/val',
                              filename='{' + cfg.monitor_val + ':.4f}' + '-{epoch:d}',
                              monitor=cfg.monitor_val,
                              mode='max',
                              save_top_k=cfg.save_top_k,
                              save_last=True)
    callbacks.append(ckpt_cb)

    for m_test in cfg.monitor_test:
        ckpt_cb = ModelCheckpoint(dirpath=f'{log_dir}/ckpts/test/{m_test}',
                                  filename='{' + m_test + ':.4f}' + '-{epoch:d}',
                                  monitor=m_test,
                                  mode='max',
                                  save_top_k=cfg.save_top_k,
                                  save_last=True)
        callbacks.append(ckpt_cb)

    trainer = Trainer(max_epochs=cfg.epoch,
                      # precision='16-mixed',
                      callbacks=callbacks,
                      logger=logger,
                      enable_model_summary=True,
                      accelerator='auto',
                      devices=cfg.gpus,
                      num_sanity_val_steps=2,
                      benchmark=True)

    trainer.fit(model, ckpt_path=cfg.resume_ckpt_path)
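The inline benchmark above can be factored into a reusable helper; a sketch follows (editor's addition, same logic as the script). The torch.cuda.synchronize calls matter because CUDA kernels launch asynchronously, and timing without them would mostly measure kernel-launch overhead:

import time
import torch

@torch.no_grad()
def measure_fps(model, inputs, n_warmup=10, n_timed=100):
    device = next(model.parameters()).device
    model.eval()
    inputs = [x.to(device) for x in inputs]
    for _ in range(n_warmup):            # warm-up: cuDNN autotuning, allocator, caches
        model(*inputs)
    if device.type == 'cuda':
        torch.cuda.synchronize(device)   # drain pending kernels before starting the clock
    start = time.time()
    for _ in range(n_timed):
        model(*inputs)
    if device.type == 'cuda':
        torch.cuda.synchronize(device)   # wait for the last kernel before stopping the clock
    return n_timed / (time.time() - start)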
utilss.py ADDED
@@ -0,0 +1,249 @@
import cv2
import numpy as np
from torch.nn import functional as F
import torch

class ActivationsAndGradients:
    """ Class for extracting activations and
    registering gradients from targeted intermediate layers """

    def __init__(self, model, target_layers, reshape_transform):
        self.model = model
        self.gradients = []
        self.activations = []
        self.reshape_transform = reshape_transform
        self.handles = []
        for target_layer in target_layers:
            self.handles.append(
                target_layer.register_forward_hook(
                    self.save_activation))
            # Backward compatibility with older pytorch versions:
            if hasattr(target_layer, 'register_full_backward_hook'):
                self.handles.append(
                    target_layer.register_full_backward_hook(
                        self.save_gradient))
            else:
                self.handles.append(
                    target_layer.register_backward_hook(
                        self.save_gradient))

    def save_activation(self, module, input, output):
        activation = output
        if self.reshape_transform is not None:
            activation = self.reshape_transform(activation)
        self.activations.append(activation.cpu().detach())

    def save_gradient(self, module, grad_input, grad_output):
        # Gradients are computed in reverse order
        grad = grad_output[0]
        if self.reshape_transform is not None:
            grad = self.reshape_transform(grad)
        self.gradients = [grad.cpu().detach()] + self.gradients

    def __call__(self, x, y):
        self.gradients = []
        self.activations = []
        return self.model(x, y)

    def release(self):
        for handle in self.handles:
            handle.remove()


class GradCAM:
    def __init__(self,
                 cfg,
                 model,
                 target_layers,
                 reshape_transform=None,
                 use_cuda=False):
        self.cfg = cfg
        self.model = model.eval()
        self.target_layers = target_layers
        self.reshape_transform = reshape_transform
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        self.activations_and_grads = ActivationsAndGradients(
            self.model, target_layers, reshape_transform)

    """ Get a vector of weights for every channel in the target layer.
        Methods that return weights channels,
        will typically need to only implement this function. """

    @staticmethod
    def get_cam_weights(grads):
        return np.mean(grads, axis=(2, 3), keepdims=True)

    @staticmethod
    def get_loss(output, target_category):
        # Accumulate each sample's raw output map; the caller reduces with .sum()
        loss = 0
        for i in range(len(target_category)):
            loss = loss + output[i]
        return loss

    def get_cam_image(self, activations, grads):
        weights = self.get_cam_weights(grads)
        weighted_activations = weights * activations
        cam = weighted_activations.sum(axis=1)

        return cam

    @staticmethod
    def get_target_width_height(input_tensor):
        width, height = input_tensor.size(-1), input_tensor.size(-2)
        return width, height

    def compute_cam_per_layer(self, input_tensor):
        activations_list = [a.cpu().data.numpy()
                            for a in self.activations_and_grads.activations]
        grads_list = [g.cpu().data.numpy()
                      for g in self.activations_and_grads.gradients]
        target_size = self.get_target_width_height(input_tensor)

        cam_per_target_layer = []
        # Loop over the saliency image from every layer

        for layer_activations, layer_grads in zip(activations_list, grads_list):
            cam = self.get_cam_image(layer_activations, layer_grads)
            cam[cam < 0] = 0  # ReLU: drop negative contributions before the min-max scaling in scale_cam_image
            scaled = self.scale_cam_image(cam, target_size)
            cam_per_target_layer.append(scaled[:, None, :])

        return cam_per_target_layer

    def aggregate_multi_layers(self, cam_per_target_layer):
        cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1)
        cam_per_target_layer = np.maximum(cam_per_target_layer, 0)
        result = np.mean(cam_per_target_layer, axis=1)
        return self.scale_cam_image(result)

    @staticmethod
    def scale_cam_image(cam, target_size=None):
        result = []
        for img in cam:
            img = img - np.min(img)
            img = img / (1e-7 + np.max(img))
            if target_size is not None:
                img = cv2.resize(img, target_size)
            result.append(img)
        result = np.float32(result)

        return result

    def __call__(self, input_tensor, target_category=None):
        x, y = input_tensor
        if self.cuda:
            x = x.cuda()
            y = y.cuda()

        # Forward pass to obtain the network logits (no softmax applied)
        if self.cfg.net == 'cdmask':
            o, outputs = self.activations_and_grads(x, y)
            mask_cls_results = outputs["pred_logits"]
            mask_pred_results = outputs["pred_masks"]
            mask_pred_results = F.interpolate(
                mask_pred_results,
                scale_factor=(4, 4),
                mode="bilinear",
                align_corners=False,
            )
            mask_cls = F.softmax(mask_cls_results, dim=-1)[..., 1:]
            mask_pred = mask_pred_results.sigmoid()
            output = torch.einsum("bqc,bqhw->bchw", mask_cls, mask_pred)
        else:
            output = self.activations_and_grads(x, y)

        if isinstance(target_category, int):
            target_category = [target_category] * x.size(0)

        if target_category is None:
            target_category = np.argmax(output.cpu().data.numpy(), axis=-1)
            print(f"category id: {target_category}")
        else:
            assert (len(target_category) == x.size(0))

        self.model.zero_grad()
        loss = self.get_loss(output, target_category).sum()
        loss.backward(retain_graph=True)

        # In most of the saliency attribution papers, the saliency is
        # computed with a single target layer.
        # Commonly it is the last convolutional layer.
        # Here we support passing a list with multiple target layers.
        # It will compute the saliency image for every image,
        # and then aggregate them (with a default mean aggregation).
        # This gives you more flexibility in case you just want to
        # use all conv layers for example, all Batchnorm layers,
        # or something else.
        cam_per_layer = self.compute_cam_per_layer(x)
        return self.aggregate_multi_layers(cam_per_layer)

    def __del__(self):
        self.activations_and_grads.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self.activations_and_grads.release()
        if isinstance(exc_value, IndexError):
            # Handle IndexError here...
            print(
                f"An exception occurred in CAM with block: {exc_type}. Message: {exc_value}")
            return True


def show_cam_on_image(img: np.ndarray,
                      mask: np.ndarray,
                      use_rgb: bool = False,
                      colormap: int = cv2.COLORMAP_JET) -> np.ndarray:
    """ This function overlays the cam mask on the image as a heatmap.
    By default the heatmap is in BGR format.

    :param img: The base image in RGB or BGR format.
    :param mask: The cam mask.
    :param use_rgb: Whether to use an RGB or BGR heatmap; set to True if 'img' is in RGB format.
    :param colormap: The OpenCV colormap to be used.
    :returns: The default image with the cam overlay.
    """

    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
    if use_rgb:
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    heatmap = np.float32(heatmap) / 255

    if np.max(img) > 1:
        raise Exception(
            "The input image should be np.float32 in the range [0, 1]")

    cam = heatmap + img
    cam = cam / np.max(cam)
    return np.uint8(255 * cam)


def center_crop_img(img: np.ndarray, size: int):
    h, w, c = img.shape

    if w == h == size:
        return img

    if w < h:
        ratio = size / w
        new_w = size
        new_h = int(h * ratio)
    else:
        ratio = size / h
        new_h = size
        new_w = int(w * ratio)

    img = cv2.resize(img, dsize=(new_w, new_h))

    if new_w == size:
        h = (new_h - size) // 2
        img = img[h: h + size]
    else:
        w = (new_w - size) // 2
        img = img[:, w: w + size]

    return img
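To see the GradCAM class run end to end without the repo's configs, here is a toy sketch (editor's addition; the SimpleNamespace cfg and ToyNet are invented stand-ins, chosen so that cfg.net != 'cdmask' takes the plain forward branch):

import types
import numpy as np
import torch
from torch import nn
from utilss import GradCAM, show_cam_on_image

class ToyNet(nn.Module):
    # invented two-input model standing in for the repo's change-detection nets
    def __init__(self):
        super().__init__()
        self.feat = nn.Conv2d(3, 8, 3, padding=1)   # target layer for the CAM
        self.head = nn.Conv2d(8, 2, 1)              # 2 classes: unchanged / changed

    def forward(self, x1, x2):
        return self.head(self.feat(x1) - self.feat(x2))

cfg = types.SimpleNamespace(net='toy')              # minimal stand-in for the repo config
net = ToyNet()
cam = GradCAM(cfg, model=net, target_layers=[net.feat])

x1, x2 = torch.rand(1, 3, 64, 64), torch.rand(1, 3, 64, 64)
grayscale = cam(input_tensor=(x1, x2), target_category=1)    # (1, 64, 64), values in [0, 1]

base = np.float32(x1[0].permute(1, 2, 0).numpy())            # base image in [0, 1]
overlay = show_cam_on_image(base, grayscale[0], use_rgb=True)
print(overlay.shape, overlay.dtype)                          # (64, 64, 3) uint8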