added a few ldm files
- ldm/analysis_utils.py +27 -0
- ldm/loading_utils.py +38 -0
- ldm/lr_scheduler.py +120 -0
- ldm/plotting_utils.py +200 -0
- ldm/util.py +243 -0
ldm/analysis_utils.py
ADDED
@@ -0,0 +1,27 @@
import torch

EPS = 1e-10

def get_CosineDistance_matrix(features):
    if features.dim() > 2:
        features = features.reshape(features.shape[0], -1)

    features_norm = features / (EPS + features.norm(dim=1)[:, None])
    ans = torch.mm(features_norm, features_norm.transpose(0, 1))

    # We want distance, not similarity.
    ans = torch.add(-ans, 1.)

    return ans

def aggregatefrom_specimen_to_species(sorted_class_names_according_to_class_indx, specimen_distance_matrix, z_size, channels):
    unique_sorted_class_names_according_to_class_indx = sorted(set(sorted_class_names_according_to_class_indx))

    # species_dist_matrix = torch.zeros(len(unique_sorted_class_names_according_to_class_indx), 256, 16, 16)
    species_dist_matrix = torch.zeros(len(unique_sorted_class_names_according_to_class_indx), channels, z_size, z_size)
    for indx_i, i in enumerate(unique_sorted_class_names_according_to_class_indx):
        class_i_indices = [idx for idx, element in enumerate(sorted_class_names_according_to_class_indx) if element == i]
        species_dist_matrix[indx_i] = torch.mean(specimen_distance_matrix[class_i_indices, :], dim=0, keepdim=True)

    return species_dist_matrix
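A minimal sketch of how get_CosineDistance_matrix might be called; the tensor shape and batch size below are hypothetical, not from this commit:

import torch
from ldm.analysis_utils import get_CosineDistance_matrix

# Four hypothetical specimen embeddings of shape (channels, z, z); anything
# with more than 2 dims is flattened per row before the cosine distance.
features = torch.randn(4, 8, 16, 16)
dist = get_CosineDistance_matrix(features)

print(dist.shape)       # torch.Size([4, 4])
print(dist.diagonal())  # ~0: each embedding has (near-)zero distance to itself

aggregatefrom_specimen_to_species then averages such per-specimen entries over all specimens that share a class name, producing one entry per species.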
ldm/loading_utils.py
ADDED
@@ -0,0 +1,38 @@
# based on https://github.com/CompVis/taming-transformers

import yaml
from omegaconf import OmegaConf
import torch
from ldm.util import instantiate_from_config

######### loaders

def load_config(config_path, display=False):
    config = OmegaConf.load(config_path)
    if display:
        print(yaml.dump(OmegaConf.to_container(config)))
    return config

def load_model_from_config(config, ckpt):
    print(f"Loading model from {ckpt}")
    pl_sd = torch.load(ckpt)  # , map_location="cpu")
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)
    m, u = model.load_state_dict(sd, strict=False)
    model.cuda()
    model.eval()
    return model

def load_model(config_path, ckpt_path=None):
    # def load_model(config_path, ckpt_path=None, cuda=False, model_type=VQModel):
    #     breakpoint()
    #     model = model_type(**config.model.params)
    #     if ckpt_path is not None:
    #         sd = torch.load(ckpt_path, map_location="cpu")["state_dict"]
    #         missing, unexpected = model.load_state_dict(sd, strict=True)
    #     if cuda:
    #         model = model.cuda()

    config = OmegaConf.load(config_path)
    model = load_model_from_config(config, ckpt_path)
    return model
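A sketch of the intended call pattern; the config and checkpoint paths are placeholders, and a CUDA device is assumed since load_model_from_config calls model.cuda():

from ldm.loading_utils import load_config, load_model

# Hypothetical paths; substitute a real config/checkpoint pair.
config = load_config("configs/example.yaml", display=True)
model = load_model("configs/example.yaml", ckpt_path="logs/run/checkpoints/last.ckpt")

Note that load_state_dict is called with strict=False, so missing or unexpected keys in the checkpoint are silently tolerated.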
ldm/lr_scheduler.py
ADDED
@@ -0,0 +1,120 @@
import numpy as np


class LambdaWarmUpCosineScheduler:
    """
    note: use with a base_lr of 1.0
    """
    def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_steps, verbosity_interval=0):
        self.lr_warm_up_steps = warm_up_steps
        self.lr_start = lr_start
        self.lr_min = lr_min
        self.lr_max = lr_max
        self.lr_max_decay_steps = max_decay_steps
        self.last_lr = 0.
        self.verbosity_interval = verbosity_interval

    def schedule(self, n, **kwargs):
        if self.verbosity_interval > 0:
            if n % self.verbosity_interval == 0:
                print(f"current step: {n}, recent lr-multiplier: {self.last_lr}")
        if n < self.lr_warm_up_steps:
            lr = (self.lr_max - self.lr_start) / self.lr_warm_up_steps * n + self.lr_start
            self.last_lr = lr
            return lr
        else:
            t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps)
            t = min(t, 1.0)
            lr = self.lr_min + 0.5 * (self.lr_max - self.lr_min) * (1 + np.cos(t * np.pi))
            self.last_lr = lr
            return lr

    def __call__(self, n, **kwargs):
        return self.schedule(n, **kwargs)


class LambdaWarmUpCosineScheduler2:
    """
    supports repeated iterations, configurable via lists
    note: use with a base_lr of 1.0.
    """
    def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths, verbosity_interval=0, gamma=0.99, step_size=1000):
        assert len(warm_up_steps) == len(f_min) == len(f_max) == len(f_start) == len(cycle_lengths)
        self.lr_warm_up_steps = warm_up_steps
        self.f_start = f_start
        self.f_min = f_min
        self.f_max = f_max
        self.gamma = gamma
        self.step_size = step_size
        self.cycle_lengths = cycle_lengths
        self.cum_cycles = np.cumsum([0] + list(self.cycle_lengths))
        self.last_f = 0.
        self.verbosity_interval = verbosity_interval

    def find_in_interval(self, n):
        interval = 0
        for cl in self.cum_cycles[1:]:
            if n <= cl:
                return interval
            interval += 1

    def schedule(self, n, **kwargs):
        cycle = self.find_in_interval(n)
        n = n - self.cum_cycles[cycle]
        if self.verbosity_interval > 0:
            if n % self.verbosity_interval == 0:
                print(f"current step: {n}, recent lr-multiplier: {self.last_f}, "
                      f"current cycle {cycle}")
        if n < self.lr_warm_up_steps[cycle]:
            f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle]
            self.last_f = f
            return f
        else:
            t = (n - self.lr_warm_up_steps[cycle]) / (self.cycle_lengths[cycle] - self.lr_warm_up_steps[cycle])
            t = min(t, 1.0)
            f = self.f_min[cycle] + 0.5 * (self.f_max[cycle] - self.f_min[cycle]) * (1 + np.cos(t * np.pi))
            self.last_f = f
            return f

    def __call__(self, n, **kwargs):
        return self.schedule(n, **kwargs)


class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2):

    def schedule(self, n, **kwargs):
        cycle = self.find_in_interval(n)
        n = n - self.cum_cycles[cycle]
        if self.verbosity_interval > 0:
            if n % self.verbosity_interval == 0:
                print(f"current step: {n}, recent lr-multiplier: {self.last_f}, "
                      f"current cycle {cycle}")

        if n < self.lr_warm_up_steps[cycle]:
            f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle]
            self.last_f = f
            return f
        else:
            f = self.f_min[cycle] + (self.f_max[cycle] - self.f_min[cycle]) * (self.cycle_lengths[cycle] - n) / (self.cycle_lengths[cycle])
            self.last_f = f
            return f

class LambdaLinearScheduler_step(LambdaWarmUpCosineScheduler2):

    def schedule(self, n, **kwargs):
        cycle = self.find_in_interval(n)
        n = n - self.cum_cycles[cycle]
        if self.verbosity_interval > 0:
            if n % self.verbosity_interval == 0:
                print(f"current step: {n}, recent lr-multiplier: {self.last_f}, "
                      f"current cycle {cycle}")

        if n < self.lr_warm_up_steps[cycle]:
            f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle]
            self.last_f = f
            return f
        else:
            f = self.gamma ** ((n - self.lr_warm_up_steps[cycle]) // self.step_size)
            # f = self.f_min[cycle] + (self.f_max[cycle] - self.f_min[cycle]) * (self.cycle_lengths[cycle] - n) / (self.cycle_lengths[cycle])
            self.last_f = f
            return f

# class LambdaCustomScheduler:
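These classes return learning-rate multipliers rather than learning rates, so (per the docstrings) they are meant to be wrapped in torch.optim.lr_scheduler.LambdaLR with a base lr of 1.0. A small illustrative sketch; the optimizer, warm-up length, and cycle length are made up:

import torch
from ldm.lr_scheduler import LambdaLinearScheduler

# One cycle: 100 warm-up steps out of 1000 total. The multiplier ramps
# linearly from f_start to f_max, then decays linearly toward f_min.
sched_fn = LambdaLinearScheduler(
    warm_up_steps=[100], f_min=[0.0], f_max=[1.0], f_start=[1e-6],
    cycle_lengths=[1000],
)

opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=1.0)  # base_lr of 1.0
lr_sched = torch.optim.lr_scheduler.LambdaLR(opt, lr_lambda=sched_fn)

for step in range(5):
    opt.step()
    lr_sched.step()
    print(step, opt.param_groups[0]["lr"])  # multiplier grows during warm-up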
ldm/plotting_utils.py
ADDED
@@ -0,0 +1,200 @@
# based on https://github.com/CompVis/taming-transformers

import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path
import torchvision
import torch
import numpy as np
from PIL import Image
import json
import csv
import pandas as pd

from sklearn.metrics import ConfusionMatrixDisplay


def dump_to_json(dict, ckpt_path, name='results', get_fig_path=True):

    if get_fig_path:
        root = get_fig_pth(ckpt_path)
    else:
        root = ckpt_path
    if not os.path.exists(root):
        os.mkdir(root)

    with open(os.path.join(root, name + ".json"), "w") as outfile:
        json.dump(dict, outfile)


def save_to_cvs(ckpt_path, postfix, file_name, list_of_created_sequence):
    if ckpt_path is not None:
        root = get_fig_pth(ckpt_path, postfix=postfix)
    else:
        root = postfix

    file = open(os.path.join(root, file_name), 'w')
    with file:
        write = csv.writer(file)
        write.writerows(list_of_created_sequence)

def save_to_txt(arr, ckpt_path, name='results'):
    root = get_fig_pth(ckpt_path)
    with open(os.path.join(root, name + ".txt"), "w") as outfile:
        outfile.write(str(arr))


def save_image_grid(torch_images, ckpt_path=None, subfolder=None, postfix="", nrow=10):
    if ckpt_path is not None:
        root = get_fig_pth(ckpt_path, postfix=subfolder)
    else:
        root = subfolder

    grid = torchvision.utils.make_grid(torch_images, nrow=nrow)
    grid = torch.clamp(grid, -1., 1.)

    grid = (grid + 1.0) / 2.0  # -1,1 -> 0,1; c,h,w
    grid = grid.transpose(0, 1).transpose(1, 2).squeeze(-1)
    grid = grid.cpu().numpy()
    grid = (grid * 255).astype(np.uint8)
    filename = "code_changes_" + postfix + ".png"
    path = os.path.join(root, filename)
    os.makedirs(os.path.split(path)[0], exist_ok=True)
    Image.fromarray(grid).save(path)


def unprocess_image(torch_image):
    torch_image = torch.clamp(torch_image, -1., 1.)

    torch_image = (torch_image + 1.0) / 2.0  # -1,1 -> 0,1; c,h,w
    torch_image = torch_image.transpose(0, 1).transpose(1, 2).squeeze(-1)
    torch_image = torch_image.cpu().numpy()
    torch_image = (torch_image * 255).astype(np.uint8)
    return torch_image

def save_image(torch_image, image_name, ckpt_path=None, subfolder=None):
    if ckpt_path is not None:
        root = get_fig_pth(ckpt_path, postfix=subfolder)
    else:
        root = subfolder

    torch_image = unprocess_image(torch_image)

    filename = image_name + ".png"
    path = os.path.join(root, filename)
    os.makedirs(os.path.split(path)[0], exist_ok=True)
    fig = plt.figure()
    plt.imshow(torch_image[0].squeeze())
    fig.savefig(path, bbox_inches='tight', dpi=300)


def get_fig_pth(ckpt_path, postfix=None):
    figs_postfix = 'figs'
    postfix = os.path.join(figs_postfix, postfix) if postfix is not None else figs_postfix
    parent_path = Path(ckpt_path).parent.parent.absolute()
    fig_path = Path(os.path.join(parent_path, postfix))
    os.makedirs(fig_path, exist_ok=True)
    return fig_path

def plot_heatmap(heatmap, ckpt_path=None, title='default', postfix=None):
    if ckpt_path is not None:
        path = get_fig_pth(ckpt_path, postfix=postfix)
    else:
        path = postfix

    # show
    fig = plt.figure()
    ax = plt.imshow(heatmap, cmap='hot', interpolation='nearest')
    plt.tick_params(left=False, bottom=False)
    # cbar = ax.collections[0].colorbar
    cbar = plt.colorbar(ax)
    cbar.ax.tick_params(labelsize=15)
    plt.axis('off')
    plt.show()
    fig.savefig(os.path.join(path, title + " heat_map.png"), bbox_inches='tight', dpi=300)
    pd.DataFrame(heatmap.numpy()).to_csv(os.path.join(path, title + " heat_map.csv"))

def plot_heatmap_at_path(heatmap, save_path, ckpt_path=None, title='default', postfix=None):
    if ckpt_path is not None:
        path = get_fig_pth(ckpt_path, postfix=postfix)
    else:
        path = postfix

    # show
    fig = plt.figure()
    ax = plt.imshow(heatmap, cmap='hot', interpolation='nearest')
    plt.tick_params(left=False, bottom=False)
    # cbar = ax.collections[0].colorbar
    cbar = plt.colorbar(ax)
    cbar.ax.tick_params(labelsize=15)
    plt.axis('off')
    plt.show()
    fig.savefig(os.path.join(save_path, title + "_heat_map.png"), bbox_inches='tight', dpi=300)
    pd.DataFrame(heatmap.numpy()).to_csv(os.path.join(save_path, title + "_heat_map.csv"))

def plot_confusionmatrix(preds, classes, classnames, ckpt_path, postfix=None, title="", get_fig_path=True):
    fig, ax = plt.subplots(figsize=(30, 30))
    preds_max = np.argmax(preds.cpu().numpy(), axis=-1)
    disp = ConfusionMatrixDisplay.from_predictions(classes.cpu().numpy(), preds_max, display_labels=classnames, normalize='true', xticks_rotation='vertical', ax=ax)
    disp.plot()

    if get_fig_path:
        fig_path = get_fig_pth(ckpt_path, postfix=postfix)
    else:
        fig_path = ckpt_path
    if not os.path.exists(fig_path):
        os.mkdir(fig_path)

    print(fig_path)
    fig.savefig(os.path.join(fig_path, title + " heat_map.png"))

def plot_confusionmatrix_colormap(preds, classes, classnames, ckpt_path, postfix=None, title="", get_fig_path=True):
    fig, ax = plt.subplots(figsize=(30, 30))
    preds_max = np.argmax(preds.cpu().numpy(), axis=-1)
    class_labels = list(range(len(classnames)))
    disp = ConfusionMatrixDisplay.from_predictions(classes.cpu().numpy(), preds_max, display_labels=class_labels, normalize='true', xticks_rotation='vertical', ax=ax, cmap='coolwarm')
    disp.plot()

    if get_fig_path:
        fig_path = get_fig_pth(ckpt_path, postfix=postfix)
    else:
        fig_path = ckpt_path
    if not os.path.exists(fig_path):
        os.mkdir(fig_path)

    print(fig_path)
    fig.savefig(os.path.join(fig_path, title + " heat_map_coolwarm.png"))


class Histogram_plotter:
    def __init__(self, codes_per_phylolevel, n_phylolevels, n_embed,
                 converter,
                 indx_to_label,
                 ckpt_path, directory):
        self.codes_per_phylolevel = codes_per_phylolevel
        self.n_phylolevels = n_phylolevels
        self.n_embed = n_embed
        self.converter = converter
        self.ckpt_path = ckpt_path
        self.directory = directory
        self.indx_to_label = indx_to_label

    def plot_histograms(self, histograms, species_indx, is_nonattribute=False, prefix="species"):
        fig, axs = plt.subplots(self.codes_per_phylolevel, self.n_phylolevels, figsize=(5 * self.n_phylolevels, 30))
        for i, ax in enumerate(axs.reshape(-1)):
            ax.hist(histograms[i], density=True, range=(0, self.n_embed - 1), bins=self.n_embed)

            if not is_nonattribute:
                code_location, level = self.converter.get_code_reshaped_index(i)
                ax.set_title("code " + str(code_location) + "/level " + str(level))
            else:
                ax.set_title("code " + str(i))

        plt.show()
        sub_dir = 'attribute' if not is_nonattribute else 'non_attribute'
        fig.savefig(os.path.join(get_fig_pth(self.ckpt_path, postfix=self.directory + '/' + sub_dir), "{}_{}_{}_histogram.png".format(prefix, species_indx, self.indx_to_label[species_indx])), bbox_inches='tight', dpi=300)
        plt.close(fig)
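get_fig_pth anchors every output under a figs/ directory two levels above the checkpoint, so most helpers only need a ckpt_path. A small sketch with made-up paths and toy tensors:

import torch
from ldm.plotting_utils import plot_heatmap, save_image_grid

ckpt = "logs/run/checkpoints/last.ckpt"  # figures land under logs/run/figs/

# Toy 16x16 distance matrix -> "demo heat_map.png" and "demo heat_map.csv"
plot_heatmap(torch.rand(16, 16), ckpt_path=ckpt, title="demo")

# Grid of 8 images in [-1, 1] -> figs/grids/code_changes_demo.png
images = torch.rand(8, 3, 32, 32) * 2 - 1
save_image_grid(images, ckpt_path=ckpt, subfolder="grids", postfix="demo")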
ldm/util.py
ADDED
@@ -0,0 +1,243 @@
import importlib
import os
import torch
import hashlib
import requests
import numpy as np
from tqdm import tqdm
from collections import abc
from einops import rearrange
from functools import partial

import multiprocessing as mp
from threading import Thread
from queue import Queue

from inspect import isfunction
from PIL import Image, ImageDraw, ImageFont

URL_MAP = {
    "vgg_lpips": "https://heibox.uni-heidelberg.de/f/607503859c864bc1b30b/?dl=1"
}

CKPT_MAP = {
    "vgg_lpips": "vgg.pth"
}

MD5_MAP = {
    "vgg_lpips": "d507d7349b931f0638a25a48a722f98a"
}

def md5_hash(path):
    with open(path, "rb") as f:
        content = f.read()
    return hashlib.md5(content).hexdigest()

def log_txt_as_img(wh, xc, size=10):
    # wh a tuple of (width, height)
    # xc a list of captions to plot
    b = len(xc)
    txts = list()
    for bi in range(b):
        txt = Image.new("RGB", wh, color="white")
        draw = ImageDraw.Draw(txt)
        font = ImageFont.truetype('data/DejaVuSans.ttf', size=size)
        nc = int(40 * (wh[0] / 256))
        lines = "\n".join(xc[bi][start:start + nc] for start in range(0, len(xc[bi]), nc))

        try:
            draw.text((0, 0), lines, fill="black", font=font)
        except UnicodeEncodeError:
            print("Can't encode string for logging. Skipping.")

        txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0
        txts.append(txt)
    txts = np.stack(txts)
    txts = torch.tensor(txts)
    return txts

def download(url, local_path, chunk_size=1024):
    os.makedirs(os.path.split(local_path)[0], exist_ok=True)
    with requests.get(url, stream=True) as r:
        total_size = int(r.headers.get("content-length", 0))
        with tqdm(total=total_size, unit="B", unit_scale=True) as pbar:
            with open(local_path, "wb") as f:
                for data in r.iter_content(chunk_size=chunk_size):
                    if data:
                        f.write(data)
                        pbar.update(chunk_size)

def get_ckpt_path(name, root, check=False):
    assert name in URL_MAP
    path = os.path.join(root, CKPT_MAP[name])
    if not os.path.exists(path) or (check and not md5_hash(path) == MD5_MAP[name]):
        print("Downloading {} model from {} to {}".format(name, URL_MAP[name], path))
        download(URL_MAP[name], path)
        md5 = md5_hash(path)
        assert md5 == MD5_MAP[name], md5
    return path


def ismap(x):
    if not isinstance(x, torch.Tensor):
        return False
    return (len(x.shape) == 4) and (x.shape[1] > 3)


def isimage(x):
    if not isinstance(x, torch.Tensor):
        return False
    return (len(x.shape) == 4) and (x.shape[1] == 3 or x.shape[1] == 1)


def exists(x):
    return x is not None


def default(val, d):
    if exists(val):
        return val
    return d() if isfunction(d) else d


def mean_flat(tensor):
    """
    https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/nn.py#L86
    Take the mean over all non-batch dimensions.
    """
    return tensor.mean(dim=list(range(1, len(tensor.shape))))


def count_params(model, verbose=False):
    total_params = sum(p.numel() for p in model.parameters())
    if verbose:
        print(f"{model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.")
    return total_params


def instantiate_from_config(config):
    if not "target" in config:
        if config == '__is_first_stage__':
            return None
        elif config == "__is_unconditional__":
            return None
        raise KeyError("Expected key `target` to instantiate.")
    return get_obj_from_str(config["target"])(**config.get("params", dict()))


def get_obj_from_str(string, reload=False):
    module, cls = string.rsplit(".", 1)
    if reload:
        module_imp = importlib.import_module(module)
        importlib.reload(module_imp)
    return getattr(importlib.import_module(module, package=None), cls)


def _do_parallel_data_prefetch(func, Q, data, idx, idx_to_fn=False):
    # create dummy dataset instance

    # run prefetching
    if idx_to_fn:
        res = func(data, worker_id=idx)
    else:
        res = func(data)
    Q.put([idx, res])
    Q.put("Done")


def parallel_data_prefetch(
        func: callable, data, n_proc, target_data_type="ndarray", cpu_intensive=True, use_worker_id=False
):
    # if target_data_type not in ["ndarray", "list"]:
    #     raise ValueError(
    #         "Data, which is passed to parallel_data_prefetch has to be either of type list or ndarray."
    #     )
    if isinstance(data, np.ndarray) and target_data_type == "list":
        raise ValueError("list expected but function got ndarray.")
    elif isinstance(data, abc.Iterable):
        if isinstance(data, dict):
            print(
                'WARNING:"data" argument passed to parallel_data_prefetch is a dict: Using only its values and disregarding keys.'
            )
            data = list(data.values())
        if target_data_type == "ndarray":
            data = np.asarray(data)
        else:
            data = list(data)
    else:
        raise TypeError(
            f"The data, that shall be processed parallel has to be either an np.ndarray or an Iterable, but is actually {type(data)}."
        )

    if cpu_intensive:
        Q = mp.Queue(1000)
        proc = mp.Process
    else:
        Q = Queue(1000)
        proc = Thread
    # spawn processes
    if target_data_type == "ndarray":
        arguments = [
            [func, Q, part, i, use_worker_id]
            for i, part in enumerate(np.array_split(data, n_proc))
        ]
    else:
        step = (
            int(len(data) / n_proc + 1)
            if len(data) % n_proc != 0
            else int(len(data) / n_proc)
        )
        arguments = [
            [func, Q, part, i, use_worker_id]
            for i, part in enumerate(
                [data[i: i + step] for i in range(0, len(data), step)]
            )
        ]
    processes = []
    for i in range(n_proc):
        p = proc(target=_do_parallel_data_prefetch, args=arguments[i])
        processes += [p]

    # start processes
    print("Start prefetching...")
    import time

    start = time.time()
    gather_res = [[] for _ in range(n_proc)]
    try:
        for p in processes:
            p.start()

        k = 0
        while k < n_proc:
            # get result
            res = Q.get()
            if res == "Done":
                k += 1
            else:
                gather_res[res[0]] = res[1]

    except Exception as e:
        print("Exception: ", e)
        for p in processes:
            p.terminate()

        raise e
    finally:
        for p in processes:
            p.join()
        print(f"Prefetching complete. [{time.time() - start} sec.]")

    if target_data_type == 'ndarray':
        if not isinstance(gather_res[0], np.ndarray):
            return np.concatenate([np.asarray(r) for r in gather_res], axis=0)

        # order outputs
        return np.concatenate(gather_res, axis=0)
    elif target_data_type == 'list':
        out = []
        for r in gather_res:
            out.extend(r)
        return out
    else:
        return gather_res
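instantiate_from_config drives most object construction in this codebase: a config mapping (usually loaded via OmegaConf) names a class under "target" and its constructor kwargs under "params". A self-contained sketch; torch.nn.Linear is just a stand-in target, not something this commit configures:

from ldm.util import instantiate_from_config, count_params

# Any importable class can be a target; the dotted path is resolved with
# importlib and called with the params dict as keyword arguments.
config = {
    "target": "torch.nn.Linear",
    "params": {"in_features": 8, "out_features": 2},
}
layer = instantiate_from_config(config)
count_params(layer, verbose=True)  # prints "Linear has 0.00 M params." (18 params)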