Spaces: Running on Zero
Commit: add directed ncut (test)

Files changed:
- app.py: +382 −39
- directed_ncut.py: +287 −0
- requirements.txt: +1 −1
app.py
CHANGED
@@ -183,6 +183,84 @@ def compute_ncut(
     return rgb, logging_str, eigvecs
 
 
+def compute_ncut_directed(
+    features_1,
+    features_2,
+    num_eig=100,
+    num_sample_ncut=10000,
+    affinity_focal_gamma=0.3,
+    knn_ncut=10,
+    knn_tsne=10,
+    embedding_method="UMAP",
+    embedding_metric='euclidean',
+    num_sample_tsne=300,
+    perplexity=150,
+    n_neighbors=150,
+    min_dist=0.1,
+    sampling_method="QuickFPS",
+    metric="cosine",
+    indirect_connection=False,
+    make_orthogonal=False,
+    make_symmetric=False,
+    progess_start=0.4,
+):
+    print("Using directed_ncut")
+    print("features_1.shape", features_1.shape)
+    print("features_2.shape", features_2.shape)
+    from directed_ncut import nystrom_ncut
+    progress = gr.Progress()
+    logging_str = ""
+
+    num_nodes = np.prod(features_1.shape[:-2])
+    if num_nodes / 2 < num_eig:
+        # raise gr.Error("Number of eigenvectors should be less than half the number of nodes.")
+        gr.Warning("Number of eigenvectors should be less than half the number of nodes.\n" f"Setting num_eig to {num_nodes // 2 - 1}.")
+        num_eig = num_nodes // 2 - 1
+        logging_str += f"Number of eigenvectors should be less than half the number of nodes.\n" f"Setting num_eig to {num_nodes // 2 - 1}.\n"
+
+    start = time.time()
+    progress(progess_start+0.0, desc="NCut")
+    n_features = features_1.shape[-2]
+    _features_1 = rearrange(features_1, "b h w d c -> (b h w) (d c)")
+    _features_2 = rearrange(features_2, "b h w d c -> (b h w) (d c)")
+    eigvecs, eigvals, _ = nystrom_ncut(
+        _features_1,
+        features_B=_features_2,
+        num_eig=num_eig,
+        num_sample=num_sample_ncut,
+        device="cuda" if torch.cuda.is_available() else "cpu",
+        affinity_focal_gamma=affinity_focal_gamma,
+        knn=knn_ncut,
+        sample_method=sampling_method,
+        distance=metric,
+        normalize_features=False,
+        indirect_connection=indirect_connection,
+        make_orthogonal=make_orthogonal,
+        make_symmetric=make_symmetric,
+        n_features=n_features,
+    )
+    # print(f"NCUT time: {time.time() - start:.2f}s")
+    logging_str += f"NCUT time: {time.time() - start:.2f}s\n"
+
+    start = time.time()
+    progress(progess_start+0.01, desc="spectral-tSNE")
+    _, rgb = eigenvector_to_rgb(
+        eigvecs,
+        method=embedding_method,
+        metric=embedding_metric,
+        num_sample=num_sample_tsne,
+        perplexity=perplexity,
+        n_neighbors=n_neighbors,
+        min_distance=min_dist,
+        knn=knn_tsne,
+        device="cuda" if torch.cuda.is_available() else "cpu",
+    )
+    logging_str += f"{embedding_method} time: {time.time() - start:.2f}s\n"
+
+    rgb = rgb.reshape(features_1.shape[:3] + (3,))
+    return rgb, logging_str, eigvecs
+
+
 def dont_use_too_much_green(image_rgb):
     # make sure the foval 40% of the image is red leading
     x1, x2 = int(image_rgb.shape[1] * 0.3), int(image_rgb.shape[1] * 0.7)

@@ -592,6 +670,8 @@ def ncut_run(
     **kwargs,
 ):
     advanced = kwargs.get("advanced", False)
+    directed = kwargs.get("directed", False)
+
     progress = gr.Progress()
     progress(0.2, desc="Feature Extraction")
 

@@ -640,6 +720,11 @@ def ncut_run(
     features = extract_features(
         images, model, node_type=node_type, layer=layer-1, batch_size=BATCH_SIZE
     )
+    if directed:
+        node_type2 = kwargs.get("node_type2", None)
+        features_B = extract_features(
+            images, model, node_type=node_type2, layer=layer-1, batch_size=BATCH_SIZE
+        )
    # print(f"Feature extraction time (gpu): {time.time() - start:.2f}s")
    logging_str += f"Backbone time: {time.time() - start:.2f}s\n"
    del model

@@ -768,25 +853,59 @@ def ncut_run(
 
 
     # ailgnedcut
-    … (19 deleted lines not rendered in this commit view)
+    if not directed:
+        rgb, _logging_str, eigvecs = compute_ncut(
+            features,
+            num_eig=num_eig,
+            num_sample_ncut=num_sample_ncut,
+            affinity_focal_gamma=affinity_focal_gamma,
+            knn_ncut=knn_ncut,
+            knn_tsne=knn_tsne,
+            num_sample_tsne=num_sample_tsne,
+            embedding_method=embedding_method,
+            embedding_metric=embedding_metric,
+            perplexity=perplexity,
+            n_neighbors=n_neighbors,
+            min_dist=min_dist,
+            sampling_method=sampling_method,
+            indirect_connection=indirect_connection,
+            make_orthogonal=make_orthogonal,
+            metric=ncut_metric,
+        )
+    if directed:
+        head_index_text = kwargs.get("head_index_text", None)
+        n_heads = features.shape[-2]  # (batch, h, w, n_heads, d)
+        if head_index_text == 'all':
+            head_idx = torch.arange(n_heads)
+        else:
+            _idxs = head_index_text.split(",")
+            head_idx = torch.tensor([int(idx) for idx in _idxs])
+        features_A = features[:, :, :, head_idx, :]
+        features_B = features_B[:, :, :, head_idx, :]
+
+        rgb, _logging_str, eigvecs = compute_ncut_directed(
+            features_A,
+            features_B,
+            num_eig=num_eig,
+            num_sample_ncut=num_sample_ncut,
+            affinity_focal_gamma=affinity_focal_gamma,
+            knn_ncut=knn_ncut,
+            knn_tsne=knn_tsne,
+            num_sample_tsne=num_sample_tsne,
+            embedding_method=embedding_method,
+            embedding_metric=embedding_metric,
+            perplexity=perplexity,
+            n_neighbors=n_neighbors,
+            min_dist=min_dist,
+            sampling_method=sampling_method,
+            indirect_connection=False,
+            make_orthogonal=make_orthogonal,
+            metric=ncut_metric,
+            make_symmetric=kwargs.get("make_symmetric", None),
+        )
+
+
 
     logging_str += _logging_str
 
     if "AlignedThreeModelAttnNodes" == model_name:

@@ -858,26 +977,26 @@ def ncut_run(
 
 def _ncut_run(*args, **kwargs):
     n_ret = kwargs.pop("n_ret", 1)
-    try:
-        … (try body not rendered in this commit view)
-    except Exception as e:
-        … (except body not rendered in this commit view)
+    # try:
+    #     if torch.cuda.is_available():
+    #         torch.cuda.empty_cache()
+
+    #     ret = ncut_run(*args, **kwargs)
+
+    #     if torch.cuda.is_available():
+    #         torch.cuda.empty_cache()
+
+    #     ret = list(ret)[:n_ret] + [ret[-1]]
+    #     return ret
+    # except Exception as e:
+    #     gr.Error(str(e))
+    #     if torch.cuda.is_available():
+    #         torch.cuda.empty_cache()
+    #     return *(None for _ in range(n_ret)), "Error: " + str(e)
+
+    ret = ncut_run(*args, **kwargs)
+    ret = list(ret)[:n_ret] + [ret[-1]]
+    return ret
 
 if USE_HUGGINGFACE_ZEROGPU:
     @spaces.GPU(duration=30)

@@ -1085,12 +1204,16 @@ def run_fn(
     recursion_l1_gamma=0.5,
     recursion_l2_gamma=0.5,
     recursion_l3_gamma=0.5,
+    node_type2="k",
+    head_index_text='all',
+    make_symmetric=False,
     n_ret=1,
     plot_clusters=False,
     alignedcut_eig_norm_plot=False,
     advanced=False,
+    directed=False,
 ):
-    … (1 deleted line not rendered in this commit view)
+    print(node_type2, head_index_text, make_symmetric)
     progress=gr.Progress()
     progress(0, desc="Starting")
 

@@ -1222,6 +1345,10 @@ def run_fn(
         "plot_clusters": plot_clusters,
         "alignedcut_eig_norm_plot": alignedcut_eig_norm_plot,
         "advanced": advanced,
+        "directed": directed,
+        "node_type2": node_type2,
+        "head_index_text": head_index_text,
+        "make_symmetric": make_symmetric,
     }
     # print(kwargs)
 

@@ -1379,7 +1506,7 @@ def fit_trans(rgb1, rgb2, num_layer=3, width=512, batch_size=256, lr=3e-4, fitti
     # Train the model
     trainer.fit(mlp, dataloader)
 
-    … (1 deleted line not rendered in this commit view)
+    mlp.progress(0.99, desc="Applying MLP")
     results = trainer.predict(mlp, data_loader)
     A_transformed = torch.cat(results, dim=0)
 

@@ -2734,10 +2861,226 @@ with demo:
     buttons[-1].click(fn=lambda x: gr.update(visible=True), outputs=rows[-1])
     buttons[-1].click(fn=lambda x: gr.update(visible=False), outputs=buttons[-1])
 
 
-… (2 deleted lines not rendered in this commit view)
+
+    with gr.Tab('Directed (experimental)', visible=True) as tab_directed_ncut:
+
+        target_images = gr.State([])
+        input_images = gr.State([])
+        def add_mlp_fitting_buttons(output_gallery, mlp_gallery, target_images=target_images, input_images=input_images):
+            with gr.Row():
+                # mark_as_target_button = gr.Button("mark target", elem_id=f"mark_as_target_button_{output_gallery.elem_id}", variant='secondary')
+                # mark_as_input_button = gr.Button("mark input", elem_id=f"mark_as_input_button_{output_gallery.elem_id}", variant='secondary')
+                mark_as_target_button = gr.Button("🎯 Mark Target", elem_id=f"mark_as_target_button_{output_gallery.elem_id}", variant='secondary')
+                fit_to_target_button = gr.Button("🔴 [MLP] Fit", elem_id=f"fit_to_target_button_{output_gallery.elem_id}", variant='primary')
+            def mark_fn(images, text="target"):
+                if images is None:
+                    raise gr.Error("No images selected")
+                if len(images) == 0:
+                    raise gr.Error("No images selected")
+                num_images = len(images)
+                gr.Info(f"Marked {num_images} images as {text}")
+                images = [(Image.open(tup[0]), []) for tup in images]
+                return images
+            mark_as_target_button.click(partial(mark_fn, text="target"), inputs=[output_gallery], outputs=[target_images])
+            # mark_as_input_button.click(partial(mark_fn, text="input"), inputs=[output_gallery], outputs=[input_images])
+
+            with gr.Accordion("➡️ MLP Parameters", open=False):
+                num_layers_slider = gr.Slider(2, 10, step=1, label="Number of Layers", value=3, elem_id=f"num_layers_slider_{output_gallery.elem_id}")
+                width_slider = gr.Slider(128, 4096, step=128, label="Width", value=512, elem_id=f"width_slider_{output_gallery.elem_id}")
+                batch_size_slider = gr.Slider(32, 4096, step=32, label="Batch Size", value=128, elem_id=f"batch_size_slider_{output_gallery.elem_id}")
+                lr_slider = gr.Slider(1e-6, 1, step=1e-6, label="Learning Rate", value=3e-4, elem_id=f"lr_slider_{output_gallery.elem_id}")
+                fitting_steps_slider = gr.Slider(1000, 100000, step=1000, label="Fitting Steps", value=30000, elem_id=f"fitting_steps_slider_{output_gallery.elem_id}")
+                fps_sample_slider = gr.Slider(128, 50000, step=128, label="FPS Sample", value=10240, elem_id=f"fps_sample_slider_{output_gallery.elem_id}")
+                segmentation_loss_lambda_slider = gr.Slider(0, 100, step=0.01, label="Segmentation Preserving Loss Lambda", value=1, elem_id=f"segmentation_loss_lambda_slider_{output_gallery.elem_id}")
+
+            fit_to_target_button.click(
+                run_mlp_fit,
+                inputs=[output_gallery, target_images, num_layers_slider, width_slider, batch_size_slider, lr_slider, fitting_steps_slider, fps_sample_slider, segmentation_loss_lambda_slider],
+                outputs=[mlp_gallery],
+            )
+
+        def make_parameters_section_2model(model_ratio=True):
+            gr.Markdown("### Parameters <a style='color: #0044CC;' href='https://ncut-pytorch.readthedocs.io/en/latest/how_to_get_better_segmentation/' target='_blank'>Help</a>")
+            from ncut_pytorch.backbone import list_models, get_demo_model_names
+            model_names = list_models()
+            model_names = sorted(model_names)
+            # only CLIP DINO MAE is implemented for q k v
+            ok_models = ["CLIP(ViT", "DiNO(", "MAE("]
+            model_names = [m for m in model_names if any(ok in m for ok in ok_models)]
+
+            def get_filtered_model_names(name):
+                return [m for m in model_names if name.lower() in m.lower()]
+            def get_default_model_name(name):
+                lst = get_filtered_model_names(name)
+                if len(lst) > 1:
+                    return lst[1]
+                return lst[0]
+
+
+            model_radio = gr.Radio(["CLIP", "DiNO", "MAE"], label="Backbone", value="DiNO", elem_id="model_radio", show_label=True, visible=model_ratio)
+            model_dropdown = gr.Dropdown(get_filtered_model_names("DiNO"), label="", value="DiNO(dino_vitb8_448)", elem_id="model_name", show_label=False)
+            model_radio.change(fn=lambda x: gr.update(choices=get_filtered_model_names(x), value=get_default_model_name(x)), inputs=model_radio, outputs=[model_dropdown])
+            layer_slider = gr.Slider(1, 12, step=1, label="Backbone: Layer index", value=10, elem_id="layer")
+            positive_prompt = gr.Textbox(label="Prompt (Positive)", elem_id="prompt", placeholder="e.g. 'a photo of Gibson Les Pual guitar'")
+            positive_prompt.visible = False
+            negative_prompt = gr.Textbox(label="Prompt (Negative)", elem_id="prompt", placeholder="e.g. 'a photo from egocentric view'")
+            negative_prompt.visible = False
+            node_type_dropdown = gr.Dropdown(['q', 'k', 'v'],
+                label="Left-side Node Type", value="q", elem_id="node_type", info="In directed case, left-side SVD eigenvector is taken")
+            node_type_dropdown2 = gr.Dropdown(['q', 'k', 'v'],
+                label="Right-side Node Type", value="k", elem_id="node_type2")
+            head_index_text = gr.Textbox(value='all', label="Head Index", elem_id="head_index", type="text", info="which attention heads to use, comma separated, e.g. 0,1,2")
+            make_symmetric = gr.Checkbox(label="Make Symmetric", value=False, elem_id="make_symmetric", info="make the graph symmetric by A = (A + A.T) / 2")
+
+            num_eig_slider = gr.Slider(1, 1000, step=1, label="NCUT: Number of eigenvectors", value=100, elem_id="num_eig", info='increase for smaller clusters')
+
+            def change_layer_slider(model_name):
+                # SD2, UNET
+                if "stable" in model_name.lower() and "diffusion" in model_name.lower():
+                    from ncut_pytorch.backbone import SD_KEY_DICT
+                    default_layer = 'up_2_resnets_1_block' if 'diffusion-3' not in model_name else 'block_23'
+                    return (gr.Slider(1, 49, step=1, label="Diffusion: Timestep (Noise)", value=5, elem_id="layer", visible=True, info="Noise level, 50 is max noise"),
+                            gr.Dropdown(SD_KEY_DICT[model_name], label="Diffusion: Layer and Node", value=default_layer, elem_id="node_type", info="U-Net (v1, v2) or DiT (v3)"))
+
+                if model_name == "LISSL(xinlai/LISSL-7B-v1)":
+                    layer_names = ["dec_0_input", "dec_0_attn", "dec_0_block", "dec_1_input", "dec_1_attn", "dec_1_block"]
+                    default_layer = "dec_1_block"
+                    return (gr.Slider(1, 6, step=1, label="LISA decoder: Layer index", value=6, elem_id="layer", visible=False, info=""),
+                            gr.Dropdown(layer_names, label="LISA decoder: Layer and Node", value=default_layer, elem_id="node_type"))
+
+                layer_dict = LAYER_DICT
+                if model_name in layer_dict:
+                    value = layer_dict[model_name]
+                    return gr.Slider(1, value, step=1, label="Backbone: Layer index", value=value, elem_id="layer", visible=True, info="")
+                else:
+                    value = 12
+                    return gr.Slider(1, value, step=1, label="Backbone: Layer index", value=value, elem_id="layer", visible=True, info="")
+            model_dropdown.change(fn=change_layer_slider, inputs=model_dropdown, outputs=layer_slider)
+
+            def change_prompt_text(model_name):
+                if model_name in promptable_diffusion_models:
+                    return (gr.Textbox(label="Prompt (Positive)", elem_id="prompt", placeholder="e.g. 'a photo of Gibson Les Pual guitar'", visible=True),
+                            gr.Textbox(label="Prompt (Negative)", elem_id="prompt", placeholder="e.g. 'a photo from egocentric view'", visible=True))
+                return (gr.Textbox(label="Prompt (Positive)", elem_id="prompt", placeholder="e.g. 'a photo of Gibson Les Pual guitar'", visible=False),
+                        gr.Textbox(label="Prompt (Negative)", elem_id="prompt", placeholder="e.g. 'a photo from egocentric view'", visible=False))
+            model_dropdown.change(fn=change_prompt_text, inputs=model_dropdown, outputs=[positive_prompt, negative_prompt])
+
+            with gr.Accordion("Advanced Parameters: NCUT", open=False):
+                gr.Markdown("<a href='https://ncut-pytorch.readthedocs.io/en/latest/how_to_get_better_segmentation/' target='_blank'>Docs: How to Get Better Segmentation</a>")
+                affinity_focal_gamma_slider = gr.Slider(0.01, 1, step=0.01, label="NCUT: Affinity focal gamma", value=0.5, elem_id="affinity_focal_gamma", info="decrease for shaper segmentation")
+                num_sample_ncut_slider = gr.Slider(100, 50000, step=100, label="NCUT: num_sample", value=10000, elem_id="num_sample_ncut", info="Nyström approximation")
+                # sampling_method_dropdown = gr.Dropdown(["QuickFPS", "random"], label="NCUT: Sampling method", value="QuickFPS", elem_id="sampling_method", info="Nyström approximation")
+                sampling_method_dropdown = gr.Radio(["QuickFPS", "random"], label="NCUT: Sampling method", value="QuickFPS", elem_id="sampling_method")
+                # ncut_metric_dropdown = gr.Dropdown(["euclidean", "cosine"], label="NCUT: Distance metric", value="cosine", elem_id="ncut_metric")
+                ncut_metric_dropdown = gr.Radio(["euclidean", "cosine"], label="NCUT: Distance metric", value="cosine", elem_id="ncut_metric")
+                ncut_knn_slider = gr.Slider(1, 100, step=1, label="NCUT: KNN", value=10, elem_id="knn_ncut", info="Nyström approximation")
+                ncut_indirect_connection = gr.Checkbox(label="indirect_connection", value=False, elem_id="ncut_indirect_connection", info="TODO: Indirect connection is not implemented for directed NCUT", interactive=False)
+                ncut_make_orthogonal = gr.Checkbox(label="make_orthogonal", value=False, elem_id="ncut_make_orthogonal", info="Apply post-hoc eigenvectors orthogonalization")
+            with gr.Accordion("Advanced Parameters: Visualization", open=False):
+                # embedding_method_dropdown = gr.Dropdown(["tsne_3d", "umap_3d", "umap_sphere", "tsne_2d", "umap_2d"], label="Coloring method", value="tsne_3d", elem_id="embedding_method")
+                embedding_method_dropdown = gr.Radio(["tsne_3d", "umap_3d", "umap_sphere", "tsne_2d", "umap_2d"], label="Coloring method", value="tsne_3d", elem_id="embedding_method")
+                # embedding_metric_dropdown = gr.Dropdown(["euclidean", "cosine"], label="t-SNE/UMAP metric", value="euclidean", elem_id="embedding_metric")
+                embedding_metric_dropdown = gr.Radio(["euclidean", "cosine"], label="t-SNE/UMAP: metric", value="euclidean", elem_id="embedding_metric")
+                num_sample_tsne_slider = gr.Slider(100, 10000, step=100, label="t-SNE/UMAP: num_sample", value=300, elem_id="num_sample_tsne", info="Nyström approximation")
+                knn_tsne_slider = gr.Slider(1, 100, step=1, label="t-SNE/UMAP: KNN", value=10, elem_id="knn_tsne", info="Nyström approximation")
+                perplexity_slider = gr.Slider(10, 1000, step=10, label="t-SNE: perplexity", value=150, elem_id="perplexity")
+                n_neighbors_slider = gr.Slider(10, 1000, step=10, label="UMAP: n_neighbors", value=150, elem_id="n_neighbors")
+                min_dist_slider = gr.Slider(0.1, 1, step=0.1, label="UMAP: min_dist", value=0.1, elem_id="min_dist")
+            return [model_dropdown, layer_slider, node_type_dropdown, node_type_dropdown2, head_index_text, make_symmetric, num_eig_slider,
+                    affinity_focal_gamma_slider, num_sample_ncut_slider, ncut_knn_slider, ncut_indirect_connection, ncut_make_orthogonal,
+                    embedding_method_dropdown, embedding_metric_dropdown, num_sample_tsne_slider, knn_tsne_slider,
+                    perplexity_slider, n_neighbors_slider, min_dist_slider,
+                    sampling_method_dropdown, ncut_metric_dropdown, positive_prompt, negative_prompt]
+
+        def add_one_model(i_model=1):
+            with gr.Column(scale=5, min_width=200) as col:
+                gr.Markdown(f'### Output Images')
+                output_gallery = gr.Gallery(format='png', value=[], label="NCUT Embedding", show_label=True, elem_id=f"ncut{i_model}", columns=[3], rows=[1], object_fit="contain", height="auto", show_fullscreen_button=True, interactive=False)
+                submit_button = gr.Button("🔴 RUN", elem_id=f"submit_button{i_model}", variant='primary')
+                add_rotate_flip_buttons(output_gallery)
+                add_download_button(output_gallery, f"ncut_embed")
+                mlp_gallery = gr.Gallery(format='png', value=[], label="MLP color align", show_label=True, elem_id=f"mlp{i_model}", columns=[3], rows=[1], object_fit="contain", height="auto", show_fullscreen_button=True, interactive=False)
+                add_mlp_fitting_buttons(output_gallery, mlp_gallery)
+                add_download_button(mlp_gallery, f"mlp_color_align")
+                norm_gallery = gr.Gallery(value=[], label="Eigenvector Magnitude", show_label=True, elem_id=f"eig_norm{i_model}", columns=[3], rows=[1], object_fit="contain", height="auto", show_share_button=True, preview=False, interactive=False)
+                add_download_button(norm_gallery, f"eig_norm")
+                cluster_gallery = gr.Gallery(value=[], label="Clusters", show_label=True, elem_id=f"clusters{i_model}", columns=[2], rows=[4], object_fit="contain", height="auto", show_share_button=True, preview=False, interactive=False)
+                add_download_button(cluster_gallery, f"clusters")
+                [
+                    model_dropdown, layer_slider, node_type_dropdown, node_type_dropdown2, head_index_text, make_symmetric, num_eig_slider,
+                    affinity_focal_gamma_slider, num_sample_ncut_slider, ncut_knn_slider, ncut_indirect_connection, ncut_make_orthogonal,
+                    embedding_method_dropdown, embedding_metric_dropdown, num_sample_tsne_slider, knn_tsne_slider,
+                    perplexity_slider, n_neighbors_slider, min_dist_slider,
+                    sampling_method_dropdown, ncut_metric_dropdown, positive_prompt, negative_prompt
+                ] = make_parameters_section_2model()
+                # logging text box
+                logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+                false_placeholder = gr.Checkbox(label="False", value=False, elem_id="false_placeholder", visible=False)
+                no_prompt = gr.Textbox("", label="", elem_id="empty_placeholder", type="text", placeholder="", visible=False)
+
+                false_placeholder = gr.Checkbox(label="False", value=False, elem_id="false_placeholder", visible=False)
+
+                submit_button.click(
+                    partial(run_fn, n_ret=3, plot_clusters=True, alignedcut_eig_norm_plot=True, advanced=True, directed=True),
+                    inputs=[
+                        input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
+                        positive_prompt, negative_prompt,
+                        false_placeholder, no_prompt, no_prompt, no_prompt,
+                        affinity_focal_gamma_slider, num_sample_ncut_slider, ncut_knn_slider, ncut_indirect_connection, ncut_make_orthogonal,
+                        embedding_method_dropdown, embedding_metric_dropdown, num_sample_tsne_slider, knn_tsne_slider,
+                        perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown, ncut_metric_dropdown,
+                        *[false_placeholder for _ in range(9)],
+                        node_type_dropdown2, head_index_text, make_symmetric
+                    ],
+                    outputs=[output_gallery, cluster_gallery, norm_gallery, logging_text]
+                )
+
+                output_gallery.change(lambda x: gr.update(value=x), inputs=[output_gallery], outputs=[mlp_gallery])
+
+            return output_gallery
+
+        galleries = []
 
+        with gr.Row():
+            with gr.Column(scale=5, min_width=200):
+                input_gallery, submit_button, clear_images_button, dataset_dropdown, num_images_slider, random_seed_slider, load_images_button = make_input_images_section(allow_download=True)
+                submit_button.visible = False
+
+
+            for i in range(3):
+                g = add_one_model()
+                galleries.append(g)
+
+        # Create rows and buttons in a loop
+        rows = []
+        buttons = []
+
+        for i in range(4):
+            row = gr.Row(visible=False)
+            rows.append(row)
+
+            with row:
+                for j in range(4):
+                    with gr.Column(scale=5, min_width=200):
+                        g = add_one_model()
+                        galleries.append(g)
+
+            button = gr.Button("➕ Add Compare", elem_id=f"add_button_{i}", visible=False if i > 0 else True, scale=3)
+            buttons.append(button)
+
+            if i > 0:
+                # Reveal the current row and next button
+                buttons[i - 1].click(fn=lambda x: gr.update(visible=True), outputs=row)
+                buttons[i - 1].click(fn=lambda x: gr.update(visible=True), outputs=button)
+
+                # Hide the current button
+                buttons[i - 1].click(fn=lambda x: gr.update(visible=False), outputs=buttons[i - 1])
+
+        # Last button only reveals the last row and hides itself
+        buttons[-1].click(fn=lambda x: gr.update(visible=True), outputs=rows[-1])
+        buttons[-1].click(fn=lambda x: gr.update(visible=False), outputs=buttons[-1])
+
+
 
     with gr.Tab('📄About'):
         with gr.Column():
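Taken together, the directed branch of ncut_run and compute_ncut_directed above do three tensor moves: parse the head index, slice both node types (left side, e.g. q; right side, e.g. k) by head, and flatten image patches into graph nodes. The following is a minimal, self-contained sketch of that plumbing; all shapes and names below are illustrative toys, not from the commit:

import torch
from einops import rearrange

# hypothetical multi-head backbone features: (batch, h, w, n_heads, channels)
batch, h, w, n_heads, d = 2, 4, 4, 6, 64
features_q = torch.randn(batch, h, w, n_heads, d)  # "Left-side Node Type", e.g. q
features_k = torch.randn(batch, h, w, n_heads, d)  # "Right-side Node Type", e.g. k

# head selection, mirroring the head_index_text parsing ('all' or e.g. '0,1,2')
head_index_text = '0,1,2'
if head_index_text == 'all':
    head_idx = torch.arange(n_heads)
else:
    head_idx = torch.tensor([int(i) for i in head_index_text.split(',')])

features_A = features_q[:, :, :, head_idx, :]  # (2, 4, 4, 3, 64)
features_B = features_k[:, :, :, head_idx, :]

# flatten: one graph node per image patch, with the selected heads concatenated,
# as compute_ncut_directed does before calling nystrom_ncut
_features_A = rearrange(features_A, "b h w d c -> (b h w) (d c)")
_features_B = rearrange(features_B, "b h w d c -> (b h w) (d c)")
print(_features_A.shape)  # torch.Size([32, 192])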
directed_ncut.py
ADDED
@@ -0,0 +1,287 @@
# %%
import torch
import torch.nn.functional as F

def affinity_from_features(
    features,
    features_B=None,
    affinity_focal_gamma=1.0,
    distance="cosine",
    normalize_features=False,
    fill_diagonal=False,
    n_features=1,
):
    """Compute affinity matrix from input features.

    Args:
        features (torch.Tensor): input features, shape (n_samples, n_features)
        features_B (torch.Tensor, optional): if not None, compute affinity between the two feature sets
        affinity_focal_gamma (float): affinity matrix parameter, smaller values reduce the edge weights
            on weak connections, default 1.0
        distance (str): distance metric, 'cosine' (default) or 'euclidean'
        normalize_features (bool): normalize input features before computing affinity matrix,
            default False

    Returns:
        (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
    """
    # compute affinity matrix from input features
    features = features.clone()
    if features_B is not None:
        features_B = features_B.clone()

    # if features_B is not provided, compute affinity matrix on features x features
    # if features_B is provided, compute affinity matrix on features x features_B
    if features_B is not None:
        assert not fill_diagonal, "fill_diagonal should be False when features_B is provided"
    features_B = features if features_B is None else features_B

    if normalize_features:
        features = F.normalize(features, dim=-1)
        features_B = F.normalize(features_B, dim=-1)

    if distance == "cosine":
        # if not check_if_normalized(features):
        # TODO: make sure features are normalized within each head
        features = F.normalize(features, dim=-1)
        # if not check_if_normalized(features_B):
        features_B = F.normalize(features_B, dim=-1)
        A = 1 - (features @ features_B.T) / n_features
    elif distance == "euclidean":
        A = torch.cdist(features, features_B, p=2) / n_features
    else:
        raise ValueError("distance should be 'cosine' or 'euclidean'")

    if fill_diagonal:
        A[torch.arange(A.shape[0]), torch.arange(A.shape[0])] = 0

    # torch.exp makes the affinity matrix positive definite;
    # lower affinity_focal_gamma reduces the weak edge weights
    A = torch.exp(-((A / affinity_focal_gamma)))
    return A

from ncut_pytorch.ncut_pytorch import run_subgraph_sampling, propagate_knn, gram_schmidt
import logging

import torch

def ncut(
    A,
    num_eig=20,
    eig_solver="svd_lowrank",
    make_symmetric=True,
):
    """PyTorch implementation of Normalized cut without Nystrom-like approximation.

    Args:
        A (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
        num_eig (int): number of eigenvectors to return
        eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh']
        make_symmetric (bool): symmetrize A by (A + A.T) / 2 before normalization, default True

    Returns:
        (torch.Tensor): eigenvectors corresponding to the eigenvalues, shape (n_samples, num_eig)
        (torch.Tensor): eigenvalues of the eigenvectors, sorted in descending order
    """
    if make_symmetric:
        # make sure A is symmetric
        A = (A + A.T) / 2

    # two-sided degree normalization; A = D_r^(-1/2) A D_c^(-1/2)
    D_r = A.sum(dim=0).detach().clone()
    D_c = A.sum(dim=1).detach().clone()
    A /= torch.sqrt(D_r)[:, None]
    A /= torch.sqrt(D_c)[None, :]

    # compute eigenvectors
    if eig_solver == "svd_lowrank":  # default
        # only top q eigenvectors, fastest
        eigen_vector, eigen_value, _ = torch.svd_lowrank(A, q=num_eig)
    elif eig_solver == "lobpcg":
        # only top k eigenvectors, fast
        eigen_value, eigen_vector = torch.lobpcg(A, k=num_eig)
    elif eig_solver == "svd":
        # all eigenvectors, slow
        eigen_vector, eigen_value, _ = torch.svd(A)
    elif eig_solver == "eigh":
        # all eigenvectors, slow
        eigen_value, eigen_vector = torch.linalg.eigh(A)
    else:
        raise ValueError(
            "eigen_solver should be 'lobpcg', 'svd_lowrank', 'svd' or 'eigh'"
        )

    # sort eigenvectors by eigenvalues, take top (descending order)
    eigen_value = eigen_value.real
    eigen_vector = eigen_vector.real

    sort_order = torch.argsort(eigen_value, descending=True)[:num_eig]
    eigen_value = eigen_value[sort_order]
    eigen_vector = eigen_vector[:, sort_order]

    if eigen_value.min() < 0:
        logging.warning(
            "negative eigenvalues detected, please make sure the affinity matrix is positive definite"
        )

    return eigen_vector, eigen_value

def nystrom_ncut(
    features,
    features_B=None,
    num_eig=100,
    num_sample=10000,
    knn=10,
    sample_method="farthest",
    distance="cosine",
    affinity_focal_gamma=1.0,
    indirect_connection=False,
    indirect_pca_dim=100,
    device=None,
    eig_solver="svd_lowrank",
    normalize_features=False,
    matmul_chunk_size=8096,
    make_orthogonal=False,
    verbose=False,
    no_propagation=False,
    make_symmetric=False,
    n_features=1,
):
    """PyTorch implementation of Faster Nystrom Normalized cut.

    Args:
        features (torch.Tensor): feature matrix, shape (n_samples, n_features)
        features_B (torch.Tensor): second feature matrix, for an asymmetric affinity matrix, shape (n_samples2, n_features)
        num_eig (int): default 100, number of top eigenvectors to return
        num_sample (int): default 10000, number of samples for Nystrom-like approximation
        knn (int): default 10, number of KNN for propagating eigenvectors from subgraph to full graph,
            smaller knn will result in more sharp eigenvectors
        sample_method (str): sample method, 'farthest' (default) or 'random'
            'farthest' is recommended for better approximation
        distance (str): distance metric, 'cosine' (default) or 'euclidean'
        affinity_focal_gamma (float): affinity matrix parameter, smaller values reduce the weak edge weights,
            resulting in more sharp eigenvectors, default 1.0
        indirect_connection (bool): include indirect connection in the subgraph, default False
        indirect_pca_dim (int): default 100, PCA dimension to reduce the node dimension, only applied to
            the not sampled nodes, not applied to the sampled nodes
        device (str): device to use for computation, if None, will not change device
            a good practice is to pass features by CPU since it's usually large,
            and move subgraph affinity to GPU to speed up eigenvector computation
        eig_solver (str): eigen decompose solver, 'svd_lowrank' (default), 'lobpcg', 'svd', 'eigh'
            'svd_lowrank' is recommended for large scale graph, it's the fastest
            they correspond to torch.svd_lowrank, torch.lobpcg, torch.svd, torch.linalg.eigh
        normalize_features (bool): normalize input features before computing affinity matrix,
            default False
        matmul_chunk_size (int): chunk size for matrix multiplication
            large matrix multiplication is chunked to reduce memory usage,
            smaller chunk size will reduce memory usage but slower computation, default 8096
        make_orthogonal (bool): make eigenvectors orthogonal after propagation, default False
        verbose (bool): show progress bar when propagating eigenvectors from subgraph to full graph
        no_propagation (bool): if True, skip the eigenvector propagation step, only return the subgraph eigenvectors

    Returns:
        (torch.Tensor): eigenvectors, shape (n_samples, num_eig)
        (torch.Tensor): eigenvalues, sorted in descending order, shape (num_eig,)
        (torch.Tensor): sampled_indices used by Nystrom-like approximation subgraph, shape (num_sample,)
    """

    # check if features dimension greater than num_eig
    if eig_solver in ["svd_lowrank", "lobpcg"]:
        assert features.shape[0] > (
            num_eig * 2
        ), "number of nodes should be greater than 2*num_eig"
    if eig_solver in ["svd", "eigh"]:
        assert (
            features.shape[0] > num_eig
        ), "number of nodes should be greater than num_eig"

    features = features.clone()
    if normalize_features:
        # features need to be normalized for affinity matrix computation (cosine distance)
        features = torch.nn.functional.normalize(features, dim=-1)

    sampled_indices = run_subgraph_sampling(
        features,
        num_sample=num_sample,
        sample_method=sample_method,
    )

    sampled_indices_B = run_subgraph_sampling(
        features_B,
        num_sample=num_sample,
        sample_method=sample_method,
    )

    sampled_features = features[sampled_indices]
    sampled_features_B = features_B[sampled_indices_B]
    # move subgraph to gpu to speed up
    original_device = sampled_features.device
    device = original_device if device is None else device
    sampled_features = sampled_features.to(device)
    sampled_features_B = sampled_features_B.to(device)

    # compute affinity matrix on subgraph
    A = affinity_from_features(
        sampled_features, features_B=sampled_features_B,
        affinity_focal_gamma=affinity_focal_gamma, distance=distance,
        n_features=n_features,
    )

    not_sampled = torch.tensor(
        list(set(range(features.shape[0])) - set(sampled_indices))
    )

    if len(not_sampled) == 0:
        # if sampled all nodes, no need for nyström approximation
        eigen_vector, eigen_value = ncut(A, num_eig, eig_solver=eig_solver)
        return eigen_vector, eigen_value, sampled_indices

    # 1) PCA to reduce the node dimension for the not sampled nodes
    # 2) compute indirect connection on the PC nodes
    if len(not_sampled) > 0 and indirect_connection:
        raise NotImplementedError("indirect_connection is not implemented yet")
        indirect_pca_dim = min(indirect_pca_dim, min(*features.shape))
        U, S, V = torch.pca_lowrank(features[not_sampled].T, q=indirect_pca_dim)
        feature_B = (features[not_sampled].T @ V).T  # project to PCA space
        feature_B = feature_B.to(device)
        B = affinity_from_features(
            sampled_features,
            feature_B,
            affinity_focal_gamma=affinity_focal_gamma,
            distance=distance,
            fill_diagonal=False,
        )
        # P is 1-hop random walk matrix
        B_row = B / B.sum(axis=1, keepdim=True)
        B_col = B / B.sum(axis=0, keepdim=True)
        P = B_row @ B_col.T
        P = (P + P.T) / 2
        # fill diagonal with 0
        P[torch.arange(P.shape[0]), torch.arange(P.shape[0])] = 0
        A = A + P

    # compute normalized cut on the subgraph
    eigen_vector, eigen_value = ncut(A, num_eig, eig_solver=eig_solver, make_symmetric=make_symmetric)
    eigen_vector = eigen_vector.to(dtype=features.dtype, device=original_device)
    eigen_value = eigen_value.to(dtype=features.dtype, device=original_device)

    if no_propagation:
        return eigen_vector, eigen_value, sampled_indices

    # propagate eigenvectors from subgraph to full graph
    eigen_vector = propagate_knn(
        eigen_vector,
        features,
        sampled_features,
        knn,
        chunk_size=matmul_chunk_size,
        device=device,
        use_tqdm=verbose,
    )

    # post-hoc orthogonalization
    if make_orthogonal:
        eigen_vector = gram_schmidt(eigen_vector)

    return eigen_vector, eigen_value, sampled_indices
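On the sampled subgraph, the new module's core computation reduces to three steps: an asymmetric affinity between the two feature sets, a degree normalization on both sides, and an SVD whose left singular vectors stand in for the NCut eigenvectors (matching the UI note that the left-side SVD eigenvector is taken). Below is a self-contained sketch of just that math on toy data, assuming cosine distance, n_features=1, and make_symmetric=False; the variable names are illustrative, not from the repo:

import torch
import torch.nn.functional as F

n, d, num_eig = 200, 32, 8
feats_A = torch.randn(n, d)  # e.g. flattened q features
feats_B = torch.randn(n, d)  # e.g. flattened k features

# cosine affinity with focal gamma, as in affinity_from_features
a = F.normalize(feats_A, dim=-1)
b = F.normalize(feats_B, dim=-1)
A = torch.exp(-(1 - a @ b.T) / 0.3)  # affinity_focal_gamma = 0.3

# two-sided degree normalization, as in ncut(); A stays asymmetric here,
# so left and right singular vectors generally differ -- the "directed" part
D_r = A.sum(dim=0)
D_c = A.sum(dim=1)
A = A / torch.sqrt(D_r)[:, None]
A = A / torch.sqrt(D_c)[None, :]

left_vecs, vals, right_vecs = torch.svd_lowrank(A, q=num_eig)
print(left_vecs.shape, vals.shape)  # torch.Size([200, 8]) torch.Size([8])

On the full graph, the module then spreads these subgraph eigenvectors to all nodes via the imported propagate_knn, the same propagation step the undirected ncut_pytorch pipeline uses.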
requirements.txt
CHANGED

@@ -20,4 +20,4 @@ lisa @ git+https://github.com/huzeyann/LISA.git@7211e99
 timm==0.9.2
 open-clip-torch==2.20.0
 pytorch_lightning==1.9.4
-ncut-pytorch>=1.
+ncut-pytorch>=1.4.1
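For reproducing the Space locally, the updated pin corresponds to (assuming the usual PyPI package name):

pip install "ncut-pytorch>=1.4.1"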