# Scikit learn example https://scikit-learn.org/stable/auto_examples/cluster/plot_optics.html
import gradio as gr
from sklearn.cluster import OPTICS, cluster_optics_dbscan
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np

# Non-interactive backend: figures are only handed to the web UI, never shown.
plt.switch_backend("agg")

# Theme from - https://huggingface.co/spaces/trl-lib/stack-llama/blob/main/app.py
theme = gr.themes.Monochrome(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
    radius_size=gr.themes.sizes.radius_sm,
    font=[
        gr.themes.GoogleFont("Open Sans"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ],
)

# (center, spread) of each synthetic Gaussian blob, in generation order.
# The order matters: the blobs are drawn sequentially from one random stream.
_BLOBS = (
    ((-5, -2), 0.8),
    ((4, -1), 0.1),
    ((1, -2), 0.2),
    ((-2, 3), 0.3),
    ((3, -2), 1.6),
    ((5, 6), 2),
)

# Matplotlib format strings used per cluster label in each panel.
_OPTICS_PALETTE = ("g.", "r.", "b.", "y.", "c.")
_DBSCAN_050_PALETTE = ("g.", "r.", "b.", "c.")
_DBSCAN_200_PALETTE = ("g.", "m.", "y.", "c.")


def _make_sample_data(n_points_per_cluster):
    """Return the stacked 2-D sample blobs, ``n_points_per_cluster`` rows each."""
    # A local, fixed-seed generator produces the same stream as
    # ``np.random.seed(0)`` followed by ``np.random.randn`` but leaves the
    # process-wide NumPy random state untouched.
    rng = np.random.RandomState(0)
    return np.vstack(
        [
            np.asarray(center) + spread * rng.randn(n_points_per_cluster, 2)
            for center, spread in _BLOBS
        ]
    )


def _cluster_ids(labels):
    """Return the sorted non-noise cluster labels present in ``labels``."""
    return np.unique(labels[labels >= 0])


def _scatter_clusters(ax, X, labels, palette, title):
    """Scatter the rows of ``X`` on ``ax``, styled by cluster label.

    Every cluster present in ``labels`` is drawn; the palette is cycled when
    there are more clusters than entries. Points labelled ``-1`` (noise) are
    drawn as faint black crosses.
    """
    for klass in _cluster_ids(labels):
        members = X[labels == klass]
        ax.plot(
            members[:, 0],
            members[:, 1],
            palette[klass % len(palette)],
            alpha=0.3,
        )
    noise = labels == -1
    ax.plot(X[noise, 0], X[noise, 1], "k+", alpha=0.1)
    ax.set_title(title)


def do_submit(n_points_per_cluster, min_samples, xi, min_cluster_size):
    """Fit OPTICS on synthetic data and draw the four-panel comparison figure.

    Args:
        n_points_per_cluster: Number of points generated for each of the six
            blobs; converted with ``int``.
        min_samples: OPTICS ``min_samples``; converted with ``int``.
        xi: OPTICS ``xi``; converted with ``float``.
        min_cluster_size: OPTICS ``min_cluster_size``; converted with
            ``float``.

    Returns:
        The matplotlib ``Figure`` holding the reachability plot (top row) and
        the OPTICS / DBSCAN(eps=0.5) / DBSCAN(eps=2) scatter plots (bottom
        row).
    """
    X = _make_sample_data(int(n_points_per_cluster))

    clust = OPTICS(
        min_samples=int(min_samples),
        xi=float(xi),
        min_cluster_size=float(min_cluster_size),
    )
    # Run the fit
    clust.fit(X)

    # Re-label the fitted ordering at two fixed reachability thresholds.
    labels_050 = cluster_optics_dbscan(
        reachability=clust.reachability_,
        core_distances=clust.core_distances_,
        ordering=clust.ordering_,
        eps=0.5,
    )
    labels_200 = cluster_optics_dbscan(
        reachability=clust.reachability_,
        core_distances=clust.core_distances_,
        ordering=clust.ordering_,
        eps=2,
    )

    space = np.arange(len(X))
    reachability = clust.reachability_[clust.ordering_]
    labels = clust.labels_[clust.ordering_]

    fig = plt.figure(figsize=(10, 6))
    try:
        grid = gridspec.GridSpec(2, 3)
        # Axes are created on ``fig`` directly so drawing does not depend on
        # pyplot's notion of the "current" figure.
        ax1 = fig.add_subplot(grid[0, :])
        ax2 = fig.add_subplot(grid[1, 0])
        ax3 = fig.add_subplot(grid[1, 1])
        ax4 = fig.add_subplot(grid[1, 2])

        # Reachability plot
        for klass in _cluster_ids(labels):
            members = labels == klass
            ax1.plot(
                space[members],
                reachability[members],
                _OPTICS_PALETTE[klass % len(_OPTICS_PALETTE)],
                alpha=0.3,
            )
        ax1.plot(space[labels == -1], reachability[labels == -1], "k.", alpha=0.3)
        # Horizontal guides at the two DBSCAN cut levels used below.
        ax1.plot(space, np.full_like(space, 2.0, dtype=float), "k-", alpha=0.5)
        ax1.plot(space, np.full_like(space, 0.5, dtype=float), "k-.", alpha=0.5)
        ax1.set_ylabel("Reachability (epsilon distance)")
        ax1.set_title("Reachability Plot")

        # OPTICS
        _scatter_clusters(
            ax2, X, clust.labels_, _OPTICS_PALETTE, "Automatic Clustering\nOPTICS"
        )
        # DBSCAN at 0.5
        _scatter_clusters(
            ax3,
            X,
            labels_050,
            _DBSCAN_050_PALETTE,
            "Clustering at 0.5 epsilon cut\nDBSCAN",
        )
        # DBSCAN at 2.
        _scatter_clusters(
            ax4,
            X,
            labels_200,
            _DBSCAN_200_PALETTE,
            "Clustering at 2.0 epsilon cut\nDBSCAN",
        )

        fig.tight_layout()
    finally:
        # Drop pyplot's reference so figures do not pile up across slider
        # events; the returned Figure object itself stays usable.
        plt.close(fig)
    return fig


title = "Demo of OPTICS clustering algorithm"

with gr.Blocks(title=title, theme=theme) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(
        "[Scikit-learn Example](https://scikit-learn.org/stable/auto_examples/cluster/plot_optics.html)"
    )
    gr.Markdown(
        "Finds core samples of high density and expands clusters from them. This example uses data that is "
        "generated so that the clusters have different densities. "
        "The [OPTICS](https://scikit-learn.org/stable/modules/generated/sklearn.cluster.OPTICS.html#sklearn.cluster.OPTICS) is first used with its Xi cluster detection "
        "method, and then setting specific thresholds on the reachability, which corresponds to [DBSCAN](https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html#sklearn.cluster.DBSCAN). We can see that "
        "the different clusters of OPTICS’s Xi method can be recovered with different choices of thresholds in DBSCAN."
    )

    # NOTE(review): `.style(equal_height=True)` and the fractional `scale`
    # are kept exactly as written; newer Gradio releases may not accept
    # them -- confirm against the pinned Gradio version before upgrading.
    with gr.Row().style(equal_height=True):
        with gr.Column(scale=0.75):
            n_points_per_cluster = gr.Slider(
                minimum=200,
                maximum=500,
                label="Number of points per cluster",
                step=50,
                value=250,
            )
            # Hidden rows are kept between the sliders as in the original layout.
            with gr.Row(visible=False):
                gr.Markdown("##")
            min_samples = gr.Slider(
                minimum=10,
                maximum=100,
                label="OPTICS - Minimum number of samples",
                step=5,
                value=50,
                info="The number of samples in a neighborhood for a point to be considered as a core point.",
            )
            with gr.Row(visible=False):
                gr.Markdown("##")
            xi = gr.Slider(
                minimum=0,
                maximum=0.2,
                label="OPTICS - Xi",
                step=0.01,
                value=0.05,
                info="Determines the minimum steepness on the reachability plot that constitutes a cluster boundary. ",
            )
            with gr.Row(visible=False):
                gr.Markdown("##")
            min_cluster_size = gr.Slider(
                minimum=0.01,
                maximum=0.1,
                label="OPTICS - Minimum cluster size",
                step=0.01,
                value=0.05,
                info="Minimum number of samples in an OPTICS cluster, expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2).",
            )
        plt_out = gr.Plot()

    # Any slider change redraws the figure from the current value of all four.
    controls = [n_points_per_cluster, min_samples, xi, min_cluster_size]
    for control in controls:
        control.change(do_submit, inputs=controls, outputs=plt_out)

if __name__ == "__main__":
    demo.launch()