"""Gradio demo: how the perplexity parameter shapes a t-SNE embedding.

Three synthetic datasets (concentric circles, an S-curve and a uniform grid)
are shown side by side with their 2-D t-SNE embedding. Sliders control the
number of samples and the perplexity used by t-SNE.
"""

from functools import partial

import gradio as gr
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
import numpy as np
from sklearn import datasets, manifold

SEED = 0
N_COMPONENTS = 2
TSNE_ITERATIONS = 400
np.random.seed(SEED)


def get_circles(n_samples):
    """Return ``(X, color)`` for two noisy concentric circles."""
    X, color = datasets.make_circles(
        n_samples=n_samples, factor=0.5, noise=0.05, random_state=SEED
    )
    return X, color


def get_s_curve(n_samples):
    """Return ``(X, color)`` for an S-curve with its 2nd and 3rd axes swapped.

    The swap means that plotting the first two columns of ``X`` shows the
    first and (originally) third coordinates of the generated curve.
    """
    X, color = datasets.make_s_curve(n_samples=n_samples, random_state=SEED)
    # Fancy indexing on the right-hand side yields a copy, so this is a safe
    # in-place swap of columns 1 and 2.
    X[:, [1, 2]] = X[:, [2, 1]]
    return X, color


def get_uniform_grid(n_samples):
    """Return ``(X, color)`` for a square grid on the unit square.

    The grid has ``int(sqrt(n_samples))`` points per side, so the number of
    returned points may be smaller than ``n_samples``. Points are colored by
    their x coordinate.
    """
    side = np.linspace(0, 1, int(np.sqrt(n_samples)))
    xx, yy = np.meshgrid(side, side)
    X = np.column_stack([xx.ravel(), yy.ravel()])
    color = xx.ravel()
    return X, color


DATA_MAPPING = {
    'Circles': get_circles,
    'S-curve': get_s_curve,
    'Uniform Grid': get_uniform_grid,
}


def _make_tsne(perplexity):
    """Build the t-SNE estimator used by the demo.

    Newer scikit-learn releases name the iteration cap ``max_iter`` while
    older ones only accept ``n_iter``; try the new name first and fall back
    so the demo runs on both.
    """
    params = dict(
        n_components=N_COMPONENTS,
        init="random",
        random_state=SEED,
        perplexity=perplexity,
    )
    try:
        return manifold.TSNE(max_iter=TSNE_ITERATIONS, **params)
    except TypeError:
        return manifold.TSNE(n_iter=TSNE_ITERATIONS, **params)


def plot_data(dataset: str, perplexity: int, n_samples: int, tsne: bool):
    """Plot a dataset, either raw or after a t-SNE embedding.

    Parameters
    ----------
    dataset : str
        Key of ``DATA_MAPPING`` selecting the dataset generator.
    perplexity : int
        t-SNE perplexity. A dict carrying the number under ``'value'`` is
        also accepted. Ignored when ``tsne`` is false.
    n_samples : int
        Number of samples requested from the dataset generator.
    tsne : bool
        If true, scatter the 2-D t-SNE embedding; otherwise scatter the
        first two columns of the raw data.

    Returns
    -------
    matplotlib.figure.Figure
        A 7x7 inch scatter plot without tick labels.
    """
    if isinstance(perplexity, dict):
        perplexity = perplexity['value']
    perplexity = int(perplexity)
    # UI widgets may deliver numbers as floats; the generators need an int.
    n_samples = int(n_samples)

    X, color = DATA_MAPPING[dataset](n_samples)
    if tsne:
        # scikit-learn requires perplexity < number of samples; clamp instead
        # of crashing when the sliders allow an invalid combination
        # (e.g. perplexity=100 with 100 samples).
        effective_perplexity = min(perplexity, X.shape[0] - 1)
        Y = _make_tsne(effective_perplexity).fit_transform(X)
    else:
        Y = X

    fig, ax = plt.subplots(figsize=(7, 7))
    ax.scatter(Y[:, 0], Y[:, 1], c=color)
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    ax.axis("tight")
    # pyplot keeps a global reference to every figure it creates. Release it
    # so repeated callbacks do not accumulate figures in a long-running
    # server; the Figure object itself can still be rendered by the caller.
    plt.close(fig)
    return fig


title = "t-SNE: The effect of various perplexity values on the shape"
description = """
t-distributed Stochastic Neighbor Embedding ([t-SNE](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html)) is a powerful technique for dimensionality reduction and visualization of high dimensional datasets. One of the key parameters in t-SNE is perplexity, which controls the number of nearest neighbors used to represent each data point in the low-dimensional space.

In this illustration, we explore the impact of various perplexity values on t-SNE visualizations using three commonly used datasets: Concentric Circles, S-curve and Uniform Grid. By comparing the resulting visualizations, we demonstrate how changing the perplexity value affects the shape of the visualization.

Created by [@Hnabil](https://huggingface.co/Hnabil) based on [scikit-learn docs](https://scikit-learn.org/stable/auto_examples/manifold/plot_t_sne_perplexity.html)
"""

with gr.Blocks(title=title) as demo:
    gr.HTML(f"{title}")
    gr.Markdown(description)

    input_data = gr.Radio(
        list(DATA_MAPPING),
        value="Circles",
        label="dataset"
    )
    n_samples = gr.Slider(
        minimum=100, maximum=1000, value=150, step=25,
        label='Number of Samples'
    )
    perplexity = gr.Slider(
        minimum=2, maximum=100, value=5, step=1,
        label='Perplexity'
    )
    # Every callback receives the same three inputs, in plot_data's
    # positional order.
    inputs = [input_data, perplexity, n_samples]

    with gr.Row():
        with gr.Column():
            original_plot = gr.Plot(label="Original data")
            plot_original = partial(plot_data, tsne=False)
            # The raw data does not depend on perplexity, so that slider
            # does not trigger a redraw of this panel.
            for register in (input_data.change, n_samples.change, demo.load):
                register(fn=plot_original, inputs=inputs, outputs=original_plot)

        with gr.Column():
            tsne_plot = gr.Plot(label="t-SNE")
            plot_embedding = partial(plot_data, tsne=True)
            for register in (
                input_data.change,
                perplexity.change,
                n_samples.change,
                demo.load,
            ):
                register(fn=plot_embedding, inputs=inputs, outputs=tsne_plot)

if __name__ == "__main__":
    demo.launch()