# Spaces: Runtime error
# pip install gradio | |
# pip install pandas | |
# pip install scikit-learn | |
# pip install matplotlib | |
import warnings | |
warnings.filterwarnings('ignore') | |
from sklearn.mixture import GaussianMixture | |
from sklearn.metrics import silhouette_score, silhouette_samples | |
from sklearn.preprocessing import StandardScaler | |
import matplotlib.pyplot as plt | |
import pathlib | |
import pandas as pd | |
import gradio as gr | |
import numpy as np | |
plt.switch_backend('agg') | |
def gmm_cluster(input_file, first_col, second_col, n_clusters):
    """Cluster two CSV columns with a Gaussian mixture model and plot the result.

    The two selected columns are standardized, a ``GaussianMixture`` with
    ``n_clusters`` components is fitted to them, and each row is assigned to
    a component. Silhouette values are computed on the standardized features.

    Args:
        input_file: Uploaded-file object whose ``name`` attribute is the path
            of a CSV file.
        first_col: Column name used as the first feature (x axis).
        second_col: Column name used as the second feature (y axis).
        n_clusters: Number of mixture components; converted with ``int``.

    Returns:
        A tuple ``(raw_fig, cluster_fig, score, silhouette_fig)``:
        a scatter plot of the raw features, a scatter plot of the
        standardized features coloured by cluster, the mean silhouette
        score, and the per-sample silhouette plot.

    Raises:
        ValueError: If no file was supplied or a requested column is not
            present in the CSV.
    """
    if input_file is None:
        raise ValueError("Upload a CSV file before clustering.")
    # Normalize once so range(), colour fractions and axis limits all agree.
    n_clusters = int(n_clusters)

    data = pd.read_csv(pathlib.Path(input_file.name))
    features = [first_col, second_col]
    missing = [col for col in features if col not in data.columns]
    if missing:
        raise ValueError(f"Column(s) not found in the dataset: {missing}")

    # Work on plain arrays: nothing is written back into a slice of `data`,
    # and the feature matrix can never pick up the cluster-label column.
    raw = data[features].to_numpy()
    scaled = StandardScaler().fit_transform(raw)

    # Figure 1: the raw, unscaled features.
    raw_fig, raw_ax = plt.subplots()
    raw_ax.scatter(raw[:, 0], raw[:, 1])

    # Fit once, predict once; the labels feed both plots and the score.
    labels = GaussianMixture(n_components=n_clusters).fit(scaled).predict(scaled)
    score = float(silhouette_score(scaled, labels))

    # Figure 2: standardized features, one colour per cluster.
    cluster_fig, cluster_ax = plt.subplots()
    for k in range(n_clusters):
        members = scaled[labels == k]
        cluster_ax.scatter(
            members[:, 0],
            members[:, 1],
            color=plt.cm.nipy_spectral(float(k) / n_clusters),
        )

    # Figure 3: silhouette plot, one horizontal band per cluster.
    silhouette_fig, sil_ax = plt.subplots(figsize=(6, 6))
    sil_ax.set_xlim([-0.1, 1])
    # Leave a 10-row gap below, between and above the cluster bands.
    sil_ax.set_ylim([0, len(scaled) + (n_clusters + 1) * 10])

    sample_values = silhouette_samples(scaled, labels)
    y_lower = 10
    for k in range(n_clusters):
        band = np.sort(sample_values[labels == k])
        y_upper = y_lower + band.shape[0]
        color = plt.cm.nipy_spectral(float(k) / n_clusters)
        sil_ax.fill_betweenx(
            np.arange(y_lower, y_upper),
            0,
            band,
            facecolor=color,
            edgecolor=color,
            alpha=0.7,
        )
        # Label the band with its cluster index at its vertical midpoint.
        sil_ax.text(-0.05, y_lower + 0.5 * band.shape[0], str(k))
        y_lower = y_upper + 10

    sil_ax.axvline(
        x=score, color="red", linestyle="--", label="Avg silhouette score"
    )
    sil_ax.set_title("The silhouette plot")
    sil_ax.set_xlabel("The silhouette coefficient values")
    sil_ax.set_ylabel("Cluster label")
    sil_ax.legend()

    # pyplot keeps a global reference to every figure it creates; release
    # them so repeated calls do not accumulate figures. The Figure objects
    # themselves are still returned to the caller.
    for fig in (raw_fig, cluster_fig, silhouette_fig):
        plt.close(fig)

    return raw_fig, cluster_fig, score, silhouette_fig
def show_dataset(input_file):
    """Return the first nine rows of the uploaded CSV as a preview.

    Args:
        input_file: Uploaded-file object whose ``name`` attribute is the path
            of a CSV file. ``None`` is accepted so that a cleared upload does
            not raise.

    Returns:
        A DataFrame holding at most the first nine rows of the file, or
        ``None`` when ``input_file`` is ``None``.
    """
    if input_file is None:
        return None
    return pd.read_csv(pathlib.Path(input_file.name)).head(9)
# Page title, also rendered as the heading of the app.
title = 'Gaussian Mixture Model'

with gr.Blocks(title=title) as demo:
    gr.HTML(title)

    # Top row: inputs and silhouette score on the left, dataset preview on
    # the right.
    with gr.Row():
        with gr.Column():
            inp_file = gr.File(label="Input")
            inp_col_1 = gr.Text(label='Feature 1')
            inp_col_2 = gr.Text(label='Feature 2')
            inp_num_clusters = gr.Slider(
                minimum=2,
                maximum=7,
                # The initial value must lie inside [minimum, maximum].
                value=2,
                step=1,
                label='Number of clusters'
            )
            out3 = gr.Textbox(label='Silhouette score')
        output_file = gr.Dataframe(label='Dataset')

    # Bottom row: raw scatter, clustered scatter, silhouette plot.
    with gr.Row():
        out1 = gr.Plot()
        out2 = gr.Plot()
        out4 = gr.Plot()

    # Preview the dataset whenever the uploaded file changes.
    inp_file.change(fn=show_dataset, inputs=[inp_file], outputs=[output_file])
    # Re-run the clustering whenever the cluster-count slider moves.
    inp_num_clusters.change(
        fn=gmm_cluster,
        inputs=[inp_file, inp_col_1, inp_col_2, inp_num_clusters],
        outputs=[out1, out2, out3, out4],
    )

demo.launch()