Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
import plotly.express as px | |
from sklearn.svm import LinearSVC | |
from sklearn.pipeline import make_pipeline | |
from sklearn.datasets import make_classification | |
from sklearn.metrics import classification_report | |
from sklearn.model_selection import train_test_split | |
from sklearn.feature_selection import SelectKBest, f_classif | |
def app_fn(k: int, n_features: int, n_informative: int, n_redundant: int):
    """Fit an ANOVA -> LinearSVC pipeline on synthetic data and summarise it.

    A binary classification dataset is generated with ``make_classification``,
    split into train/test parts, and used to fit a pipeline made of
    ``SelectKBest(f_classif)`` followed by ``LinearSVC``.

    Args:
        k: Number of features the ANOVA filter should keep. Values larger
            than ``n_features`` are clamped to ``n_features``.
        n_features: Total number of features in the synthetic dataset.
        n_informative: Number of informative features to generate.
        n_redundant: Number of redundant features to generate.

    Returns:
        A ``(report_df, fig)`` tuple: ``report_df`` is the classification
        report on the test split (one row per class/average, rounded to two
        decimals, with the overall accuracy repeated in an ``accuracy``
        column); ``fig`` is a Plotly bar chart with one bar per original
        feature, 1 where the ANOVA filter kept the feature and 0 otherwise.

    Raises:
        ValueError: Propagated from ``make_classification`` when the
            requested feature counts are rejected by it.
    """
    # Coerce defensively in case the UI hands over floats instead of ints.
    k, n_features, n_informative, n_redundant = (
        int(value) for value in (k, n_features, n_informative, n_redundant)
    )
    # The filter cannot keep more features than the dataset has.
    k = min(k, n_features)

    X, y = make_classification(
        n_features=n_features,
        n_informative=n_informative,
        n_redundant=n_redundant,
        n_classes=2,
        n_clusters_per_class=2,
        random_state=42,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    anova_filter = SelectKBest(f_classif, k=k)
    clf = LinearSVC()
    anova_svm = make_pipeline(anova_filter, clf)
    anova_svm.fit(X_train, y_train)

    y_pred = anova_svm.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    # "accuracy" is a single scalar rather than a per-class row, so pull it
    # out of the dict and expose it as its own column instead.
    accuracy = report.pop("accuracy")
    report_df = (
        pd.DataFrame(report)
        .transpose()
        .reset_index()
        .rename(columns={"index": "class"})
        .round(2)
    )
    report_df["accuracy"] = round(accuracy, 2)

    # Read the selection mask from the fitted filter itself. Thresholding the
    # back-projected SVM coefficients at > 0 would hide every selected feature
    # whose coefficient happens to be negative or zero.
    features = anova_svm[0].get_support().astype(int)

    fig = px.bar(y=features)
    # Label the 0/1 ticks on the y-axis as "False"/"True".
    fig.update_yaxes(ticktext=["False", "True"], tickvals=[0, 1])
    fig.update_layout(
        title="Selected Features",
        xaxis_title="Feature Index",
        yaxis_title="Selected",
        legend_title="Selected",
    )
    return report_df, fig
# Page title rendered as the top-level heading of the demo.
title = "Pipeline ANOVA SVM"

with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        """
        ### This example creates a pipeline where in the first step k features are selected with ANOVA and then we pass the selected features \
        to a Linear SVM. This pipeline is then trained using a synthetic dataset and evaluated on a test holdout. \
        A table displaying the classification report with the metrics and a chart showing the index of the selected features are shown at the bottom.
        See original example [here](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_feature_selection_pipeline.html#sphx-glr-auto-examples-feature-selection-plot-feature-selection-pipeline-py)
        """
    )

    # Input controls. `gr.Slider(value=...)` replaces the deprecated
    # `gr.inputs.Slider(default=...)` form.
    with gr.Row():
        k = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Features to Select")
        n_features = gr.Slider(minimum=1, maximum=20, value=20, step=1, label="Total Features")
        # app_fn generates 2 classes x 2 clusters per class, which
        # make_classification only accepts when 2**n_informative >= 4,
        # hence the minimum of 2.
        n_informative = gr.Slider(minimum=2, maximum=20, value=3, step=1, label="Informative Features")
        n_redundant = gr.Slider(minimum=0, maximum=20, value=0, step=1, label="Redundant Features")

    # The button caption is the first positional argument (`value`).
    btn = gr.Button("Run")

    # Output widgets: metrics table and selected-feature chart.
    with gr.Row():
        report = gr.DataFrame(label="Classification Report")
        features = gr.Plot(label="Selected Features")

    # Re-run the pipeline whenever the button is pressed ...
    btn.click(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )
    # ... and once on page load so the outputs are populated with the defaults.
    demo.load(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )

demo.launch()