EduardoPacheco's picture
Update app.py
761c88a
raw
history blame contribute delete
No virus
3.43 kB
import gradio as gr
import pandas as pd
import plotly.express as px
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_classif
def app_fn(k: int, n_features: int, n_informative: int, n_redundant: int):
    """Fit an ANOVA-filter + linear SVM pipeline on synthetic data and report on it.

    A binary classification dataset is generated with ``make_classification``,
    split into train/test parts, and a ``SelectKBest(f_classif)`` ->
    ``LinearSVC`` pipeline is fitted on the training part.

    Args:
        k: Number of features the ANOVA filter keeps. Values larger than
            ``n_features`` are clamped to ``n_features``.
        n_features: Total number of features in the generated dataset.
        n_informative: Number of informative features in the dataset.
        n_redundant: Number of redundant features in the dataset.

    Returns:
        A ``(report_df, fig)`` tuple: the classification report on the test
        split as a DataFrame (one row per class/average, with the overall
        accuracy repeated in an ``accuracy`` column), and a plotly bar chart
        with a bar of height 1 at the index of every selected feature.

    Raises:
        gr.Error: If the requested feature counts cannot be satisfied by
            ``make_classification``.
    """
    # Defensive cast: UI widgets may hand numeric values over as floats,
    # while the scikit-learn parameters below must be integers.
    k, n_features, n_informative, n_redundant = (
        int(value) for value in (k, n_features, n_informative, n_redundant)
    )
    n_classes = 2
    n_clusters_per_class = 2

    # Check the make_classification constraints up front so the user gets a
    # readable message instead of an opaque failure from inside scikit-learn.
    if n_informative + n_redundant > n_features:
        raise gr.Error(
            "Informative + redundant features "
            f"({n_informative} + {n_redundant}) cannot exceed the total "
            f"number of features ({n_features})."
        )
    if n_classes * n_clusters_per_class > 2**n_informative:
        raise gr.Error(
            f"At least 2 informative features are required for {n_classes} "
            f"classes with {n_clusters_per_class} clusters per class."
        )
    # The filter cannot keep more features than the dataset has.
    k = min(k, n_features)

    X, y = make_classification(
        n_features=n_features,
        n_informative=n_informative,
        n_redundant=n_redundant,
        n_classes=n_classes,
        n_clusters_per_class=n_clusters_per_class,
        random_state=42,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    anova_filter = SelectKBest(f_classif, k=k)
    clf = LinearSVC()
    anova_svm = make_pipeline(anova_filter, clf)
    anova_svm.fit(X_train, y_train)
    y_pred = anova_svm.predict(X_test)

    report = classification_report(y_test, y_pred, output_dict=True)
    # "accuracy" is a single scalar rather than a per-class row, so take it
    # out of the dict and expose it as its own column instead.
    accuracy = report.pop("accuracy")
    report_df = (
        pd.DataFrame(report)
        .transpose()
        .reset_index()
        .rename(columns={"index": "class"})
        .round(2)
    )
    report_df["accuracy"] = round(accuracy, 2)

    # Ask the fitted selector which features it kept. Deriving the mask from
    # the sign of the SVM coefficients would hide every selected feature whose
    # coefficient happens to be negative.
    selected = anova_svm[0].get_support().astype(int)

    fig = px.bar(y=selected)
    # Label the 0/1 y-axis ticks as "False"/"True"
    fig.update_yaxes(ticktext=["False", "True"], tickvals=[0, 1])
    fig.update_layout(
        title="Selected Features",
        xaxis_title="Feature Index",
        yaxis_title="Selected",
        legend_title="Selected",
    )
    return report_df, fig
# Page title shown as the top-level heading of the demo
title = "Pipeline ANOVA SVM"

# Build the UI: description, four parameter sliders, a run button, and the
# two outputs (classification report table and selected-features chart).
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        "### This example creates a pipeline where in the first step k features are selected with ANOVA and then we pass the selected features "
        "to a Linear SVM. This pipeline is then trained using a synthetic dataset and evaluated on a test holdout. "
        "A table displaying the classification report with the metrics and a chart showing the index of the selected features are shown at the bottom.\n"
        "See original example [here](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_feature_selection_pipeline.html#sphx-glr-auto-examples-feature-selection-plot-feature-selection-pipeline-py)"
    )

    with gr.Row():
        # gr.Slider with `value=` replaces the deprecated gr.inputs.Slider / `default=` API
        k = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Features to Select")
        n_features = gr.Slider(minimum=1, maximum=20, value=20, step=1, label="Total Features")
        # Minimum is 2: with 2 classes x 2 clusters per class, make_classification
        # needs at least 2 informative features
        n_informative = gr.Slider(minimum=2, maximum=20, value=3, step=1, label="Informative Features")
        n_redundant = gr.Slider(minimum=0, maximum=20, value=0, step=1, label="Redundant Features")

    # The button text is the component's value, not a label
    btn = gr.Button("Run")

    with gr.Row():
        report = gr.DataFrame(label="Classification Report")
        features = gr.Plot(label="Selected Features")

    # Re-run the pipeline whenever the button is pressed
    btn.click(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )
    # Populate the outputs once with the default slider values when the page loads
    demo.load(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )

demo.launch()