import gradio as gr
import pandas as pd
import plotly.express as px
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_classif


def app_fn(k: int, n_features: int, n_informative: int, n_redundant: int):
    """Train an ANOVA-filter + LinearSVC pipeline on a synthetic dataset.

    Parameters
    ----------
    k : int
        Number of features kept by the ANOVA (SelectKBest) filter.
    n_features : int
        Total number of features generated by ``make_classification``.
    n_informative : int
        Number of informative features.
    n_redundant : int
        Number of redundant features.

    Returns
    -------
    tuple[pd.DataFrame, plotly.graph_objects.Figure]
        The classification report on the test split and a bar chart
        marking which feature indices were selected by the filter.

    Raises
    ------
    gr.Error
        If the slider values form an invalid combination for
        ``make_classification`` or ``SelectKBest``.
    """
    # Validate slider combinations up front so the user sees a friendly
    # message instead of an sklearn traceback.
    if n_informative + n_redundant > n_features:
        raise gr.Error(
            "Informative + redundant features must not exceed total features."
        )
    if k > n_features:
        raise gr.Error(
            "Number of features to select (k) must not exceed total features."
        )

    X, y = make_classification(
        n_features=n_features,
        n_informative=n_informative,
        n_redundant=n_redundant,
        n_classes=2,
        n_clusters_per_class=2,
        random_state=42,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    # Pipeline: ANOVA F-test feature selection feeding a linear SVM.
    anova_filter = SelectKBest(f_classif, k=k)
    clf = LinearSVC()
    anova_svm = make_pipeline(anova_filter, clf)
    anova_svm.fit(X_train, y_train)

    y_pred = anova_svm.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    report_df = report_df.reset_index().rename(columns={"index": "class"}).round(2)
    # classification_report stores overall accuracy as a pseudo-row; copy the
    # scalar into its own column and drop that row so the table stays tidy.
    report_df["accuracy"] = (
        report_df.loc[report_df["class"] == "accuracy"].values.flatten()[-1]
    )
    report_df = report_df.loc[report_df["class"] != "accuracy"]

    # Map the SVM coefficients back to the original feature space: a feature
    # was selected iff its back-projected coefficient is non-zero (here > 0
    # marks it, matching the upstream sklearn example).
    features = anova_svm[:-1].inverse_transform(anova_svm[-1].coef_).flatten() > 0
    features = features.astype(int)

    fig = px.bar(y=features)
    # Show "False"/"True" on the y-axis instead of the raw 0/1 values.
    fig.update_yaxes(ticktext=["False", "True"], tickvals=[0, 1])
    fig.update_layout(
        title="Selected Features",
        xaxis_title="Feature Index",
        yaxis_title="Selected",
        legend_title="Selected",
    )

    return report_df, fig


title = "Pipeline ANOVA SVM"

with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        """
        ### This example creates a pipeline where in the first step k features are selected with ANOVA and then we pass the selected features \
        to a Linear SVM. This pipeline is then trained using a synthetic dataset and evaluated on a test holdout. \
        A table displaying the classification report with the metrics and a chart showing the index of the selected features are shown at the bottom. See original example [here](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_feature_selection_pipeline.html#sphx-glr-auto-examples-feature-selection-plot-feature-selection-pipeline-py)
        """
    )
    with gr.Row():
        # gr.inputs.* and the `default=` kwarg were removed in Gradio 3+;
        # the modern API is gr.Slider(..., value=...).
        k = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Number of Features to Select")
        n_features = gr.Slider(minimum=1, maximum=20, value=20, step=1, label="Total Features")
        n_informative = gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Informative Features")
        n_redundant = gr.Slider(minimum=0, maximum=20, value=0, step=1, label="Redundant Features")

    # Button text is its `value`; `label` is not how Button displays text.
    btn = gr.Button(value="Run")

    with gr.Row():
        report = gr.DataFrame(label="Classification Report")
        features = gr.Plot(label="Selected Features")

    btn.click(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )

    # Run once on page load so the demo shows results immediately.
    demo.load(
        fn=app_fn,
        inputs=[k, n_features, n_informative, n_redundant],
        outputs=[report, features],
    )

demo.launch()