Spaces:
Build error
Build error
| import gradio as gr | |
| import numpy as np | |
| from sklearn.datasets import load_diabetes | |
| from sklearn.linear_model import RidgeCV | |
| from sklearn.feature_selection import SelectFromModel | |
| from time import time | |
| from sklearn.feature_selection import SequentialFeatureSelector | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
def select_features(method, num_features):
    """Select features of the diabetes dataset and report how long it took.

    Args:
        method: Selection strategy. ``'model'`` uses ``SelectFromModel`` on
            the Ridge coefficients; ``'sfs-forward'`` and ``'sfs-backward'``
            use ``SequentialFeatureSelector`` in the matching direction.
        num_features: Number of features to keep. It is converted with
            ``int`` because a UI slider may deliver a float.

    Returns:
        A ``(message, figure)`` tuple. ``message`` names the selected
        features and the elapsed time. ``figure`` is a bar chart of the
        absolute Ridge coefficients for ``'model'`` and ``None`` otherwise.
        For an unrecognised ``method`` (including ``None``, i.e. nothing
        selected yet) a hint message and ``None`` are returned.
    """
    sfs_directions = {"sfs-forward": "forward", "sfs-backward": "backward"}

    # Validate first: without this guard an unset/unknown method fell through
    # every branch and the return statement raised UnboundLocalError.
    if method != "model" and method not in sfs_directions:
        return "Please select a feature selection method.", None

    n_selected = int(num_features)

    diabetes = load_diabetes()
    X, y = diabetes.data, diabetes.target
    feature_names = np.array(diabetes.feature_names)
    ridge = RidgeCV(alphas=np.logspace(-6, 6, num=5)).fit(X, y)

    fig = None
    if method == "model":
        tic = time()
        # threshold=-inf disables the importance cut-off, so exactly
        # `n_selected` features (the largest coefficients) are kept.
        sfm = SelectFromModel(
            ridge, threshold=-np.inf, max_features=n_selected
        ).fit(X, y)
        toc = time()
        selected_features = feature_names[sfm.get_support()]
        execution_time = toc - tic

        fig, ax = plt.subplots()
        ax.bar(height=np.abs(ridge.coef_), x=feature_names)
        ax.set_title("Feature importances via coefficients")
        ax.set_ylabel("Importance coefficient")
        ax.set_xlabel("Features")
        # Drop the figure from pyplot's global registry so repeated requests
        # do not accumulate open figures; the Figure object itself remains
        # usable by the caller.
        plt.close(fig)
    elif n_selected >= len(feature_names):
        # SequentialFeatureSelector rejects n_features_to_select >= n_features;
        # asking for every feature needs no search at all.
        selected_features = feature_names
        execution_time = 0.0
    else:
        tic = time()
        sfs = SequentialFeatureSelector(
            ridge,
            n_features_to_select=n_selected,
            direction=sfs_directions[method],
        ).fit(X, y)
        toc = time()
        selected_features = feature_names[sfs.get_support()]
        execution_time = toc - tic

    return (
        f"Selected the following features: {', '.join(selected_features)} "
        f"in {execution_time:.3f} seconds",
        fig,
    )
# Page title, reused for the browser tab and the on-page heading.
title = "Selecting features with Sequential Feature Selection"

# Introductory Markdown shown above the controls. Kept at column 0 so that
# Markdown does not interpret leading indentation as a code block.
description = """
This app demonstrates feature selection techniques using model based selection and sequential feature selection.\n\n
Model based selection is based on feature importance. Each feature is assigned a score on how much influence they have on the model output.
The feature with highest score is considered the most important feature.\n\n
Sequential feature selection is based on greedy approach. In greedy approach, the feature is added or removed to the selected features at each iteration
based on the model performance score.\n\n
This app uses Ridge estimator and the diabetes dataset from sklearn. Diabetes dataset consist of quantitative measure of diabetes progression and
10 following variables obtained from 442 diabetes patients:
1. Age (age)
2. Sex (sex)
3. Body mass index (bmi)
4. Average blood pressure (bp)
5. Total serum cholesterol (s1)
6. Low-density lipoproteins (s2)
7. High-density lipoproteins (s3)
8. Total cholesterol / HDL (s4)
9. Possibly log of serum triglycerides level (s5)
10. Blood sugar level (s6)\n\n
This app is developed based on [scikit-learn example](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_select_from_model_diabetes.html#sphx-glr-auto-examples-feature-selection-plot-select-from-model-diabetes-py)
"""

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    # A default method is pre-selected so the callback never receives
    # method=None when the slider is moved before a method is chosen.
    method = gr.Radio(
        ["model", "sfs-forward", "sfs-backward"], value="model", label="Method"
    )
    num_features = gr.Slider(minimum=2, maximum=10, step=1, label = "Number of features")
    output = gr.Textbox(label="Selected features")
    plot = gr.Plot(label="Feature importance plot")

    # Recompute when either control changes; previously only the slider was
    # wired, so switching the method left stale results on screen.
    for control in (method, num_features):
        control.change(
            fn=select_features,
            inputs=[method, num_features],
            outputs=[output, plot],
        )

demo.launch()