import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import SymLogNorm
import seaborn as sns
import gradio as gr
from sklearn.datasets import make_regression
from sklearn.linear_model import ARDRegression, BayesianRidge, LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
X, y, true_weights = make_regression(
    n_samples=100,
    n_features=100,
    n_informative=10,
    noise=8,
    coef=True,
    random_state=42,
)
# %%
# Fit the regressors
# ------------------
#
# We now fit both Bayesian models and the OLS to later compare the models'
# coefficients.
def fit_regression_models(n_iter=30, X=X, y=y, true_weights=true_weights):
    olr = LinearRegression().fit(X, y)
    brr = BayesianRidge(compute_score=True, n_iter=n_iter).fit(X, y)
    ard = ARDRegression(compute_score=True, n_iter=n_iter).fit(X, y)
    df = pd.DataFrame(
        {
            "Weights of true generative process": true_weights,
            "ARDRegression": ard.coef_,
            "BayesianRidge": brr.coef_,
            "LinearRegression": olr.coef_,
        }
    )
    return df, olr, brr, ard
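
# Note (assumption about the installed scikit-learn version): newer releases
# rename the `n_iter` constructor argument of BayesianRidge and ARDRegression to
# `max_iter`, with the old name deprecated and eventually removed. The demo above
# keeps `n_iter`; the helper below is a minimal compatibility sketch (not part of
# the original example) that picks whichever keyword the installed version accepts.
def iteration_kwarg(estimator_cls, n_iter):
    """Return {"max_iter": n_iter} or {"n_iter": n_iter}, depending on which
    keyword the installed scikit-learn exposes for `estimator_cls`."""
    import inspect

    params = inspect.signature(estimator_cls.__init__).parameters
    key = "max_iter" if "max_iter" in params else "n_iter"
    return {key: n_iter}


# Example usage:
# brr = BayesianRidge(compute_score=True, **iteration_kwarg(BayesianRidge, 30)).fit(X, y)
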
# %%
# Plot the true and estimated coefficients
# ----------------------------------------
#
# Now we compare the coefficients of each model with the weights of
# the true generative model.
def visualize_coefficients(df=None):
    fig = plt.figure(figsize=(10, 6))
    ax = sns.heatmap(
        df.T,
        norm=SymLogNorm(linthresh=10e-4, vmin=-80, vmax=80),
        cbar_kws={"label": "coefficients' values"},
        cmap="seismic_r",
    )
    plt.ylabel("linear model")
    plt.xlabel("coefficients")
    plt.tight_layout(rect=(0, 0, 1, 0.95))
    _ = plt.title("Models' coefficients")
    return fig
# %%
# Due to the added noise, none of the models recover the true weights. Indeed,
# all models always have more than 10 non-zero coefficients. Compared to the OLS
# estimator, the coefficients using a Bayesian Ridge regression are slightly
# shifted toward zero, which stabilises them. The ARD regression provides a
# sparser solution: some of the non-informative coefficients are set exactly to
# zero, while shifting others closer to zero. Some non-informative coefficients
# are still present and retain large values.
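
# Illustrative sketch (not part of the original example): make the sparsity claim
# above concrete by counting, for each model, how many coefficients stay above a
# small tolerance in absolute value. ARD is expected to keep the fewest, since it
# sets many of the non-informative weights exactly to zero. The tolerance is an
# arbitrary illustrative choice.
def count_nonzero_coefficients(tol=1e-3, n_iter=30):
    df_coef, _, _, _ = fit_regression_models(n_iter=n_iter)
    return (df_coef.abs() > tol).sum()


# Example usage: print(count_nonzero_coefficients())
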
# %%
# Plot the marginal log-likelihood
# --------------------------------
def plot_marginal_log_likelihood(ard=None, brr=None, n_iter=30):
    fig = plt.figure(figsize=(10, 6))
    ard_scores = -np.array(ard.scores_)
    brr_scores = -np.array(brr.scores_)
    plt.plot(ard_scores, color="navy", label="ARD")
    plt.plot(brr_scores, color="red", label="BayesianRidge")
    plt.ylabel("Log-likelihood")
    plt.xlabel("Iterations")
    plt.xlim(1, n_iter)
    plt.legend()
    _ = plt.title("Models log-likelihood")
    return fig
def make_regression_comparison_plot(n_iter=30):
    # fit models
    df, olr, brr, ard = fit_regression_models(
        n_iter=n_iter, X=X, y=y, true_weights=true_weights
    )
    # get figure
    fig = visualize_coefficients(df=df)
    return fig


def make_log_likelihood_plot(n_iter=30):
    # fit models
    df, olr, brr, ard = fit_regression_models(
        n_iter=n_iter, X=X, y=y, true_weights=true_weights
    )
    # get figure
    fig = plot_marginal_log_likelihood(ard=ard, brr=brr, n_iter=n_iter)
    return fig
# %%
# Indeed, both models minimize the log-likelihood up to an arbitrary cutoff
# defined by the `n_iter` parameter.
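
# Illustrative sketch (not part of the original example): refit with two different
# iteration budgets and compare the recorded optimisation traces. This assumes
# `scores_` holds one log-marginal-likelihood value per update, so its length
# reflects how many iterations actually ran before the cutoff (or convergence).
def inspect_iteration_cutoff(n_iter_small=5, n_iter_large=50):
    _, _, brr_small, _ = fit_regression_models(n_iter=n_iter_small)
    _, _, brr_large, _ = fit_regression_models(n_iter=n_iter_large)
    for label, model in [("small budget", brr_small), ("large budget", brr_large)]:
        print(f"{label}: {len(model.scores_)} scores, last = {model.scores_[-1]:.2f}")
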
# %%
# Bayesian regressions with polynomial feature expansion
# =======================================================
#
# Generate synthetic dataset
# --------------------------
# We create a target that is a non-linear function of the input feature.
# Gaussian noise is added (see the `rng.normal` call below).
rng = np.random.RandomState(0)
n_samples = 110

# sort the data to make plotting easier later
g_X = np.sort(-10 * rng.rand(n_samples) + 10)
noise = rng.normal(0, 1, n_samples) * 1.35
g_y = np.sqrt(g_X) * np.sin(g_X) + noise
full_data = pd.DataFrame({"input_feature": g_X, "target": g_y})
g_X = g_X.reshape((-1, 1))

# extrapolation
X_plot = np.linspace(10, 10.4, 10)
y_plot = np.sqrt(X_plot) * np.sin(X_plot)
X_plot = np.concatenate((g_X, X_plot.reshape((-1, 1))))
y_plot = np.concatenate((g_y - noise, y_plot))
# %%
# Fit the regressors
# ------------------
#
# Here we try a degree 10 polynomial to potentially overfit, though the Bayesian
# linear models regularize the size of the polynomial coefficients. As
# `fit_intercept=True` by default for
# :class:`~sklearn.linear_model.ARDRegression` and
# :class:`~sklearn.linear_model.BayesianRidge`,
# :class:`~sklearn.preprocessing.PolynomialFeatures` should not introduce an
# additional bias feature. By setting `return_std=True`, the Bayesian regressors
# return the standard deviation of the posterior distribution for the model
# parameters.
def generate_polynomial_dataset(degree=10):
    ard_poly = make_pipeline(
        PolynomialFeatures(degree=degree, include_bias=False),
        StandardScaler(),
        ARDRegression(),
    ).fit(g_X, g_y)
    brr_poly = make_pipeline(
        PolynomialFeatures(degree=degree, include_bias=False),
        StandardScaler(),
        BayesianRidge(),
    ).fit(g_X, g_y)
    y_ard, y_ard_std = ard_poly.predict(X_plot, return_std=True)
    y_brr, y_brr_std = brr_poly.predict(X_plot, return_std=True)
    return y_ard, y_ard_std, y_brr, y_brr_std
# %%
# Plotting polynomial regressions with std errors of the scores
# --------------------------------------------------------------
def visualize_bayes_regressions_polynomial_features(degree=10):
    y_ard, y_ard_std, y_brr, y_brr_std = generate_polynomial_dataset(degree)
    fig = plt.figure(figsize=(10, 6))
    ax = sns.scatterplot(
        data=full_data, x="input_feature", y="target", color="black", alpha=0.75
    )
    ax.plot(X_plot, y_plot, color="black", label="Ground Truth")
    ax.plot(X_plot, y_brr, color="red", label="BayesianRidge with polynomial features")
    ax.plot(X_plot, y_ard, color="navy", label="ARD with polynomial features")
    ax.fill_between(
        X_plot.ravel(),
        y_ard - y_ard_std,
        y_ard + y_ard_std,
        color="navy",
        alpha=0.3,
    )
    ax.fill_between(
        X_plot.ravel(),
        y_brr - y_brr_std,
        y_brr + y_brr_std,
        color="red",
        alpha=0.3,
    )
    ax.legend()
    _ = ax.set_title("Polynomial fit of a non-linear feature")
    return fig
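
# Optional local preview (illustrative, not wired into the Gradio app below):
# build the three figures with default settings and save them to disk, which is
# handy for checking the plots outside the web UI. The file names are arbitrary.
def save_preview_figures(n_iter=30, degree=10):
    make_regression_comparison_plot(n_iter=n_iter).savefig("coefficients.png")
    make_log_likelihood_plot(n_iter=n_iter).savefig("log_likelihood.png")
    visualize_bayes_regressions_polynomial_features(degree=degree).savefig("polynomial_fit.png")
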
title = " Illustration of Comparing Linear Bayesian Regressors with synthetic data" | |
with gr.Blocks(title=title) as demo: | |
gr.Markdown(f"# {title}") | |
gr.Markdown(""" This example shows a comparison of two different bayesian regressors: | |
Automatic Relevance Determination - ARD see [sklearn-docs](https://scikit-learn.org/stable/modules/linear_model.html#automatic-relevance-determination) | |
Bayesian Ridge Regression - see [sklearn-docs](https://scikit-learn.org/stable/modules/linear_model.html#bayesian-ridge-regression) | |
The tutorial is split into sections, with the first comparing model coeffecients produced by Ordinary Least Squares (OLS), Bayesian Ridge Regression, and ARD with the known true coefficients. For this | |
We generated a dataset where X and y are linearly linked: 10 of the features of X will be used to generate y. The other features are not useful at predicting y. | |
n addition, we generate a dataset where n_samples == n_features. Such a setting is challenging for an OLS model and leads potentially to arbitrary large weights. | |
Having a prior on the weights and a penalty alleviates the problem. Finally, gaussian noise is added. | |
For the final tab, we investigate bayesian regressors with polynomial features and generate an additional dataset where the target is a non-linear function of the input feature, with | |
added noise following a standard uniform distribution. | |
For further details please see the sklearn docs: | |
""") | |
gr.Markdown(" **[Demo is based on sklearn docs found here](https://scikit-learn.org/stable/auto_examples/linear_model/plot_ard.html#sphx-glr-auto-examples-linear-model-plot-ard-py)** <br>") | |
with gr.Tab("# Plot true and estimated coefficients"): | |
with gr.Row(): | |
n_iter = gr.Slider(value=5, minimum=5, maximum=50, step=1, label="n_iterations") | |
btn = gr.Button(value="Plot true and estimated coefficients") | |
btn.click(make_regression_comparison_plot, inputs = [n_iter], outputs= gr.Plot(label='Plot true and estimated coefficients') ) | |
gr.Markdown( | |
""" | |
# Details | |
One can observe that with the added noise, none of the models can perfectly recover the coefficients of the original model. All models have more thab 10 non-zero coefficients, | |
where only 10 are useful. The Bayesian Ridge Regression manages to recover most of the coefficients, while the ARD is more conservative. | |
""") | |
with gr.Tab("# Plot marginal log likelihoods"): | |
with gr.Row(): | |
n_iter = gr.Slider(value=5, minimum=5, maximum=50, step=1, label="n_iterations") | |
btn = gr.Button(value="Plot marginal log likelihoods") | |
btn.click(make_log_likelihood_plot, inputs = [n_iter], outputs= gr.Plot(label='Plot marginal log likelihoods') ) | |
gr.Markdown( | |
""" | |
# Confirm with marginal log likelihoods | |
Both ARD and Bayesian Ridge minimized the log-likelihood upto an arbitrary cuttoff defined the the n_iter parameter. | |
""" | |
) | |
with gr.Tab("# Plot bayesian regression with polynomial features"): | |
with gr.Row(): | |
degree = gr.Slider(value=5, minimum=5, maximum=50, step=1, label="n_degrees") | |
btn = gr.Button(value="Plot bayesian regression with polynomial features") | |
btn.click(visualize_bayes_regressions_polynomial_features, inputs = [degree], outputs= gr.Plot(label='Plot bayesian regression with polynomial features') ) | |
gr.Markdown( | |
""" | |
# Details | |
Here we try a degree 10 polynomial to potentially overfit, though the bayesian linear models regularize the size of the polynomial coefficients. | |
As fit_intercept=True by default for ARDRegression and BayesianRidge, then PolynomialFeatures should not introduce an additional bias feature. By setting return_std=True, | |
the bayesian regressors return the standard deviation of the posterior distribution for the model parameters. | |
""") | |
demo.launch() |