# Source: Hugging Face Space "helboukkouri/regression-models"
# Space status when captured: Build error
# File size: 8,833 Bytes
# Revision: 0a25afe

import gradio as gr
import numpy as np
import sympy as sp
import seaborn as sns
from matplotlib import pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline


# Global seaborn theme applied to every figure produced by this module.
sns.set_style(style="darkgrid")
sns.set_context(context="notebook", font_scale=0.7)

# Bounds, default and step of the "Noise Level" slider; the slider value is
# used as the standard deviation of the Gaussian noise added to the samples.
MAX_NOISE = 20
DEFAULT_NOISE = 6
SLIDE_NOISE_STEP = 2

# Bounds, default and step of the "Number of Points" slider.
# MAX_POINTS is also the size of the dense grid used to draw smooth curves.
MAX_POINTS = 100
DEFAULT_POINTS = 20
SLIDE_POINTS_STEP = 5

def generate_equation(process_params):
    """Render the configured polynomial as a display-math LaTeX string.

    Args:
        process_params: Table supporting ``astype(float).values.tolist()``
            (a pandas DataFrame in this app). Its first row holds the
            coefficients ordered from the ``x**4`` term down to the constant.

    Returns:
        The polynomial with the numeric coefficients substituted in,
        formatted as LaTeX and wrapped in ``$$ ... $$``.
    """
    first_row = process_params.astype(float).values.tolist()[0]

    x = sp.symbols('x')
    placeholders = sp.symbols('a b c d e')

    # Symbolic form a*x**4 + b*x**3 + c*x**2 + d*x + e: walking the
    # placeholders backwards pairs the last one with x**0.
    symbolic_polynomial = sum(
        symbol * x**power
        for power, symbol in enumerate(reversed(placeholders))
    )

    # Pair each placeholder with its numeric value (a <-> first column, ...).
    substitutions = dict(zip(placeholders, first_row))

    latex_representation = sp.latex(symbolic_polynomial.subs(substitutions))
    return fr"$${latex_representation}$$"


def true_process(x, process_params):
    """Evaluate the underlying degree-4 polynomial at ``x``.

    Args:
        x: Point(s) at which to evaluate; anything supporting ``**``, ``*``
            and ``+`` with floats (a scalar or a NumPy array).
        process_params: Table supporting ``astype(float).values.tolist()``
            whose first row lists the coefficients of ``x**4``, ``x**3``,
            ``x**2``, ``x`` and the constant term, in that order.

    Returns:
        The polynomial value(s) at ``x``.
    """
    row = process_params.astype(float).values.tolist()[0]

    # One entry per monomial, highest degree first.
    terms = [
        row[0] * (x ** 4),
        row[1] * (x ** 3),
        row[2] * (x ** 2),
        row[3] * x,
        row[4],
    ]

    # Accumulate left to right so the floating-point result matches a plain
    # chained addition of the terms.
    total = terms[0]
    for term in terms[1:]:
        total = total + term
    return total


def generate_data(num_points, noise_level, process_params):
    """Sample the true process on [-5, 2] and create noisy observations.

    The process is evaluated on whole NumPy arrays in one call instead of
    once per sample, so the coefficient table is converted only once per
    grid rather than once per point.

    Args:
        num_points: Number of observed samples; cast to ``int`` because
            ``np.linspace`` requires an integer count.
        noise_level: Standard deviation of the zero-mean Gaussian noise
            added to the observed samples.
        process_params: Coefficient table forwarded to ``true_process``.

    Returns:
        A tuple ``(x, x_dense, y, y_dense, y_noisy)`` of NumPy arrays:
        the sample locations, a dense grid of ``MAX_POINTS`` locations for
        drawing smooth curves, the noiseless values on both grids, and the
        noisy observations at ``x``.
    """
    num_points = int(num_points)

    # x is the array of input values (coarse for samples, dense for curves)
    input_values = np.linspace(-5, 2, num_points)
    input_values_dense = np.linspace(-5, 2, MAX_POINTS)

    # y = f(x) is the underlying process we want to model
    y = np.asarray(true_process(input_values, process_params), dtype=float)
    y_dense = np.asarray(
        true_process(input_values_dense, process_params), dtype=float
    )

    # however, we can only observe a noisy version of f(x)
    noise = np.random.normal(0, noise_level, len(input_values))
    y_noisy = y + noise

    return input_values, input_values_dense, y, y_dense, y_noisy

    
def make_plot(
        num_points, noise_level, process_params,
        show_true_process, show_original_points,
        show_noisy_points, show_added_noise,
        show_learned_process, show_predicted_points,
        show_prediction_error,
        polynomial_degree=None
    ):
    """Build the figure showing the data and, optionally, a fitted model.

    Args:
        num_points: Number of observed samples to generate.
        noise_level: Standard deviation of the noise added to the samples.
        process_params: Coefficient table of the underlying polynomial.
        show_true_process: Draw the noiseless curve on the dense grid.
        show_original_points: Draw the noiseless sample points.
        show_noisy_points: Draw the noisy samples (labelled "Training
            Points" once a model is fitted).
        show_added_noise: Draw segments between noiseless and noisy samples.
        show_learned_process: Draw the fitted curve (model only).
        show_predicted_points: Draw predictions at the samples (model only).
        show_prediction_error: Draw segments between predictions and noisy
            samples (model only).
        polynomial_degree: Degree of the polynomial regression to fit; a
            falsy value (``None`` or ``0``) skips the model entirely.

    Returns:
        The matplotlib ``Figure``. It is detached from pyplot before being
        returned, so repeated calls do not accumulate open figures.
    """
    x, x_dense, y, y_dense, y_noisy = generate_data(num_points, noise_level, process_params)

    # Draw through explicit Figure/Axes handles instead of pyplot's implicit
    # "current figure", which is shared process-wide state.
    fig, ax = plt.subplots(dpi=400)

    # The noisy samples are drawn in exactly one of two places (with a
    # different label), so their marker style is defined once.
    noisy_marker_style = dict(
        color="none",
        ms=6.5,
        markerfacecolor="#556D9A",
        markeredgecolor="none",
        markeredgewidth=1.5,
        alpha=1,
    )

    try:
        if show_true_process:
            ax.plot(
                x_dense, y_dense, "-", color="#363A4F",
                label="True Process",
                lw=1.5,
            )
        if show_added_noise:
            ax.vlines(
                x, y, y_noisy, color="#556D9A",
                linestyles="dashed",
                alpha=0.75,
                lw=1,
                label="Added Noise",
            )
        if show_original_points:
            ax.plot(
                x, y, "-o", color="none",
                ms=6,
                markerfacecolor="white",
                markeredgecolor="#556D9A",
                markeredgewidth=1.2,
                label="Original Points",
            )
        if show_noisy_points and not polynomial_degree:
            ax.plot(x, y_noisy, "-o", label="Noisy Points", **noisy_marker_style)

        # Fit the selected regression model
        if polynomial_degree:
            # The UI widget may deliver the degree as a float (e.g. 3.0),
            # while PolynomialFeatures expects an integer degree.
            degree = int(polynomial_degree)
            model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
            model.fit(x.reshape(-1, 1), y_noisy)

            # Plot the fitted regression model
            y_pred_dense = model.predict(x_dense.reshape(-1, 1))
            y_pred = model.predict(x.reshape(-1, 1))
            if show_learned_process:
                ax.plot(
                    x_dense, y_pred_dense, "-", color="#327747",
                    label="Learned Process",
                    lw=1.5,
                    alpha=0.75,
                )
            if show_prediction_error:
                ax.vlines(
                    x, y_pred, y_noisy, color="#43A461",
                    linestyles="dashed",
                    alpha=0.75,
                    lw=1,
                    label="Prediction Error",
                )
            if show_noisy_points:
                ax.plot(x, y_noisy, "-o", label="Training Points", **noisy_marker_style)
            if show_predicted_points:
                ax.plot(
                    x, y_pred, "-o", color="none",
                    ms=6.5,
                    markerfacecolor="#43A461",
                    markeredgecolor="none",
                    markeredgewidth=1.5,
                    label="Predicted Points",
                    alpha=1,
                )

        ax.set_xlabel("x")
        ax.set_ylabel("y")
        # Only build a legend when something labelled was actually drawn;
        # otherwise matplotlib emits a "no artists with labels" warning.
        if ax.get_legend_handles_labels()[0]:
            ax.legend(fontsize=7.5)
        fig.tight_layout()
        return fig
    finally:
        # Unregister the figure from pyplot (also on errors) so that figures
        # do not pile up across callbacks; the Figure object stays usable.
        plt.close(fig)

# Custom CSS passed to gr.Blocks below:
#   .train-button     -> applied to the "Train Model" button via elem_classes
#   .model-section    -> applied to the row holding the degree input and button
#   .gradio-container -> constrains the width of the overall app container
css = """
.train-button {
    font-size: 1.2em;
    width: 20%!important;
    margin: 0;
}
.model-section {
    font-size: 1em;
    width: 100%!important;
    margin: 0 0 1em 0;
}
.gradio-container {
    width: 40%!important;
    min-width: 800px;
}
"""
with gr.Blocks(css=css) as demo:
    with gr.Row():
        with gr.Column():
            # --- Coefficients of the polynomial being modelled ---
            gr.Markdown("## Underlying Process")
            with gr.Row():
                process_params = gr.DataFrame(
                    value=[[0.5, 2, -0.5, -2, 1]],
                    label="Polynomial Coefficients",
                    type="pandas",
                    column_widths=("2", "1", "1", "1", "1w"),
                    headers=["x ** 4", "x ** 3", "x ** 2", "x", "1"],
                    interactive=True,
                )
            equation = gr.Markdown()

            # --- Sampling controls ---
            gr.Markdown("## Data Generation")
            with gr.Row():
                num_points = gr.Slider(
                    label="Number of Points",
                    minimum=5,
                    maximum=MAX_POINTS,
                    step=SLIDE_POINTS_STEP,
                    value=DEFAULT_POINTS,
                )

                noise_level = gr.Slider(
                    label="Noise Level",
                    minimum=0,
                    maximum=MAX_NOISE,
                    step=SLIDE_NOISE_STEP,
                    value=DEFAULT_NOISE,
                )

            # --- Visibility toggles ---
            # The order of this list must match the order of the show_*
            # parameters of make_plot, since it is unpacked positionally.
            show_params = []
            with gr.Row():
                with gr.Column():
                    show_params.extend(
                        gr.Checkbox(label=text, value=True)
                        for text in (
                            "Underlying Process",
                            "Original Points",
                            "Noisy Points",
                            "Added Noise",
                        )
                    )
                with gr.Column():
                    show_params.extend(
                        gr.Checkbox(label=text, value=True)
                        for text in (
                            "Learned Process",
                            "Predicted Points",
                            "Prediction Error",
                        )
                    )

            # --- Model degree input and training trigger button ---
            gr.Markdown("## Modelisation")
            with gr.Row(elem_classes=["model-section"]):
                polynomial_degree = gr.Number(
                    label="Choose the degree of your regression model",
                    value=1,
                    minimum=1,
                    maximum=15,
                    step=1,
                    scale=2,
                )
                train_button = gr.Button(
                    value="Train Model",
                    elem_classes=["train-button"],
                    scale=1,
                )

            scatter_plot = gr.Plot(elem_classes=["main-plot"])

    # Inputs shared by every data-only redraw (no model is fitted because
    # polynomial_degree is left at its default).
    plot_inputs = [num_points, noise_level, process_params, *show_params]

    num_points.change(fn=make_plot, inputs=plot_inputs, outputs=scatter_plot)
    noise_level.change(fn=make_plot, inputs=plot_inputs, outputs=scatter_plot)
    process_params.change(fn=make_plot, inputs=plot_inputs, outputs=scatter_plot)
    process_params.change(fn=generate_equation, inputs=[process_params], outputs=equation)

    # Only the button passes the degree, so only it triggers a model fit.
    train_button.click(make_plot, inputs=[*plot_inputs, polynomial_degree], outputs=scatter_plot)

    for component in show_params:
        component.change(fn=make_plot, inputs=plot_inputs, outputs=scatter_plot)

    # Initial render when the page loads.
    demo.load(fn=make_plot, inputs=plot_inputs, outputs=scatter_plot)
    demo.load(fn=generate_equation, inputs=[process_params], outputs=equation)

# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()