import gradio as gr
import numpy as np
import plotly.graph_objects as go
from sklearn.tree import DecisionTreeRegressor

rng = np.random.default_rng(0)

# Toy 1D dataset: 80 samples drawn uniformly from [0, 5), targets on a sine
# curve, with extra noise added to every fifth observation.
X = np.sort(5 * rng.random((80, 1)), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.random(16))

md_description = """
# 1D regression with a decision tree

A [decision tree](https://scikit-learn.org/stable/modules/tree.html#tree) is used to fit a sine curve with additional noisy observations. As a result, it learns local linear regressions that approximate the sine curve.

We can see that if the maximum depth of the tree (controlled by the `max_depth` parameter) is set too high, the decision tree learns overly fine details of the training data and fits the noise, i.e. it overfits.
"""


def make_regression(model_1_depth, model_2_depth):
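    """Fit two decision trees with the requested max depths, plot their
    predictions against the noisy training data, and return the figure
    together with an updated reproduction snippet."""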
    regr_1 = DecisionTreeRegressor(max_depth=model_1_depth, random_state=0)
    regr_2 = DecisionTreeRegressor(max_depth=model_2_depth, random_state=0)
    regr_1.fit(X, y)
    regr_2.fit(X, y)

    # Predict on a dense grid covering the training range
    X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
    y_1 = regr_1.predict(X_test)
    y_2 = regr_2.predict(X_test)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X[:, 0], y=y, mode="markers", name="data"))
    fig.add_trace(
        go.Scatter(x=X_test[:, 0], y=y_1, mode="lines", name=f"max_depth={model_1_depth}")
    )
    fig.add_trace(
        go.Scatter(x=X_test[:, 0], y=y_2, mode="lines", name=f"max_depth={model_2_depth}")
    )

    fig.update_layout(title="Decision Tree Regression")
    fig.update_xaxes(title_text="data")
    fig.update_yaxes(title_text="target")

    return fig, make_example(model_1_depth, model_2_depth)


def make_example(model_1_depth, model_2_depth):
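    """Return a Markdown string with a code snippet that reproduces the
    current plot in a notebook."""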
    return f"""
    Run the following code in a notebook to reproduce this example with the current slider values and the same data:

    ```python
    import numpy as np
    import plotly.graph_objects as go
    from sklearn.tree import DecisionTreeRegressor

    rng = np.random.default_rng(0)

    X = np.sort(5 * rng.random((80, 1)), axis=0)
    y = np.sin(X).ravel()
    y[::5] += 3 * (0.5 - rng.random(16))

    regr_1 = DecisionTreeRegressor(max_depth={model_1_depth}, random_state=0)
    regr_2 = DecisionTreeRegressor(max_depth={model_2_depth}, random_state=0)
    regr_1.fit(X, y)
    regr_2.fit(X, y)

    # Predict
    X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
    y_1 = regr_1.predict(X_test)
    y_2 = regr_2.predict(X_test)


    fig = go.Figure()
    fig.add_trace(go.Scatter(x=X[:, 0], y=y, mode="markers", name="data"))
    fig.add_trace(go.Scatter(x=X_test[:, 0], y=y_1, mode="lines", name=f"Model 1 max_depth={model_1_depth}"))
    fig.add_trace(go.Scatter(x=X_test[:, 0], y=y_2, mode="lines", name=f"Model 2 max_depth={model_2_depth}"))

    fig.update_layout(title="Decision Tree Regression")
    fig.update_xaxes(title_text="data")
    fig.update_yaxes(title_text="target")
    fig.show()
    ```
    """


with gr.Blocks() as demo:
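    # Layout: description across the top, depth sliders on the left,
    # plot on the right, and the reproduction snippet below.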
    with gr.Row():
        gr.Markdown(md_description)
    with gr.Row():
        with gr.Column():
            slider_1 = gr.Slider(minimum=1, maximum=10, label="Model 1 max_depth", step=1)
            slider_2 = gr.Slider(minimum=1, maximum=10, label="Model 2 max_depth", step=1)
        with gr.Column():
            plot = gr.Plot(label="Output")

    with gr.Row():
        example = gr.Markdown(make_example(slider_1.value, slider_2.value))
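        # Re-fit the trees and refresh both the plot and the snippet whenever either slider changes.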
        slider_1.change(fn=make_regression, inputs=[slider_1, slider_2], outputs=[plot, example])
        slider_2.change(fn=make_regression, inputs=[slider_1, slider_2], outputs=[plot, example])

demo.launch()