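"""Streamlit app: simple linear regression with tf.keras on real data.

Trains a one-feature linear model on the ML Crash Course
california_housing_train.csv dataset and predicts median_house_value
(in thousands of dollars). Learning rate, epochs, batch size and the
input feature are chosen from the sidebar. Launch with `streamlit run`
followed by this file's name.
"""
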
import streamlit as st
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt

# Function to build the model
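# A single Dense unit with a one-dimensional input learns y' = w * x + b,
# i.e. plain linear regression; RMSprop minimizes mean squared error and
# RMSE is reported as the training metric.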
def build_model(my_learning_rate):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(units=1, input_shape=(1,)))
    model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=my_learning_rate),
                  loss='mean_squared_error',
                  metrics=[tf.keras.metrics.RootMeanSquaredError()])
    return model

# Function to train the model
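# model.fit returns a History object; the learned weight and bias are read
# back from the layer weights, and per-epoch RMSE is extracted from the
# history for the loss curve.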
def train_model(model, df, feature, label, epochs, batch_size):
    history = model.fit(x=df[feature], y=df[label], batch_size=batch_size, epochs=epochs)
    trained_weight = model.get_weights()[0][0]
    trained_bias = model.get_weights()[1]
    epochs = history.epoch
    hist = pd.DataFrame(history.history)
    rmse = hist["root_mean_squared_error"]
    return trained_weight, trained_bias, epochs, rmse

# Function to plot the model
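# Scatter-plots 200 randomly sampled examples and overlays the fitted line
# from (0, bias) to (x_max, bias + weight * x_max).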
def plot_the_model(trained_weight, trained_bias, feature, label, df):
    # Use an explicit Figure (and close it afterwards) so figures do not
    # accumulate across Streamlit reruns.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlabel(feature)
    ax.set_ylabel(label)

    random_examples = df.sample(n=200)
    ax.scatter(random_examples[feature], random_examples[label])

    x0 = 0
    y0 = trained_bias
    x1 = random_examples[feature].max()
    y1 = trained_bias + (trained_weight * x1)
    ax.plot([x0, x1], [y0, y1], c='r')

    st.pyplot(fig)
    plt.close(fig)

# Function to plot the loss curve
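# Plots RMSE against epoch so training convergence can be inspected.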
def plot_the_loss_curve(epochs, rmse):
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Root Mean Squared Error")

    ax.plot(epochs, rmse, label="Loss")
    ax.legend()
    ax.set_ylim([rmse.min() * 0.97, rmse.max()])
    st.pyplot(fig)
    plt.close(fig)

# Load the dataset
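# st.cache_data keeps the downloaded CSV in memory across reruns;
# median_house_value is rescaled to thousands of dollars.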
@st.cache_data
def load_data():
    url = "https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv"
    df = pd.read_csv(url)
    df["median_house_value"] /= 1000.0
    return df

training_df = load_data()

# Streamlit interface
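# Sidebar widgets set the hyperparameters; the main page shows the raw data
# preview, the fitted model and the loss curve.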
st.title("Simple Linear Regression with Real Data")

st.write("https://colab.research.google.com/github/google/eng-edu/blob/main/ml/cc/exercises/linear_regression_with_a_real_dataset.ipynb?utm_source=mlcc&utm_campaign=colab-external&utm_medium=referral&utm_content=linear_regression_real_tf2-colab&hl=en")

if st.checkbox('Show raw data'):
    st.write(training_df.head())

learning_rate = st.sidebar.slider('Learning Rate', min_value=0.001, max_value=1.0, value=0.01, step=0.01)
epochs = st.sidebar.slider('Epochs', min_value=1, max_value=1000, value=30, step=1)
batch_size = st.sidebar.slider('Batch Size', min_value=1, max_value=len(training_df), value=30, step=1)
label = 'median_house_value'
# Offer every column except the label itself as the single input feature.
feature = st.sidebar.selectbox('Select Feature', [c for c in training_df.columns if c != label])

# A Streamlit script reruns top-to-bottom on every widget interaction, so a
# plain local variable would be reset to None before the Predict button is
# handled. Keep the trained model in st.session_state instead.
my_model = st.session_state.get('my_model', None)

if st.sidebar.button('Run'):
    my_model = build_model(learning_rate)
    weight, bias, epochs, rmse = train_model(my_model, training_df, feature, label, epochs, batch_size)
    # Persist the trained model so it survives the rerun triggered by Predict.
    st.session_state['my_model'] = my_model

    st.subheader('Model Plot')
    plot_the_model(weight, bias, feature, label, training_df)

    st.subheader('Loss Curve')
    plot_the_loss_curve(epochs, rmse)

# Function to make predictions for rows 10000 .. 10000 + n of the training set
def predict_house_values(model, n, feature, label):
    batch = training_df[feature][10000:10000 + n]
    predicted_values = model.predict_on_batch(x=batch)

    # st.write renders Markdown and collapses whitespace, so build the table
    # as plain text and show it with st.text to keep the columns aligned.
    lines = ["feature value    label value   predicted value",
             "                (thousand $)      (thousand $)",
             "----------------------------------------------"]
    for i in range(n):
        lines.append("%13.0f %14.0f %17.0f" % (training_df[feature][10000 + i],
                                               training_df[label][10000 + i],
                                               predicted_values[i][0]))
    st.text("\n".join(lines))

n_predictions = st.sidebar.slider('Number of Predictions', min_value=1, max_value=100, value=10)
# Predict only works once a model has been trained via Run; the trained model
# is restored from st.session_state above.
if my_model is not None and st.sidebar.button('Predict'):
    st.subheader('Predictions')
    predict_house_values(my_model, n_predictions, feature, label)