File size: 14,039 Bytes
11d96dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
# -*- coding: utf-8 -*-
"""Lab2222.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1OUGOeTdmMbccW_st3Ao8nHDR5wm_VUNg
"""

import os

from google.colab import drive

# Mount Google Drive inside the Colab VM so the course folder is reachable.
drive.mount("/content/ML_Course")

# The notebook used the IPython automagic `cd ...`, which is a syntax error in
# a plain .py file. os.chdir performs the same working-directory change and
# also works when run inside a notebook.
os.chdir("/content/ML_Course/MyDrive/ML_Course")

# Load the housing CSV from the current working directory and preview it
import pandas as pd
housing = pd.read_csv("housing.csv")
housing.head(n = 5)

# Column names (notebook cell output)
housing.columns

# Summary statistics of the columns
housing.describe()

# Dtypes and non-null counts per column
housing.info()

# Commented out IPython magic to ensure Python compatibility.
# %matplotlib inline
import matplotlib.pyplot as plt
# Histograms of the DataFrame's columns, 50 bins each
housing.hist(bins=50, figsize=(20,15))
plt.show()

# Seed NumPy's global random generator so the manual shuffle below is
# identical at every run
import numpy as np
np.random.seed(10)

# For illustration only. Sklearn has train_test_split()
def split_train_test(data, test_ratio):
    """Randomly partition *data* into a training part and a test part.

    The rows are shuffled with NumPy's global random generator; the first
    ``int(len(data) * test_ratio)`` shuffled positions become the test set
    and the remaining positions become the training set.

    Returns a ``(train, test)`` pair selected from *data* with ``.iloc``.
    """
    row_count = len(data)
    shuffled = np.random.permutation(row_count)
    cut = int(row_count * test_ratio)
    return data.iloc[shuffled[cut:]], data.iloc[shuffled[:cut]]

# Run the hand-written splitter to get the train & test set (test_ratio = 0.2)
train_set, test_set = split_train_test(housing, 0.2)

train_set.info()

test_set.info()

# Redo the split with scikit-learn's helper. This overwrites the
# train_set/test_set produced by the manual split above; random_state=10
# fixes the shuffle.
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=10)

train_set.info()

test_set.info()

# Write the held-out rows to disk without the index column
test_set.to_csv('blind_test.csv', index = False)

# Scatter plot of the training rows by longitude/latitude: marker size is
# population / 100 and marker colour follows median_house_value ("jet" colormap).
train_set.plot(kind="scatter", x="longitude", y="latitude", alpha=0.4, 
    s=train_set["population"]/100, label="population", figsize=(10,7),
    c="median_house_value", cmap=plt.get_cmap("jet"), colorbar=True,
    sharex=False)
plt.legend()
plt.show()

train_set.info()

# Show every training row that contains at least one missing value
train_set[train_set.isna().any(axis=1)]

# Drop the rows whose total_bedrooms value is missing
train_set_clean = train_set.dropna(subset=["total_bedrooms"])
train_set_clean

train_set_clean.info()

train_labels = train_set_clean["median_house_value"].copy() # get labels for output label Y
train_features = train_set_clean.drop("median_house_value", axis=1) # drop labels to get features X for training set
train_features.info()

train_features.head()

train_features.columns

train_features.info()

train_features.describe()

train_labels

# Histograms of the feature columns before scaling
train_features.hist(bins=50, figsize=(12,9))

train_features.describe()

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler() ## define the transformer
scaler.fit(train_features) ## call .fit() method to calculate the min and max value for each column in dataset

# Inspect the per-column minima and maxima learned by fit()
print("Min of each column: ",scaler.data_min_)
print("Max of each column: ",scaler.data_max_)

train_features.describe()

# Apply the fitted scaler to the training features
train_features_normalized = scaler.transform(train_features)
train_features_normalized

# Histograms of the scaled features (wrapped in a DataFrame so .hist() is available)
pd.DataFrame(train_features_normalized).hist(bins=50, figsize=(12,9))
plt.show()

## Recap of the preprocessing steps above, collected in one cell.

## 1. split data to get train and test set
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=10)

## 2. clean the missing values
train_set_clean = train_set.dropna(subset=["total_bedrooms"])
train_set_clean

## 3. derive training features and training labels
train_labels = train_set_clean["median_house_value"].copy() # get labels for output label Y
train_features = train_set_clean.drop("median_house_value", axis=1) # drop labels to get features X for training set


## 4. scale the numeric features in training set
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler() ## define the transformer
scaler.fit(train_features) ## call .fit() method to calculate the min and max value for each column in dataset

train_features_normalized = scaler.transform(train_features)
train_features_normalized

from sklearn.linear_model import LinearRegression ## import the LinearRegression Function
lin_reg = LinearRegression() ## Initialize the class
lin_reg.fit(train_features_normalized, train_labels) # feed the training data X, and label Y for supervised learning

# Predict on the same rows the model was trained on
training_predictions = lin_reg.predict(train_features_normalized)
training_predictions.shape

train_labels

## Scatter plot of predictions (x) against actual labels (y)
import matplotlib.pyplot as plt
plt.scatter(training_predictions, train_labels )
plt.xlabel('training_predictions', fontsize=15,color="red")
plt.ylabel('train_label', fontsize=15,color="green")
plt.title('Scatter plot for training_predictions and train_label', fontsize=15)
plt.xlim(0,np.max(training_predictions)) # start the x-axis at 0 so negative predicted prices are not shown
plt.show()

# Correlation matrix between predictions and actual labels
import numpy as np
np.corrcoef(training_predictions, train_labels)

# Side-by-side table of predicted and actual labels
import pandas as pd 
prediction_summary = pd.DataFrame({'predicted_label':training_predictions, 'actual_label':train_labels})
prediction_summary

# Per-row error: actual minus predicted
prediction_summary['error'] = prediction_summary['actual_label'] - prediction_summary['predicted_label']
prediction_summary

# Root-mean-squared error, measured on the training data itself
from sklearn.metrics import mean_squared_error
lin_mse = mean_squared_error(train_labels, training_predictions)
lin_rmse = np.sqrt(lin_mse)
lin_rmse

## Step 1: training the data using decision tree algorithm
from sklearn.tree import DecisionTreeRegressor ## import the DecisionTree Function
tree_reg = DecisionTreeRegressor(random_state=10) ## Initialize the class
tree_reg.fit(train_features_normalized, train_labels) # feed the training data X, and label Y for supervised learning

### Step 2: make a prediction using tree model (on the training rows)
training_predictions_trees = tree_reg.predict(train_features_normalized)
training_predictions_trees

## Step 3: visualize the scatter plot between predictions and actual labels
import matplotlib.pyplot as plt
plt.scatter(training_predictions_trees, train_labels )
plt.xlabel('training_predictions_trees', fontsize=15,color="red")
plt.ylabel('train_label', fontsize=15,color="green")
plt.title('Scatter plot for training_predictions_trees and train_label', fontsize=15)
plt.xlim(0,np.max(training_predictions_trees)) # start the x-axis at 0 so negative predicted prices are not shown
plt.show()

## Step 4: root-mean-squared error of the tree, measured on the training data itself
from sklearn.metrics import mean_squared_error
tree_mse = mean_squared_error(train_labels, training_predictions_trees)
tree_rmse = np.sqrt(tree_mse)
tree_rmse

## 1. clean the missing values in test set
test_set_clean = test_set.dropna(subset=["total_bedrooms"])
test_set_clean

## 2. derive test features and test labels. In this case, test labels are only used for evaluation
test_labels = test_set_clean["median_house_value"].copy() # get labels for output label Y
test_features = test_set_clean.drop("median_house_value", axis=1) # drop labels to get features X for test set


## 3. scale the numeric features in test set.
## important note: do not apply fit function on the test set, using same scaler from training set
test_features_normalized = scaler.transform(test_features)
test_features_normalized

### 4. make a prediction using tree model
test_predictions_trees = tree_reg.predict(test_features_normalized)
test_predictions_trees

### 5. root-mean-squared error of the tree on the held-out test rows
from sklearn.metrics import mean_squared_error
test_tree_mse = mean_squared_error(test_labels, test_predictions_trees)
test_tree_rmse = np.sqrt(test_tree_mse)
test_tree_rmse

# Step 1: install Gradio
# The notebook used the shell escape `!pip install ...`, which is not valid
# Python. Running pip through the current interpreter installs into the same
# environment and works both as a script and inside a notebook.
import subprocess
import sys

subprocess.run(
    [sys.executable, "-m", "pip", "install", "--quiet", "gradio"],
    check=True,  # fail here, with pip's exit status, rather than at the import below
)

# Step 2: import library
import gradio as gr
print(gr.__version__)

# Step 3.1: Define a simple "Hello World" function
# requirement: input is text, output is text
def greet(name):
    """Return the greeting "Hello <name>!!" for the given text."""
    pieces = ("Hello ", name, "!!")
    return "".join(pieces)

# Step 3.2: Define the input component (text style) and output component (text style) to create a simple GUI
import gradio as gr

# The `gr.inputs` / `gr.outputs` namespaces were deprecated in Gradio 3 and
# removed in Gradio 4. The top-level component classes are used for both
# inputs and outputs, so this keeps working with an unpinned `pip install gradio`.
input_module = gr.Textbox(label="Input Text")
output_module = gr.Textbox(label="Output Text")

# Step 3.3: Put all three components together into gradio's Interface function
gr.Interface(fn=greet, inputs=input_module, outputs=output_module).launch()

# Step 5.1: Define a simple "image-to-text" function
# requirement: input is an image, output is text

def caption(image):
    """Return a fixed confirmation message; the image itself is not inspected."""
    message = "Image is processed!!"
    return message

# Step 5.2: Define the input component (image style) and output component (text style) to create a simple GUI
import gradio as gr

# The `gr.inputs` / `gr.outputs` namespaces were deprecated in Gradio 3 and
# removed in Gradio 4; use the top-level component classes instead.
input_module = gr.Image(label="Input Image")

output_module = gr.Textbox(label="Output Text")

# Step 5.3: Put all three components together into gradio's Interface function
gr.Interface(fn=caption, inputs=input_module, outputs=output_module).launch()

# Step 6.1: Define different input components
import gradio as gr

# The `gr.inputs` / `gr.outputs` namespaces were deprecated in Gradio 3 and
# removed in Gradio 4; the top-level component classes below serve as both
# inputs and outputs.

# a. define text data type
input_module1 = gr.Textbox(label="Input Text")

# b. define image data type
input_module2 = gr.Image(label="Input Image")

# c. define Number data type
input_module3 = gr.Number(label="Input Number")

# d. define Slider data type
input_module4 = gr.Slider(minimum=1, maximum=100, step=5, label="Input Slider")

# e. define Checkbox data type
input_module5 = gr.Checkbox(label="Does it work?")

# f. define Radio data type
input_module6 = gr.Radio(choices=["park", "zoo", "road"], label="Input Radio")

# g. define Dropdown data type
input_module7 = gr.Dropdown(choices=["park", "zoo", "road"], label="Input Dropdown")

# Step 6.2: Define different output components
# a. define text data type
output_module1 = gr.Textbox(label="Output Text")

# b. define image data type
output_module2 = gr.Image(label="Output Image")

# you can define more output components

# Step 6.3: Define a new function that accommodates the input modules.
def multi_inputs(input1, input2, input3, input4, input5, input6, input7):
    """Demo callback accepting seven inputs and producing two outputs.

    None of the inputs is used; the function returns a fixed text message
    together with a 6x6 array of random values as an image-like output.
    """
    import numpy as np

    text_result = "Processing inputs and return outputs"  # text output example
    image_result = np.random.rand(6, 6)  # image-like array output example
    return text_result, image_result

# Step 6.4: Put the function, the seven input components and the two output
# components together in gradio's Interface and launch it.
# The order of `inputs` matches the positional parameters of multi_inputs, and
# the order of `outputs` matches the tuple it returns.
gr.Interface(fn=multi_inputs, 
             inputs=[input_module1, input_module2, input_module3,
                     input_module4, input_module5, input_module6,
                     input_module7], 
             outputs=[output_module1, output_module2]
            ).launch()

# Step 6.1: Define one slider per housing feature, in the column order the
# model was trained on.
import gradio as gr

# The `gr.inputs` / `gr.outputs` namespaces were deprecated in Gradio 3 and
# removed in Gradio 4; the top-level component classes are used instead.

# a. longitude
input_module1 = gr.Slider(minimum=-124.35, maximum=-114.35, step=0.5, label="Longitude")

# b. latitude
input_module2 = gr.Slider(minimum=32, maximum=41, step=0.5, label="Latitude")

# c. housing median age
input_module3 = gr.Slider(minimum=1, maximum=52, step=1, label="Housing_median_age(Year)")

# d. total rooms
input_module4 = gr.Slider(minimum=1, maximum=40000, step=1, label="Total_rooms")

# e. total bedrooms (step=1 added so it matches the other count sliders)
input_module5 = gr.Slider(minimum=1, maximum=6441, step=1, label="Total_bedrooms")

# f. population
# NOTE(review): the maximum equals the Total_bedrooms maximum - possibly a
# copy-paste; confirm against the population range in the training data.
input_module6 = gr.Slider(minimum=1, maximum=6441, step=1, label="Population")

# g. households
input_module7 = gr.Slider(minimum=1, maximum=6081, step=1, label="Households")

# h. median income
input_module8 = gr.Slider(minimum=0, maximum=15, step=1, label="Median_income")

# Step 6.2: Define the output components
# a. text box showing the predicted price
output_module1 = gr.Textbox(label="Predicted Housing Prices")

# b. image output
output_module2 = gr.Image(label="Output Image")

# you can define more output components

train_set.columns

# Save the trained decision-tree model to the current working directory
import os
import pickle

with open('tree_reg.pkl', 'wb') as f:
    pickle.dump(tree_reg, f)

# The notebook used the IPython automagic `ls`, which is a NameError in plain
# Python. List the working directory explicitly to confirm the file was written.
print(sorted(os.listdir()))

# Step 6.3: Define a new function that accommodates the input modules.
def machine_learning_model(input1, input2, input3, input4, input5, input6, input7, input8):
    """Predict a house value for one user-submitted district and plot its location.

    The eight inputs are the raw feature values in the column order used for
    training: longitude, latitude, housing_median_age, total_rooms,
    total_bedrooms, population, households, median_income.

    Relies on notebook-level state: the fitted ``scaler``, the ``train_set``
    DataFrame (used for the background scatter plot) and the ``tree_reg.pkl``
    file written earlier in the notebook.

    Returns:
        A ``(prediction, 'test.png')`` tuple: the array returned by the tree
        model's ``predict`` and the path of the saved figure.

    Raises:
        ValueError: if ``total_bedrooms`` is missing, so no row is left to
            score after the missing-value clean-up.
    """
    import pickle

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    print('Start ML process')
    print(input1, input2, input3, input4, input5, input6, input7, input8)

    # 1. Turn the user submission into a one-row frame with the training column names
    new_feature = np.array([[input1, input2, input3, input4, input5, input6, input7, input8]])
    print(new_feature)

    test_set = pd.DataFrame(new_feature, columns = ['longitude', 'latitude', 'housing_median_age', 'total_rooms',
       'total_bedrooms', 'population', 'households', 'median_income'])

    # 2. Same preprocessing as for the test data: drop rows with a missing
    #    total_bedrooms, then scale with the scaler fitted on the training set
    test_set_clean = test_set.dropna(subset=["total_bedrooms"])
    if test_set_clean.empty:
        # Fail with a clear message instead of an opaque error from the scaler
        raise ValueError("total_bedrooms is missing; cannot make a prediction")

    test_features_normalized = scaler.transform(test_set_clean)
    print("test_features_normalized: ", test_features_normalized)

    # 3. Load the pre-trained model.
    # NOTE: pickle.load can execute arbitrary code; this is acceptable only
    # because tree_reg.pkl is produced by this same notebook.
    with open('tree_reg.pkl', 'rb') as f:
        tree_reg = pickle.load(f)
    print("Start processing")

    # 4. Apply the loaded model
    test_predictions_trees = tree_reg.predict(test_features_normalized)
    print("Predicition is :", test_predictions_trees)

    # 5. Draw the training districts and mark the submitted location on top.
    #    Work on the returned Axes rather than pyplot's implicit "current axes".
    ax = train_set.plot(kind="scatter", x="longitude", y="latitude", alpha=0.4,
                        s=train_set["population"]/100, label="population", figsize=(10,7),
                        c="median_house_value", cmap=plt.get_cmap("jet"), colorbar=True,
                        sharex=False)
    try:
        ax.legend()
        ax.set_xlim(-124.35, -114.35)
        ax.set_ylim(32, 41)
        ax.plot([input1], [input2], marker="X", markersize=20,
                markeredgecolor="yellow", markerfacecolor="black")
        ax.figure.savefig('test.png')
    finally:
        # Each request creates a new figure; close it so figures do not
        # accumulate in memory while the app keeps running.
        plt.close(ax.figure)

    # 6. Send back the prediction and the path of the rendered map
    return test_predictions_trees, 'test.png'

# Wire the eight feature sliders to machine_learning_model and show its two
# return values (prediction, image path) in the text and image outputs.
# The order of `inputs` matches the function's positional parameters.
gr.Interface(fn=machine_learning_model, 
             inputs=[input_module1, input_module2, input_module3,
                     input_module4, input_module5, input_module6,
                     input_module7, input_module8], 
             outputs=[output_module1, output_module2]
            ).launch(debug = True)