mdreyer5 committed on
Commit 11d96dc
Parent(s): 9654a66

Initial commit

Files changed (1):
  lab2222.py  +404  -0
lab2222.py ADDED
@@ -0,0 +1,404 @@
# -*- coding: utf-8 -*-
"""Lab2222.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1OUGOeTdmMbccW_st3Ao8nHDR5wm_VUNg
"""

from google.colab import drive

drive.mount("/content/ML_Course")

# Commented out IPython magic to ensure Python compatibility.
# %cd /content/ML_Course/MyDrive/ML_Course

import pandas as pd
housing = pd.read_csv("housing.csv")
housing.head(n=5)

housing.columns

housing.describe()

housing.info()

# Commented out IPython magic to ensure Python compatibility.
# %matplotlib inline
import matplotlib.pyplot as plt
housing.hist(bins=50, figsize=(20, 15))
plt.show()

# to make this notebook's output identical at every run
import numpy as np
np.random.seed(10)

# For illustration only. Sklearn has train_test_split()
def split_train_test(data, test_ratio):
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

# run the function to get the train & test set
train_set, test_set = split_train_test(housing, 0.2)

train_set.info()

test_set.info()

# the same split with scikit-learn's built-in helper
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=10)

train_set.info()

test_set.info()

test_set.to_csv('blind_test.csv', index=False)

train_set.plot(kind="scatter", x="longitude", y="latitude", alpha=0.4,
               s=train_set["population"]/100, label="population", figsize=(10, 7),
               c="median_house_value", cmap=plt.get_cmap("jet"), colorbar=True,
               sharex=False)
plt.legend()
plt.show()

train_set.info()

# rows with any missing values
train_set[train_set.isna().any(axis=1)]

train_set_clean = train_set.dropna(subset=["total_bedrooms"])
train_set_clean

train_set_clean.info()

train_labels = train_set_clean["median_house_value"].copy()           # get labels for output label Y
train_features = train_set_clean.drop("median_house_value", axis=1)   # drop labels to get features X for the training set
train_features.info()

train_features.head()

train_features.columns

train_features.info()

train_features.describe()

train_labels

train_features.hist(bins=50, figsize=(12, 9))

train_features.describe()

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()      # define the transformer
scaler.fit(train_features)   # call .fit() to compute the min and max of each column in the dataset

print("Min of each column: ", scaler.data_min_)
print("Max of each column: ", scaler.data_max_)

train_features.describe()

train_features_normalized = scaler.transform(train_features)
train_features_normalized

pd.DataFrame(train_features_normalized).hist(bins=50, figsize=(12, 9))
plt.show()
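# Added for reference (not in the original notebook): MinMaxScaler implements
# the per-column formula (x - min) / (max - min). A quick sanity check that
# the manual formula reproduces scaler.transform(), assuming the objects
# defined above:
manual = (train_features.values - scaler.data_min_) / (scaler.data_max_ - scaler.data_min_)
print(np.allclose(manual, train_features_normalized))   # expected: True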
## 1. split data to get train and test set
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=10)

## 2. clean the missing values
train_set_clean = train_set.dropna(subset=["total_bedrooms"])
train_set_clean

## 3. derive training features and training labels
train_labels = train_set_clean["median_house_value"].copy()           # get labels for output label Y
train_features = train_set_clean.drop("median_house_value", axis=1)   # drop labels to get features X for the training set

## 4. scale the numeric features in the training set
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()      # define the transformer
scaler.fit(train_features)   # fit on the training set only

train_features_normalized = scaler.transform(train_features)
train_features_normalized

from sklearn.linear_model import LinearRegression   # import the LinearRegression class
lin_reg = LinearRegression()                        # initialize the model
lin_reg.fit(train_features_normalized, train_labels)   # feed the training data X and label Y for supervised learning

training_predictions = lin_reg.predict(train_features_normalized)
training_predictions.shape

train_labels

## plot scatter plot
import matplotlib.pyplot as plt
plt.scatter(training_predictions, train_labels)
plt.xlabel('training_predictions', fontsize=15, color="red")
plt.ylabel('train_label', fontsize=15, color="green")
plt.title('Scatter plot for training_predictions and train_label', fontsize=15)
plt.xlim(0, np.max(training_predictions))   # hide the predictions that have negative prices
plt.show()

import numpy as np
# np.corrcoef returns the 2x2 correlation matrix; the off-diagonal entry is
# the prediction/label correlation
np.corrcoef(training_predictions, train_labels)

import pandas as pd
prediction_summary = pd.DataFrame({'predicted_label': training_predictions, 'actual_label': train_labels})
prediction_summary

prediction_summary['error'] = prediction_summary['actual_label'] - prediction_summary['predicted_label']
prediction_summary

from sklearn.metrics import mean_squared_error
lin_mse = mean_squared_error(train_labels, training_predictions)
lin_rmse = np.sqrt(lin_mse)
lin_rmse
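# Added for illustration (hedged): inspect the learned parameters of the
# linear model; coef_ holds one weight per normalized feature and intercept_
# is the bias term.
print(dict(zip(train_features.columns, lin_reg.coef_)))
print("intercept:", lin_reg.intercept_)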
## Step 1: training the data using the decision tree algorithm
from sklearn.tree import DecisionTreeRegressor      # import the DecisionTree class
tree_reg = DecisionTreeRegressor(random_state=10)   # initialize the model
tree_reg.fit(train_features_normalized, train_labels)   # feed the training data X and label Y for supervised learning

## Step 2: make a prediction using the tree model
training_predictions_trees = tree_reg.predict(train_features_normalized)
training_predictions_trees

## Step 3: visualize the scatter plot between predictions and actual labels
import matplotlib.pyplot as plt
plt.scatter(training_predictions_trees, train_labels)
plt.xlabel('training_predictions_trees', fontsize=15, color="red")
plt.ylabel('train_label', fontsize=15, color="green")
plt.title('Scatter plot for training_predictions_trees and train_label', fontsize=15)
plt.xlim(0, np.max(training_predictions_trees))   # hide the predictions that have negative prices
plt.show()

from sklearn.metrics import mean_squared_error
tree_mse = mean_squared_error(train_labels, training_predictions_trees)
tree_rmse = np.sqrt(tree_mse)
tree_rmse
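# Added note (hedged): an unpruned decision tree can memorize the training
# set, so a near-zero training RMSE says little about generalization. A
# sketch using 5-fold cross-validation for a more honest error estimate:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(tree_reg, train_features_normalized, train_labels,
                         scoring="neg_mean_squared_error", cv=5)
tree_cv_rmse = np.sqrt(-scores)
print("CV RMSE per fold:", tree_cv_rmse)
print("mean CV RMSE:", tree_cv_rmse.mean())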
## 1. clean the missing values in the test set
test_set_clean = test_set.dropna(subset=["total_bedrooms"])
test_set_clean

## 2. derive test features and test labels. In this case, test labels are only used for evaluation
test_labels = test_set_clean["median_house_value"].copy()            # get labels for output label Y
test_features = test_set_clean.drop("median_house_value", axis=1)    # drop labels to get features X for the test set

## 3. scale the numeric features in the test set.
## important note: do not call fit() on the test set; reuse the same scaler fitted on the training set
test_features_normalized = scaler.transform(test_features)
test_features_normalized

## 4. make a prediction using the tree model
test_predictions_trees = tree_reg.predict(test_features_normalized)
test_predictions_trees

from sklearn.metrics import mean_squared_error
test_tree_mse = mean_squared_error(test_labels, test_predictions_trees)
test_tree_rmse = np.sqrt(test_tree_mse)
test_tree_rmse
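# Added for comparison (hedged): the linear model from above can be scored on
# the same held-out features to contrast with the tree's test RMSE.
test_predictions_lin = lin_reg.predict(test_features_normalized)
lin_test_rmse = np.sqrt(mean_squared_error(test_labels, test_predictions_lin))
lin_test_rmse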
# Step 1: install Gradio (shell command; commented out for Python compatibility)
# !pip install --quiet gradio

# Step 2: import library
import gradio as gr
print(gr.__version__)

# Step 3.1: Define a simple "Hello World" function
# requirement: input is text, output is text
def greet(name):
    return "Hello " + name + "!!"

# Step 3.2: Define the input component (text style) and output component (text style) to create a simple GUI
# note: the gr.inputs/gr.outputs namespaces are deprecated; components now live
# directly under gr
input_module = gr.Textbox(label="Input Text")
output_module = gr.Textbox(label="Output Text")

# Step 3.3: Put all three components together into Gradio's Interface function
gr.Interface(fn=greet, inputs=input_module, outputs=output_module).launch()

# Step 5.1: Define a simple "image-to-text" function
# requirement: input is an image, output is text
def caption(image):
    return "Image is processed!!"

# Step 5.2: Define the input component (image style) and output component (text style) to create a simple GUI
input_module = gr.Image(label="Input Image")

output_module = gr.Textbox(label="Output Text")

# Step 5.3: Put all three components together into Gradio's Interface function
gr.Interface(fn=caption, inputs=input_module, outputs=output_module).launch()

# Step 6.1: Define different input components
import gradio as gr

# a. define text data type
input_module1 = gr.Textbox(label="Input Text")

# b. define image data type
input_module2 = gr.Image(label="Input Image")

# c. define Number data type
input_module3 = gr.Number(label="Input Number")

# d. define Slider data type
input_module4 = gr.Slider(1, 100, step=5, label="Input Slider")

# e. define Checkbox data type
input_module5 = gr.Checkbox(label="Does it work?")

# f. define Radio data type
input_module6 = gr.Radio(choices=["park", "zoo", "road"], label="Input Radio")

# g. define Dropdown data type
input_module7 = gr.Dropdown(choices=["park", "zoo", "road"], label="Input Dropdown")

# Step 6.2: Define different output components
# a. define text data type
output_module1 = gr.Textbox(label="Output Text")

# b. define image data type
output_module2 = gr.Image(label="Output Image")

# you can define more output components

# Step 6.3: Define a new function that accommodates the input modules.
def multi_inputs(input1, input2, input3, input4, input5, input6, input7):
    import numpy as np
    ## processing inputs

    ## return outputs
    output1 = "Processing inputs and return outputs"   # text output example
    output2 = np.random.rand(6, 6)                     # image-like array output example
    return output1, output2

# Step 6.4: Put all the components together into Gradio's Interface function
gr.Interface(fn=multi_inputs,
             inputs=[input_module1, input_module2, input_module3,
                     input_module4, input_module5, input_module6,
                     input_module7],
             outputs=[output_module1, output_module2]
             ).launch()
# Step 6.1: Define the input components for the housing-price app
import gradio as gr

# a. longitude slider
input_module1 = gr.Slider(-124.35, -114.35, step=0.5, label="Longitude")

# b. latitude slider
input_module2 = gr.Slider(32, 41, step=0.5, label="Latitude")

# c. housing median age slider
input_module3 = gr.Slider(1, 52, step=1, label="Housing_median_age(Year)")

# d. total rooms slider
input_module4 = gr.Slider(1, 40000, step=1, label="Total_rooms")

# e. total bedrooms slider
input_module5 = gr.Slider(1, 6441, label="Total_bedrooms")

# f. population slider
input_module6 = gr.Slider(1, 6441, step=1, label="Population")

# g. households slider
input_module7 = gr.Slider(1, 6081, step=1, label="Households")

# h. median income slider
input_module8 = gr.Slider(0, 15, step=1, label="Median_income")

# Step 6.2: Define different output components
# a. text output for the predicted price
output_module1 = gr.Textbox(label="Predicted Housing Prices")

# b. image output for the location plot
output_module2 = gr.Image(label="Output Image")

# you can define more output components
train_set.columns

# save the machine learning model to the local drive
import pickle
with open('tree_reg.pkl', 'wb') as f:
    pickle.dump(tree_reg, f)

# ls   (shell command; commented out for Python compatibility)
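# Added suggestion (hedged): the prediction function below also needs the
# fitted MinMaxScaler, not just the tree. Pickling it alongside the model
# keeps the app self-contained; 'scaler.pkl' is an illustrative filename.
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)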
# Step 6.3: Define a new function that accommodates the input modules.
def machine_learning_model(input1, input2, input3, input4, input5, input6, input7, input8):
    print('Start ML process')
    import numpy as np
    import pandas as pd
    print(input1, input2, input3, input4, input5, input6, input7, input8)

    # 1. process the user submission into a single-row feature array
    new_feature = np.array([[input1, input2, input3, input4, input5, input6, input7, input8]])
    print(new_feature)

    test_set = pd.DataFrame(new_feature, columns=['longitude', 'latitude', 'housing_median_age', 'total_rooms',
                                                  'total_bedrooms', 'population', 'households', 'median_income'])

    # 2. follow the same preprocessing steps as for the test data
    # 2.1 check missing values in total_bedrooms
    test_set_clean = test_set.dropna(subset=["total_bedrooms"])

    # 2.2 feature normalization with the scaler fitted on the training set
    test_features_normalized = scaler.transform(test_set_clean)
    print("test_features_normalized: ", test_features_normalized)

    # 3. load the pre-trained machine learning model
    with open('tree_reg.pkl', 'rb') as f:
        tree_reg = pickle.load(f)
    print("Start processing")

    # 4. apply the loaded model
    test_predictions_trees = tree_reg.predict(test_features_normalized)
    print("Prediction is:", test_predictions_trees)

    # plot the submitted location on top of the training-set map
    import matplotlib.pyplot as plt
    train_set.plot(kind="scatter", x="longitude", y="latitude", alpha=0.4,
                   s=train_set["population"]/100, label="population", figsize=(10, 7),
                   c="median_house_value", cmap=plt.get_cmap("jet"), colorbar=True,
                   sharex=False)
    plt.legend()
    plt.xlim(-124.35, -114.35)
    plt.ylim(32, 41)
    plt.plot([input1], [input2], marker="X", markersize=20, markeredgecolor="yellow", markerfacecolor="black")
    plt.savefig('test.png')

    # 5. send back the prediction and the saved map image
    return test_predictions_trees, 'test.png'
gr.Interface(fn=machine_learning_model,
             inputs=[input_module1, input_module2, input_module3,
                     input_module4, input_module5, input_module6,
                     input_module7, input_module8],
             outputs=[output_module1, output_module2]
             ).launch(debug=True)
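# Added usage note (hedged): the handler can be smoke-tested without the GUI
# by calling it directly; the feature values below are illustrative only.
# prediction, image_path = machine_learning_model(-122.0, 37.5, 20, 2000, 400, 1000, 350, 5)
# print(prediction, image_path)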