|
|
|
"""Lab2222.ipynb |
|
|
|
Automatically generated by Colaboratory. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1OUGOeTdmMbccW_st3Ao8nHDR5wm_VUNg |
|
|
|
|
|
from google.colab import drive |
|
|
|
drive.mount("/content/ML_Course") |
|
|
|
cd /content/ML_Course/MyDrive/ML_Course |
|
""" |
|
# ---- Data loading and first-look exploration (notebook cells) ----
import pandas as pd

# Load the California housing dataset; assumes housing.csv is in the
# current working directory (the Drive folder cd'd into above).
housing = pd.read_csv("housing.csv")

# Peek at the first rows (bare expressions like this are notebook cell outputs).
housing.head(n = 5)


housing.columns


# Summary statistics for the numeric columns.
housing.describe()


# Column dtypes and non-null counts; total_bedrooms rows with missing
# values are dropped further down with dropna.
housing.info()


# Histogram of every numeric attribute to inspect the distributions.
import matplotlib.pyplot as plt

housing.hist(bins=50, figsize=(20,15))

plt.show()
|
|
|
|
|
import numpy as np

# Fix the global NumPy RNG so the manual split_train_test below is reproducible.
np.random.seed(10)
|
|
|
|
|
def split_train_test(data, test_ratio):
    """Randomly split *data* into (train, test) DataFrames.

    Uses the current global NumPy RNG state, so seed beforehand for a
    reproducible split. *test_ratio* is the fraction of rows placed in
    the test set (truncated to a whole row count).
    """
    n_rows = len(data)
    order = np.random.permutation(n_rows)
    n_test = int(n_rows * test_ratio)
    test_idx, train_idx = order[:n_test], order[n_test:]
    return data.iloc[train_idx], data.iloc[test_idx]
|
|
|
|
|
# Split with the hand-rolled helper: 80% train / 20% test.
train_set, test_set = split_train_test(housing, 0.2)


train_set.info()


test_set.info()


# Redo the split with scikit-learn's equivalent; this OVERWRITES the manual
# split above. random_state pins the shuffle for reproducibility.
from sklearn.model_selection import train_test_split

train_set, test_set = train_test_split(housing, test_size=0.2, random_state=10)


train_set.info()


test_set.info()


# Persist the held-out rows so they can later be used as a blind test file.
test_set.to_csv('blind_test.csv', index = False)
|
|
|
# Geographic scatter of the training data: marker size tracks population
# (scaled down by 100), colour tracks median_house_value (jet colormap).
train_set.plot(kind="scatter", x="longitude", y="latitude", alpha=0.4,
    s=train_set["population"]/100, label="population", figsize=(10,7),
    c="median_house_value", cmap=plt.get_cmap("jet"), colorbar=True,
    sharex=False)

plt.legend()

plt.show()
|
|
|
train_set.info()


# Rows with at least one missing value (the NaNs are in total_bedrooms,
# which is why the next cell drops on that column).
train_set[train_set.isna().any(axis=1)]


# Drop rows whose total_bedrooms is missing rather than imputing.
train_set_clean = train_set.dropna(subset=["total_bedrooms"])

train_set_clean


train_set_clean.info()


# Separate the prediction target from the input features.
train_labels = train_set_clean["median_house_value"].copy()

train_features = train_set_clean.drop("median_house_value", axis=1)

train_features.info()


train_features.head()


train_features.columns


train_features.info()


train_features.describe()


train_labels


# Feature distributions before scaling (compare with the normalized
# histograms produced after min-max scaling below).
train_features.hist(bins=50, figsize=(12,9))


train_features.describe()
|
|
|
# Scale every feature column to [0, 1] with min-max normalization.
# The scaler is fit on the TRAINING features only and reused on test data later.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

scaler.fit(train_features)


print("Min of each column: ",scaler.data_min_)

print("Max of each column: ",scaler.data_max_)


train_features.describe()


# transform returns a plain NumPy array (column names are lost).
train_features_normalized = scaler.transform(train_features)

train_features_normalized


# Histograms after scaling: same shapes as before, x-axes now within [0, 1].
pd.DataFrame(train_features_normalized).hist(bins=50, figsize=(12,9))

plt.show()
|
|
|
|
|
# ---- Condensed repeat of the preprocessing pipeline ----
# (split -> drop missing total_bedrooms -> separate labels -> min-max scale).
# NOTE(review): duplicates the cells above verbatim, presumably so this
# section of the notebook can be re-run standalone.
from sklearn.model_selection import train_test_split

train_set, test_set = train_test_split(housing, test_size=0.2, random_state=10)


train_set_clean = train_set.dropna(subset=["total_bedrooms"])

train_set_clean


train_labels = train_set_clean["median_house_value"].copy()

train_features = train_set_clean.drop("median_house_value", axis=1)


from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

scaler.fit(train_features)


train_features_normalized = scaler.transform(train_features)

train_features_normalized
|
|
|
# ---- Model 1: ordinary least-squares linear regression ----
from sklearn.linear_model import LinearRegression

lin_reg = LinearRegression()

lin_reg.fit(train_features_normalized, train_labels)


# Predictions on the (normalized) training data itself.
training_predictions = lin_reg.predict(train_features_normalized)

training_predictions.shape


train_labels
|
|
|
|
|
# Predicted vs actual values on the training set; a perfect model would
# place every point on the diagonal.
import matplotlib.pyplot as plt

plt.scatter(training_predictions, train_labels )

plt.xlabel('training_predictions', fontsize=15,color="red")

plt.ylabel('train_label', fontsize=15,color="green")

plt.title('Scatter plot for training_predictions and train_label', fontsize=15)

plt.xlim(0,np.max(training_predictions))

plt.show()
|
|
|
import numpy as np

# Pearson correlation between predictions and true labels (2x2 matrix).
np.corrcoef(training_predictions, train_labels)


import pandas as pd

# Side-by-side table of predicted vs actual labels.
prediction_summary = pd.DataFrame({'predicted_label':training_predictions, 'actual_label':train_labels})

prediction_summary


# Signed residuals: positive means the model under-predicted.
prediction_summary['error'] = prediction_summary['actual_label'] - prediction_summary['predicted_label']

prediction_summary


# Training RMSE of the linear model (same units as the target).
from sklearn.metrics import mean_squared_error

lin_mse = mean_squared_error(train_labels, training_predictions)

lin_rmse = np.sqrt(lin_mse)

lin_rmse
|
|
|
|
|
# ---- Model 2: decision-tree regressor ----
from sklearn.tree import DecisionTreeRegressor

tree_reg = DecisionTreeRegressor(random_state=10)

tree_reg.fit(train_features_normalized, train_labels)


# Predictions on the training data the tree was fit on.
training_predictions_trees = tree_reg.predict(train_features_normalized)

training_predictions_trees
|
|
|
|
|
# Predicted vs actual for the tree on the training set.
import matplotlib.pyplot as plt

plt.scatter(training_predictions_trees, train_labels )

plt.xlabel('training_predictions_trees', fontsize=15,color="red")

plt.ylabel('train_label', fontsize=15,color="green")

plt.title('Scatter plot for training_predictions_trees and train_label', fontsize=15)

plt.xlim(0,np.max(training_predictions_trees))

plt.show()
|
|
|
# Training RMSE of the tree. NOTE(review): an unconstrained tree tends to
# memorize its training data, so this figure is typically far lower than
# the test RMSE computed below — compare the two before trusting the model.
from sklearn.metrics import mean_squared_error

tree_mse = mean_squared_error(train_labels, training_predictions_trees)

tree_rmse = np.sqrt(tree_mse)

tree_rmse
|
|
|
|
|
# ---- Evaluate the tree on the held-out test set ----
# Apply the same cleaning as for training: drop rows missing total_bedrooms.
test_set_clean = test_set.dropna(subset=["total_bedrooms"])

test_set_clean


test_labels = test_set_clean["median_house_value"].copy()

test_features = test_set_clean.drop("median_house_value", axis=1)


# Reuse the scaler fitted on the TRAINING features — no refitting on test data.
test_features_normalized = scaler.transform(test_features)

test_features_normalized


test_predictions_trees = tree_reg.predict(test_features_normalized)

test_predictions_trees


# Test RMSE of the tree (compare against the training RMSE above).
from sklearn.metrics import mean_squared_error

test_tree_mse = mean_squared_error(test_labels, test_predictions_trees)

test_tree_rmse = np.sqrt(test_tree_mse)

test_tree_rmse
|
|
|
|
|
|
|
|
|
|
|
# ---- Gradio demo GUI ----
import gradio as gr

print(gr.__version__)
|
|
|
|
|
|
|
def greet(name):
    """Return a greeting string for *name* (Gradio hello-world callback)."""
    greeting = "Hello " + name + "!!"
    return greeting
|
""" |
|
# Step 3.2: Define the input component (text style) and output component (text style) to create a simple GUI |
|
import gradio as gr |
|
input_module = gr.inputs.Textbox(label = "Input Text") |
|
output_module = gr.outputs.Textbox(label = "Output Text") |
|
|
|
# Step 3.3: Put all three component together into the gradio's interface function |
|
gr.Interface(fn=greet, inputs=input_module, outputs=output_module).launch() |
|
|
|
# Step 5.1: Define a simple "image-to-text" function |
|
# requirement: input is text, output is text |
|
|
|
def caption(image): |
|
return "Image is processed!!" |
|
|
|
# Step 5.2: Define the input component (image style) and output component (text style) to create a simple GUI |
|
import gradio as gr |
|
input_module = gr.inputs.Image(label = "Input Image") |
|
|
|
output_module = gr.outputs.Textbox(label = "Output Text") |
|
|
|
# Step 5.3: Put all three component together into the gradio's interface function |
|
gr.Interface(fn=caption, inputs=input_module, outputs=output_module).launch() |
|
|
|
# Step 6.1: Define different input components |
|
import gradio as gr |
|
|
|
# a. define text data type |
|
input_module1 = gr.inputs.Textbox(label = "Input Text") |
|
|
|
# b. define image data type |
|
input_module2 = gr.inputs.Image(label = "Input Image") |
|
|
|
# c. define Number data type |
|
input_module3 = gr.inputs.Number(label = "Input Number") |
|
|
|
# d. define Slider data type |
|
input_module4 = gr.inputs.Slider(1, 100, step=5, label = "Input Slider") |
|
|
|
# e. define Checkbox data type |
|
input_module5 = gr.inputs.Checkbox(label = "Does it work?") |
|
|
|
# f. define Radio data type |
|
input_module6 = gr.inputs.Radio(choices=["park", "zoo", "road"], label = "Input Radio") |
|
|
|
# g. define Dropdown data type |
|
input_module7 = gr.inputs.Dropdown(choices=["park", "zoo", "road"], label = "Input Dropdown") |
|
|
|
# Step 6.2: Define different output components |
|
# a. define text data type |
|
output_module1 = gr.outputs.Textbox(label = "Output Text") |
|
|
|
# b. define image data type |
|
output_module2 = gr.outputs.Image(label = "Output Image") |
|
|
|
# you can define more output components |
|
|
|
# Step 6.3: Define a new function that accommodates the input modules. |
|
def multi_inputs(input1, input2, input3, input4, input5, input6, input7 ): |
|
import numpy as np |
|
## processing inputs |
|
|
|
## return outputs |
|
output1 = "Processing inputs and return outputs" # text output example |
|
output2 = np.random.rand(6,6) # image-like array output example |
|
return output1,output2 |
|
|
|
# Step 6.4: Put all three component together into the gradio's interface function |
|
gr.Interface(fn=multi_inputs, |
|
inputs=[input_module1, input_module2, input_module3, |
|
input_module4, input_module5, input_module6, |
|
input_module7], |
|
outputs=[output_module1, output_module2] |
|
).launch() |
|
""" |
|
|
|
import gradio as gr


# One input slider per housing feature; slider ranges roughly match the
# min/max of each column in the dataset.
# NOTE(review): the gr.inputs / gr.outputs namespaces are deprecated and were
# removed in Gradio 3.x — on a modern install these calls must become
# gr.Slider / gr.Textbox / gr.Image. Kept as-is to match the version this
# notebook was run against (printed above); confirm before upgrading.
input_module1 = gr.inputs.Slider(-124.35,-114.35, step =0.5,label = "Longitude")


input_module2 = gr.inputs.Slider(32,41, step =0.5,label = "Latitude")


input_module3 = gr.inputs.Slider(1,52, step = 1,label = "Housing_median_age(Year)")


input_module4 = gr.inputs.Slider(1, 40000, step=1, label = "Total_rooms")


# NOTE(review): no step given here, unlike the other sliders — presumably
# intentional (continuous values); verify.
input_module5 = gr.inputs.Slider(1, 6441,label = "Total_bedrooms")


input_module6 = gr.inputs.Slider(1,6441,step = 1,label = "Population")


input_module7 = gr.inputs.Slider(1,6081,step = 1,label = "Households")


input_module8 = gr.inputs.Slider(0,15,step = 1,label = "Median_income")


# Outputs: the predicted price as text, plus a rendered map image.
output_module1 = gr.outputs.Textbox(label = "Predicted Housing Prices")


output_module2 = gr.outputs.Image(label = "Output Image")


# Sanity check: column order here must match the slider order above.
train_set.columns
|
|
|
|
|
import pickle


# Serialize the trained decision tree so the Gradio callback below can
# reload it from disk on each invocation.
with open('tree_reg.pkl','wb') as f:
    pickle.dump(tree_reg,f)
|
|
|
|
|
|
|
def machine_learning_model(input1, input2, input3, input4, input5, input6, input7, input8):
    """Gradio callback: predict the median house value for one district.

    Inputs arrive in slider order: longitude, latitude, housing_median_age,
    total_rooms, total_bedrooms, population, households, median_income.
    Returns (prediction array from the tree, path of a saved map image with
    the queried location marked).

    Relies on module-level state: `scaler` (MinMaxScaler fitted on the
    training features), `train_set` (background map data), `pickle`, and
    the serialized tree in 'tree_reg.pkl' written above.
    """
    print('Start ML process')
    import numpy as np
    import pandas as pd
    print(input1, input2, input3, input4, input5, input6, input7, input8)

    # Assemble the eight slider values into a single-row feature matrix
    # whose column order matches the training features.
    new_feature = np.array([[input1, input2, input3, input4, input5, input6, input7, input8]])
    print(new_feature)

    test_set = pd.DataFrame(new_feature, columns = ['longitude', 'latitude', 'housing_median_age', 'total_rooms',
        'total_bedrooms', 'population', 'households', 'median_income'])

    # Mirrors the training-time cleaning step; slider values cannot be NaN,
    # so this is effectively a no-op kept for symmetry with the pipeline.
    test_set_clean = test_set.dropna(subset=["total_bedrooms"])
    test_set_clean

    # Scale with the SAME scaler fitted on the training features.
    test_features_normalized = scaler.transform(test_set_clean)
    print("test_features_normalized: ", test_features_normalized)

    # Reload the trained tree from disk (written by the pickle cell above).
    with open('tree_reg.pkl','rb') as f:
        tree_reg = pickle.load(f)
    print("Start processing")

    # NOTE(review): output1/output2 are never used — dead code left over from
    # the multi-output Gradio template; safe to delete.
    output1 = 'This is the output'
    output2 = np.random.rand(28,28)

    test_predictions_trees = tree_reg.predict(test_features_normalized)
    print("Predicition is :",test_predictions_trees)

    import matplotlib.pyplot as plt

    # Redraw the population/price map of the training data and mark the
    # queried (longitude, latitude) with an X, then save the figure so
    # Gradio can display it via the returned file path.
    train_set.plot(kind="scatter", x="longitude", y="latitude", alpha=0.4,
        s=train_set["population"]/100, label="population", figsize=(10,7),
        c="median_house_value", cmap=plt.get_cmap("jet"), colorbar=True,
        sharex=False)
    plt.legend()

    plt.xlim(-124.35,-114.35)
    plt.ylim(32,41)
    plt.plot([input1],[input2],marker = "X",markersize = 20, markeredgecolor="yellow", markerfacecolor="black")
    plt.savefig('test.png')

    return test_predictions_trees,'test.png'
|
|
|
# Wire the eight sliders and two outputs to the prediction callback and
# start the app; debug=True keeps the cell running and streams server logs.
gr.Interface(fn=machine_learning_model,
    inputs=[input_module1, input_module2, input_module3,
        input_module4, input_module5, input_module6,
        input_module7, input_module8],
    outputs=[output_module1, output_module2]
    ).launch(debug = True)
|
|