mdreyer5 committed on
Commit 11d96dc
Parent(s): 9654a66

Initial commit

Files changed (1):
  lab2222.py  +404  -0
lab2222.py ADDED
@@ -0,0 +1,404 @@
# -*- coding: utf-8 -*-
"""Lab2222.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1OUGOeTdmMbccW_st3Ao8nHDR5wm_VUNg
"""

from google.colab import drive

drive.mount("/content/ML_Course")

# Commented out IPython magic to ensure Python compatibility.
# %cd /content/ML_Course/MyDrive/ML_Course

import pandas as pd
housing = pd.read_csv("housing.csv")
housing.head(n=5)

housing.columns

housing.describe()

housing.info()

# Commented out IPython magic to ensure Python compatibility.
# %matplotlib inline
import matplotlib.pyplot as plt
housing.hist(bins=50, figsize=(20, 15))
plt.show()

# to make this notebook's output identical at every run
import numpy as np
np.random.seed(10)

# For illustration only. Sklearn has train_test_split()
def split_train_test(data, test_ratio):
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

# run the function to get the train & test set
train_set, test_set = split_train_test(housing, 0.2)

train_set.info()

test_set.info()

# the same split with scikit-learn's built-in helper
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=10)

train_set.info()

test_set.info()

test_set.to_csv('blind_test.csv', index=False)

train_set.plot(kind="scatter", x="longitude", y="latitude", alpha=0.4,
               s=train_set["population"]/100, label="population", figsize=(10, 7),
               c="median_house_value", cmap=plt.get_cmap("jet"), colorbar=True,
               sharex=False)
plt.legend()
plt.show()

train_set.info()

# rows with any missing values
train_set[train_set.isna().any(axis=1)]

train_set_clean = train_set.dropna(subset=["total_bedrooms"])
train_set_clean

train_set_clean.info()

train_labels = train_set_clean["median_house_value"].copy()           # get labels for output label Y
train_features = train_set_clean.drop("median_house_value", axis=1)   # drop labels to get features X for the training set
train_features.info()

train_features.head()

train_features.columns

train_features.info()

train_features.describe()

train_labels

train_features.hist(bins=50, figsize=(12, 9))

train_features.describe()

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()      # define the transformer
scaler.fit(train_features)   # call .fit() to compute the min and max of each column in the dataset

print("Min of each column: ", scaler.data_min_)
print("Max of each column: ", scaler.data_max_)

train_features.describe()

train_features_normalized = scaler.transform(train_features)
train_features_normalized

pd.DataFrame(train_features_normalized).hist(bins=50, figsize=(12, 9))
plt.show()
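# Added for reference (not in the original notebook): MinMaxScaler implements
# the per-column formula (x - min) / (max - min). A quick sanity check that
# the manual formula reproduces scaler.transform(), assuming the objects
# defined above:
manual = (train_features.values - scaler.data_min_) / (scaler.data_max_ - scaler.data_min_)
print(np.allclose(manual, train_features_normalized))   # expected: True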
## 1. split data to get train and test set
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=10)

## 2. clean the missing values
train_set_clean = train_set.dropna(subset=["total_bedrooms"])
train_set_clean

## 3. derive training features and training labels
train_labels = train_set_clean["median_house_value"].copy()           # get labels for output label Y
train_features = train_set_clean.drop("median_house_value", axis=1)   # drop labels to get features X for the training set

## 4. scale the numeric features in the training set
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()      # define the transformer
scaler.fit(train_features)   # fit on the training set only

train_features_normalized = scaler.transform(train_features)
train_features_normalized

from sklearn.linear_model import LinearRegression   # import the LinearRegression class
lin_reg = LinearRegression()                        # initialize the model
lin_reg.fit(train_features_normalized, train_labels)   # feed the training data X and label Y for supervised learning

training_predictions = lin_reg.predict(train_features_normalized)
training_predictions.shape

train_labels

## plot scatter plot
import matplotlib.pyplot as plt
plt.scatter(training_predictions, train_labels)
plt.xlabel('training_predictions', fontsize=15, color="red")
plt.ylabel('train_label', fontsize=15, color="green")
plt.title('Scatter plot for training_predictions and train_label', fontsize=15)
plt.xlim(0, np.max(training_predictions))   # hide the predictions that have negative prices
plt.show()

import numpy as np
# np.corrcoef returns the 2x2 correlation matrix; the off-diagonal entry is
# the prediction/label correlation
np.corrcoef(training_predictions, train_labels)

import pandas as pd
prediction_summary = pd.DataFrame({'predicted_label': training_predictions, 'actual_label': train_labels})
prediction_summary

prediction_summary['error'] = prediction_summary['actual_label'] - prediction_summary['predicted_label']
prediction_summary

from sklearn.metrics import mean_squared_error
lin_mse = mean_squared_error(train_labels, training_predictions)
lin_rmse = np.sqrt(lin_mse)
lin_rmse
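# Added for illustration (hedged): inspect the learned parameters of the
# linear model; coef_ holds one weight per normalized feature and intercept_
# is the bias term.
print(dict(zip(train_features.columns, lin_reg.coef_)))
print("intercept:", lin_reg.intercept_)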
## Step 1: training the data using the decision tree algorithm
from sklearn.tree import DecisionTreeRegressor      # import the DecisionTree class
tree_reg = DecisionTreeRegressor(random_state=10)   # initialize the model
tree_reg.fit(train_features_normalized, train_labels)   # feed the training data X and label Y for supervised learning

## Step 2: make a prediction using the tree model
training_predictions_trees = tree_reg.predict(train_features_normalized)
training_predictions_trees

## Step 3: visualize the scatter plot between predictions and actual labels
import matplotlib.pyplot as plt
plt.scatter(training_predictions_trees, train_labels)
plt.xlabel('training_predictions_trees', fontsize=15, color="red")
plt.ylabel('train_label', fontsize=15, color="green")
plt.title('Scatter plot for training_predictions_trees and train_label', fontsize=15)
plt.xlim(0, np.max(training_predictions_trees))   # hide the predictions that have negative prices
plt.show()

from sklearn.metrics import mean_squared_error
tree_mse = mean_squared_error(train_labels, training_predictions_trees)
tree_rmse = np.sqrt(tree_mse)
tree_rmse
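# Added note (hedged): an unpruned decision tree can memorize the training
# set, so a near-zero training RMSE says little about generalization. A
# sketch using 5-fold cross-validation for a more honest error estimate:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(tree_reg, train_features_normalized, train_labels,
                         scoring="neg_mean_squared_error", cv=5)
tree_cv_rmse = np.sqrt(-scores)
print("CV RMSE per fold:", tree_cv_rmse)
print("mean CV RMSE:", tree_cv_rmse.mean())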
## 1. clean the missing values in the test set
test_set_clean = test_set.dropna(subset=["total_bedrooms"])
test_set_clean

## 2. derive test features and test labels. In this case, test labels are only used for evaluation
test_labels = test_set_clean["median_house_value"].copy()            # get labels for output label Y
test_features = test_set_clean.drop("median_house_value", axis=1)    # drop labels to get features X for the test set

## 3. scale the numeric features in the test set.
## important note: do not call fit() on the test set; reuse the same scaler fitted on the training set
test_features_normalized = scaler.transform(test_features)
test_features_normalized

## 4. make a prediction using the tree model
test_predictions_trees = tree_reg.predict(test_features_normalized)
test_predictions_trees

from sklearn.metrics import mean_squared_error
test_tree_mse = mean_squared_error(test_labels, test_predictions_trees)
test_tree_rmse = np.sqrt(test_tree_mse)
test_tree_rmse
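# Added for comparison (hedged): the linear model from above can be scored on
# the same held-out features to contrast with the tree's test RMSE.
test_predictions_lin = lin_reg.predict(test_features_normalized)
lin_test_rmse = np.sqrt(mean_squared_error(test_labels, test_predictions_lin))
lin_test_rmse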
# Step 1: install Gradio (shell command; commented out for Python compatibility)
# !pip install --quiet gradio

# Step 2: import library
import gradio as gr
print(gr.__version__)

# Step 3.1: Define a simple "Hello World" function
# requirement: input is text, output is text
def greet(name):
    return "Hello " + name + "!!"

# Step 3.2: Define the input component (text style) and output component (text style) to create a simple GUI
# note: the gr.inputs/gr.outputs namespaces are deprecated; components now live
# directly under gr
input_module = gr.Textbox(label="Input Text")
output_module = gr.Textbox(label="Output Text")

# Step 3.3: Put all three components together into Gradio's Interface function
gr.Interface(fn=greet, inputs=input_module, outputs=output_module).launch()

# Step 5.1: Define a simple "image-to-text" function
# requirement: input is an image, output is text
def caption(image):
    return "Image is processed!!"

# Step 5.2: Define the input component (image style) and output component (text style) to create a simple GUI
input_module = gr.Image(label="Input Image")

output_module = gr.Textbox(label="Output Text")

# Step 5.3: Put all three components together into Gradio's Interface function
gr.Interface(fn=caption, inputs=input_module, outputs=output_module).launch()

# Step 6.1: Define different input components
import gradio as gr

# a. define text data type
input_module1 = gr.Textbox(label="Input Text")

# b. define image data type
input_module2 = gr.Image(label="Input Image")

# c. define Number data type
input_module3 = gr.Number(label="Input Number")

# d. define Slider data type
input_module4 = gr.Slider(1, 100, step=5, label="Input Slider")

# e. define Checkbox data type
input_module5 = gr.Checkbox(label="Does it work?")

# f. define Radio data type
input_module6 = gr.Radio(choices=["park", "zoo", "road"], label="Input Radio")

# g. define Dropdown data type
input_module7 = gr.Dropdown(choices=["park", "zoo", "road"], label="Input Dropdown")

# Step 6.2: Define different output components
# a. define text data type
output_module1 = gr.Textbox(label="Output Text")

# b. define image data type
output_module2 = gr.Image(label="Output Image")

# you can define more output components

# Step 6.3: Define a new function that accommodates the input modules.
def multi_inputs(input1, input2, input3, input4, input5, input6, input7):
    import numpy as np
    ## processing inputs

    ## return outputs
    output1 = "Processing inputs and return outputs"   # text output example
    output2 = np.random.rand(6, 6)                     # image-like array output example
    return output1, output2

# Step 6.4: Put all the components together into Gradio's Interface function
gr.Interface(fn=multi_inputs,
             inputs=[input_module1, input_module2, input_module3,
                     input_module4, input_module5, input_module6,
                     input_module7],
             outputs=[output_module1, output_module2]
             ).launch()
# Step 6.1: Define the input components for the housing-price app
import gradio as gr

# a. longitude slider
input_module1 = gr.Slider(-124.35, -114.35, step=0.5, label="Longitude")

# b. latitude slider
input_module2 = gr.Slider(32, 41, step=0.5, label="Latitude")

# c. housing median age slider
input_module3 = gr.Slider(1, 52, step=1, label="Housing_median_age(Year)")

# d. total rooms slider
input_module4 = gr.Slider(1, 40000, step=1, label="Total_rooms")

# e. total bedrooms slider
input_module5 = gr.Slider(1, 6441, label="Total_bedrooms")

# f. population slider
input_module6 = gr.Slider(1, 6441, step=1, label="Population")

# g. households slider
input_module7 = gr.Slider(1, 6081, step=1, label="Households")

# h. median income slider
input_module8 = gr.Slider(0, 15, step=1, label="Median_income")

# Step 6.2: Define different output components
# a. text output for the predicted price
output_module1 = gr.Textbox(label="Predicted Housing Prices")

# b. image output for the location plot
output_module2 = gr.Image(label="Output Image")

# you can define more output components
train_set.columns

# save the machine learning model to the local drive
import pickle
with open('tree_reg.pkl', 'wb') as f:
    pickle.dump(tree_reg, f)

# ls   (shell command; commented out for Python compatibility)
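# Added suggestion (hedged): the prediction function below also needs the
# fitted MinMaxScaler, not just the tree. Pickling it alongside the model
# keeps the app self-contained; 'scaler.pkl' is an illustrative filename.
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)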
# Step 6.3: Define a new function that accommodates the input modules.
def machine_learning_model(input1, input2, input3, input4, input5, input6, input7, input8):
    print('Start ML process')
    import numpy as np
    import pandas as pd
    print(input1, input2, input3, input4, input5, input6, input7, input8)

    # 1. process the user submission into a single-row feature array
    new_feature = np.array([[input1, input2, input3, input4, input5, input6, input7, input8]])
    print(new_feature)

    test_set = pd.DataFrame(new_feature, columns=['longitude', 'latitude', 'housing_median_age', 'total_rooms',
                                                  'total_bedrooms', 'population', 'households', 'median_income'])

    # 2. follow the same preprocessing steps as for the test data
    # 2.1 check missing values in total_bedrooms
    test_set_clean = test_set.dropna(subset=["total_bedrooms"])

    # 2.2 feature normalization with the scaler fitted on the training set
    test_features_normalized = scaler.transform(test_set_clean)
    print("test_features_normalized: ", test_features_normalized)

    # 3. load the pre-trained machine learning model
    with open('tree_reg.pkl', 'rb') as f:
        tree_reg = pickle.load(f)
    print("Start processing")

    # 4. apply the loaded model
    test_predictions_trees = tree_reg.predict(test_features_normalized)
    print("Prediction is:", test_predictions_trees)

    # plot the submitted location on top of the training-set map
    import matplotlib.pyplot as plt
    train_set.plot(kind="scatter", x="longitude", y="latitude", alpha=0.4,
                   s=train_set["population"]/100, label="population", figsize=(10, 7),
                   c="median_house_value", cmap=plt.get_cmap("jet"), colorbar=True,
                   sharex=False)
    plt.legend()
    plt.xlim(-124.35, -114.35)
    plt.ylim(32, 41)
    plt.plot([input1], [input2], marker="X", markersize=20, markeredgecolor="yellow", markerfacecolor="black")
    plt.savefig('test.png')

    # 5. send back the prediction and the saved map image
    return test_predictions_trees, 'test.png'
gr.Interface(fn=machine_learning_model,
             inputs=[input_module1, input_module2, input_module3,
                     input_module4, input_module5, input_module6,
                     input_module7, input_module8],
             outputs=[output_module1, output_module2]
             ).launch(debug=True)
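# Added usage note (hedged): the handler can be smoke-tested without the GUI
# by calling it directly; the feature values below are illustrative only.
# prediction, image_path = machine_learning_model(-122.0, 37.5, 20, 2000, 400, 1000, 350, 5)
# print(prediction, image_path)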