Spaces:
Runtime error
Runtime error
File size: 8,541 Bytes
c7de09c d861707 c7de09c cc6a81a c7de09c d382b6d c7de09c 99c0161 c7de09c 99c0161 c7de09c 13245e3 d6c5440 13245e3 c7de09c d44f918 c7de09c d44f918 c7de09c d44f918 1232431 c7de09c 99c0161 c7de09c 5deb571 8ce5d31 c7de09c 0142c26 c7de09c 07b6785 c7de09c d382b6d e4f9dbf c7de09c 85d23bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
import os
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
import pandas as pd
import tarfile
import urllib.request
# Base URL of the repository that hosts the dataset archive.
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml2/master/"
# Local directory (relative to the working directory) where the archive is stored and unpacked.
HOUSING_PATH = os.path.join("datasets", "housing")
# Full URL of the compressed housing dataset.
HOUSING_URL = f"{DOWNLOAD_ROOT}datasets/housing/housing.tgz"
def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    """Download the housing archive and unpack it into ``housing_path``.

    Args:
        housing_url: URL of the ``.tgz`` archive to download.
        housing_path: Directory that receives ``housing.tgz`` and its
            extracted contents. It is created if it does not exist.

    Raises:
        OSError: If the directory cannot be created or the download fails
            (``urllib.error.URLError`` is an ``OSError`` subclass).
        tarfile.TarError: If the downloaded file is not a readable archive.
    """
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(housing_path, exist_ok=True)
    tgz_path = os.path.join(housing_path, "housing.tgz")
    urllib.request.urlretrieve(housing_url, tgz_path)
    # The context manager closes the archive even if extraction raises.
    with tarfile.open(tgz_path) as housing_tgz:
        if hasattr(tarfile, "data_filter"):
            # The archive comes from the network: refuse members that would
            # escape housing_path (absolute paths, "..", unsafe links).
            housing_tgz.extractall(path=housing_path, filter="data")
        else:
            # Older interpreters without extraction filters.
            housing_tgz.extractall(path=housing_path)
def load_housing_data(housing_path=HOUSING_PATH):
    """Read ``housing.csv`` from ``housing_path`` into a pandas DataFrame.

    Args:
        housing_path: Directory that contains ``housing.csv``.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``.
    """
    return pd.read_csv(os.path.join(housing_path, "housing.csv"))
# 1. Download the data
fetch_housing_data()
housing_pd = load_housing_data()
## tentatively drop the categorical feature
housing = housing_pd.drop('ocean_proximity', axis=1)

# 2. Prepare the data for machine learning algorithms
## 2.1 split the data into a training set and a test set
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=10)
## 2.2 drop training rows whose total_bedrooms value is missing
train_set_clean = train_set.dropna(subset=["total_bedrooms"])
## 2.3 derive training features and training labels
train_labels = train_set_clean["median_house_value"].copy()  # output label Y
train_features = train_set_clean.drop("median_house_value", axis=1)  # input features X
## 2.4 scale the numeric features of the training set
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()  # define the transformer
scaler.fit(train_features)  # learn the min and max value of each column
train_features_normalized = scaler.transform(train_features)

# 3. Train the ML model on the training set
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()  # initialize the estimator
lin_reg.fit(train_features_normalized, train_labels)  # supervised fit on X and Y
### visualize the data
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Write the current matplotlib figure to ``IMAGES_PATH``.

    Args:
        fig_id: File name without extension.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension, also passed to ``savefig`` as the format.
        resolution: Value passed to ``savefig`` as ``dpi``.
    """
    target = os.path.join(IMAGES_PATH, ".".join([fig_id, fig_extension]))
    print("Saving figure", fig_id, ' to ',target)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, dpi=resolution, format=fig_extension)
# Directories for generated figures and for the downloaded background map.
PROJECT_ROOT_DIR = './'
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images")
os.makedirs(IMAGES_PATH, exist_ok=True)
images_path = os.path.join(PROJECT_ROOT_DIR, "images", "end_to_end_project")
os.makedirs(images_path, exist_ok=True)

# Fetch the map image that draw_map_customize() uses as a background.
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml2/master/"
filename = "california.png"
print("Downloading", filename)
url = f"{DOWNLOAD_ROOT}images/end_to_end_project/{filename}"
urllib.request.urlretrieve(url, os.path.join(images_path, filename))
### written by Jie
def draw_map_customize(longitude, latitude, fig_id='test', fig_extension='png'):
    """Plot the housing data over the map image and mark one location.

    The scatter plot of ``housing`` (marker size from population, colour from
    median house value) is drawn over the downloaded map image, the given
    point is highlighted and annotated, and the figure is saved via
    ``save_fig``.

    Args:
        longitude: X coordinate of the location to highlight.
        latitude: Y coordinate of the location to highlight.
        fig_id: File name (without extension) of the saved figure.
        fig_extension: File extension / format of the saved figure.

    Returns:
        Path of the saved image file.
    """
    import matplotlib.image as mpimg

    california_img = mpimg.imread(os.path.join(images_path, filename))
    ax = housing.plot(kind="scatter", x="longitude", y="latitude", figsize=(10, 7),
                      s=housing['population'] / 100, label="Population",
                      c="median_house_value", cmap="jet",
                      colorbar=False, alpha=0.4)
    try:
        # extent positions the image in longitude/latitude coordinates.
        plt.imshow(california_img, extent=[-124.55, -113.80, 32.45, 42.05], alpha=0.5,
                   cmap=plt.get_cmap("jet"))
        plt.ylabel("Latitude", fontsize=18)
        plt.xlabel("Longitude", fontsize=18)
        plt.xticks(fontsize=18, rotation=0)
        plt.yticks(fontsize=18, rotation=0)

        # Red club-shaped marker with no connecting line. The marker is given
        # once (keyword only) instead of also through a "ro" format string.
        plt.plot(longitude, latitude, color="r", linestyle="none",
                 marker=r'$\clubsuit$', alpha=0.7, markersize=30)
        plt.annotate("Your location is here", xy=(longitude, latitude),
                     xytext=(longitude + 1, latitude + 1), fontsize=20,
                     arrowprops=dict(arrowstyle="->"))

        # Tick positions are fractions of the maximum price; the labels show
        # the corresponding price in thousands of dollars.
        prices = housing["median_house_value"]
        tick_values = np.linspace(prices.min(), prices.max(), 11)
        cbar = plt.colorbar(ticks=tick_values / prices.max())
        cbar.ax.set_yticklabels(["$%dk" % (round(v / 1000)) for v in tick_values], fontsize=14)
        cbar.set_label('Median House Value', fontsize=16)

        plt.legend(fontsize=16)
        # Forward the extension so the file written matches the path returned.
        save_fig(fig_id, fig_extension=fig_extension)
    finally:
        # Each call creates a new figure; close it so repeated requests do not
        # accumulate open figures in memory.
        plt.close(ax.get_figure())
    return os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
def get_sample_data(num_data):
    """Draw random rows from ``housing`` to use as example inputs.

    Args:
        num_data: Number of example rows to return.

    Returns:
        A list of ``num_data`` lists of floats, each ordered as
        ``[longitude, latitude, housing_median_age, total_rooms,
        total_bedrooms, population, households, median_income]``.
        Empty when ``num_data`` is zero or negative.
    """
    if num_data <= 0:
        return []
    feature_columns = [
        'longitude', 'latitude', 'housing_median_age', 'total_rooms',
        'total_bedrooms', 'population', 'households', 'median_income',
    ]
    # Only offer complete rows: predict_price() drops rows with a missing
    # total_bedrooms value, so such an example would leave it nothing to
    # predict on.
    complete_rows = housing.dropna(subset=feature_columns)
    # replace=True keeps the draws independent, like sampling one row at a time.
    picked = complete_rows.sample(n=num_data, replace=True)
    return picked[feature_columns].astype(float).values.tolist()
def predict_price(longitude, latitude, housing_median_age, total_rooms,
                  total_bedrooms, population, households, median_income):
    """Predict the median house value for one set of features and map it.

    The eight inputs are converted to floats, scaled with the module-level
    ``scaler`` and passed to the module-level ``lin_reg`` model. A map with
    the location highlighted is rendered by ``draw_map_customize``.

    Returns:
        A ``(prediction, map_file)`` tuple: the model output for the single
        input row and the path of the rendered map image.

    Raises:
        ValueError: If any input is NaN or infinite.
    """
    # One-row table of features.
    # NOTE(review): the key order is presumably the column order the scaler
    # was fitted on - confirm against the CSV header.
    data = {
        'longitude': [float(longitude)],
        'latitude': [float(latitude)],
        'housing_median_age': [float(housing_median_age)],
        'total_rooms': [float(total_rooms)],
        'total_bedrooms': [float(total_bedrooms)],
        'population': [float(population)],
        'households': [float(households)],
        'median_income': [float(median_income)],
    }

    # Fail early with a clear message. Dropping the only row on a missing
    # value would hand the scaler an empty frame and produce an opaque error.
    invalid = [name for name, values in data.items() if not np.isfinite(values[0])]
    if invalid:
        raise ValueError("Missing or non-finite input for: " + ", ".join(invalid))

    test_features = pd.DataFrame(data)
    # Reuse the scaler fitted on the training set; never re-fit on new inputs.
    test_features_normalized = scaler.transform(test_features)
    pred = lin_reg.predict(test_features_normalized)[0]
    map_file = draw_map_customize(longitude, latitude, fig_id='test', fig_extension='png')
    return pred, map_file
### configure inputs/outputs
# NOTE(review): gr.inputs / gr.outputs are the legacy Gradio namespaces - confirm
# that the Gradio version installed for this app still provides them.
set_longitude = gr.inputs.Slider(-124.350000, -114.310000, step=0.5, default=-120, label = 'Longitude')
set_latitude = gr.inputs.Slider(32, 41, step=0.5, default=33, label = 'Latitude')
set_housing_median_age = gr.inputs.Slider(1, 52, step=1, default=10, label = 'Housing_median_age (Year)')
# NOTE(review): with step=5 starting at 1, the defaults 12 and 9 below are not on
# the slider grid, and these ranges look much narrower than the dataset values
# offered as examples - confirm the intended ranges.
set_total_rooms = gr.inputs.Slider(1, 20, step=5, default=12, label = 'Total_rooms')
set_total_bedrooms = gr.inputs.Slider(1, 10, step=5, default=9, label = 'Total_bedrooms')
set_population = gr.inputs.Slider(3, 35682, step=5, default=10, label = 'Population')
set_households = gr.inputs.Slider(1, 6082, step=5, default=10, label = 'Households')
# NOTE(review): the upper bound presumably assumes income in dollars - verify
# against the unit used by the median_income column.
set_median_income = gr.inputs.Slider(0, 25000000, step=0.5, default=10, label = 'Median_income')

# Outputs: the predicted price as text and the rendered map as an image.
set_label = gr.outputs.Textbox(label="Predicted Housing Prices")
set_out_images = gr.outputs.Image(label="Visualize your location")

### configure gradio, details can be found at https://www.gradio.app/docs/#i_slider
# The order of `inputs` must match the parameter order of predict_price, and
# each example row from get_sample_data follows the same order.
interface = gr.Interface(fn=predict_price,
                         inputs=[set_longitude, set_latitude, set_housing_median_age,
                                 set_total_rooms, set_total_bedrooms, set_population,
                                 set_households, set_median_income],
                         outputs=[set_label, set_out_images],
                         examples_per_page = 2,
                         examples = get_sample_data(10),
                         title="Housing Price Prediction",
                         description= "Click examples below for a quick demo",
                         theme = 'huggingface',
                         layout = 'vertical'
                         )
interface.launch(debug=True)