saratha18's picture
Update app.py
e4f9dbf verified
raw
history blame contribute delete
No virus
8.54 kB
import os
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
import pandas as pd
import tarfile
import urllib.request
# Remote root of the handson-ml2 repository (raw files), the local directory
# that holds the housing data, and the URL of the housing archive.
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml2/master/"
HOUSING_PATH = os.path.join("datasets", "housing")
HOUSING_URL = f"{DOWNLOAD_ROOT}datasets/housing/housing.tgz"
def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    """Download the housing archive and unpack it into ``housing_path``.

    Args:
        housing_url: URL of the ``housing.tgz`` archive to download.
        housing_path: Directory that receives both the downloaded archive
            and its extracted contents; created if it does not exist.
    """
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(housing_path, exist_ok=True)
    tgz_path = os.path.join(housing_path, "housing.tgz")
    urllib.request.urlretrieve(housing_url, tgz_path)
    # The context manager closes the archive even when extraction fails.
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive, so only point this function at a trusted URL.
    with tarfile.open(tgz_path) as housing_tgz:
        housing_tgz.extractall(path=housing_path)
def load_housing_data(housing_path=HOUSING_PATH):
    """Read ``housing.csv`` from ``housing_path`` and return it as a DataFrame."""
    return pd.read_csv(os.path.join(housing_path, "housing.csv"))
# 1. Download the data
fetch_housing_data()
housing_pd = load_housing_data()

# Tentatively drop the categorical feature.
housing = housing_pd.drop('ocean_proximity', axis=1)

# 2. Prepare the data for machine learning algorithms
## 2.1 Split the data to get a training set and a test set
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=10)

## 2.2 Drop the training rows whose total_bedrooms value is missing
train_set_clean = train_set.dropna(subset=["total_bedrooms"])

## 2.3 Derive the training labels (Y) and the training features (X)
train_labels = train_set_clean["median_house_value"].copy()
train_features = train_set_clean.drop("median_house_value", axis=1)

## 2.4 Scale the numeric features of the training set
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# fit() computes the per-column min and max; the same fitted scaler is reused
# later to transform prediction inputs.
scaler.fit(train_features)
train_features_normalized = scaler.transform(train_features)

# 3. Train the ML model on the training set
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
# Supervised learning: features X with their labels Y.
lin_reg.fit(train_features_normalized, train_labels)
### visualize the data
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Write the current matplotlib figure to ``IMAGES_PATH``.

    Args:
        fig_id: Base file name, without extension.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension, also passed to ``savefig`` as format.
        resolution: Value passed to ``savefig`` as ``dpi``.
    """
    file_name = fig_id + "." + fig_extension
    path = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id, ' to ',path)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
# Directory for the figures this app saves, plus a sub-directory that holds
# the background map image downloaded below.
PROJECT_ROOT_DIR = './'
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images")
images_path = os.path.join(IMAGES_PATH, "end_to_end_project")
os.makedirs(IMAGES_PATH, exist_ok=True)
os.makedirs(images_path, exist_ok=True)

# Fetch the California map image from the handson-ml2 repository.
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml2/master/"
filename = "california.png"
url = "{}images/end_to_end_project/{}".format(DOWNLOAD_ROOT, filename)
print("Downloading", filename)
urllib.request.urlretrieve(url, os.path.join(images_path, filename))
### written by Jie
def draw_map_customize(longitude, latitude, fig_id='test', fig_extension='png'):
    """Plot the housing data over the California map and mark one location.

    The scatter plot is drawn from the module-level ``housing`` frame, the
    map image is read from ``images_path``, and the result is written with
    ``save_fig``.

    Args:
        longitude: Longitude of the location to highlight.
        latitude: Latitude of the location to highlight.
        fig_id: Base name (without extension) of the saved image.
        fig_extension: Image format, also used as the file extension.

    Returns:
        Path of the saved image inside ``IMAGES_PATH``.
    """
    import matplotlib.image as mpimg

    california_img = mpimg.imread(os.path.join(images_path, filename))
    ax = housing.plot(kind="scatter", x="longitude", y="latitude", figsize=(10, 7),
                      s=housing['population'] / 100, label="Population",
                      c="median_house_value", cmap="jet",
                      colorbar=False, alpha=0.4)
    try:
        plt.imshow(california_img, extent=[-124.55, -113.80, 32.45, 42.05], alpha=0.5,
                   cmap=plt.get_cmap("jet"))
        plt.ylabel("Latitude", fontsize=18)
        plt.xlabel("Longitude", fontsize=18)
        plt.xticks(fontsize=18, rotation=0)
        plt.yticks(fontsize=18, rotation=0)

        # Colour and line style are given as keywords: a "ro" format string
        # would also name a marker and clash with the marker keyword.
        plt.plot(longitude, latitude, color="r", linestyle="None", alpha=0.7,
                 marker=r'$\clubsuit$', markersize=30)
        plt.annotate("Your location is here", xy=(longitude, latitude),
                     xytext=(longitude + 1, latitude + 1), fontsize=20,
                     arrowprops=dict(arrowstyle="->"))

        prices = housing["median_house_value"]
        tick_values = np.linspace(prices.min(), prices.max(), 11)
        cbar = plt.colorbar(ticks=tick_values / prices.max())
        cbar.ax.set_yticklabels(["$%dk" % (round(v / 1000)) for v in tick_values], fontsize=14)
        cbar.set_label('Median House Value', fontsize=16)

        plt.legend(fontsize=16)
        # Forward the extension so the file written matches the path returned.
        save_fig(fig_id, fig_extension=fig_extension)
    finally:
        # Every call creates a new figure; close it so repeated calls do not
        # accumulate open figures.
        plt.close(ax.get_figure())
    return os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
def get_sample_data(num_data):
    """Draw random feature rows from ``housing`` to use as demo examples.

    Rows with a missing feature value are excluded, because ``predict_price``
    cannot produce a prediction for them.

    Args:
        num_data: Number of rows to draw. Rows are drawn independently, so
            the same row may appear more than once.

    Returns:
        A list of ``num_data`` lists of floats, each ordered as
        [longitude, latitude, housing_median_age, total_rooms,
        total_bedrooms, population, households, median_income].
    """
    feature_columns = [
        'longitude',
        'latitude',
        'housing_median_age',
        'total_rooms',
        'total_bedrooms',
        'population',
        'households',
        'median_income',
    ]
    if num_data <= 0:
        return []
    candidates = housing[feature_columns].dropna()
    # replace=True keeps the draws independent, like sampling one row at a time.
    picked = candidates.sample(n=num_data, replace=True)
    return picked.astype(float).values.tolist()
def predict_price(longitude, latitude, housing_median_age, total_rooms, total_bedrooms, population, households, median_income):
    """Predict the median house value for one set of features and draw its map.

    The features are scaled with the module-level ``scaler`` that was fitted
    on the training set (it is not re-fitted here) and fed to ``lin_reg``.

    Args:
        longitude, latitude, housing_median_age, total_rooms, total_bedrooms,
        population, households, median_income: Feature values; each is
            converted with ``float()``.

    Returns:
        A ``(pred, map_file)`` tuple: the model's prediction and the path of
        the image produced by ``draw_map_customize``.

    Raises:
        ValueError: If any feature value is NaN.
    """
    # Key order defines the column order handed to the scaler.
    features = {
        'longitude': float(longitude),
        'latitude': float(latitude),
        'housing_median_age': float(housing_median_age),
        'total_rooms': float(total_rooms),
        'total_bedrooms': float(total_bedrooms),
        'population': float(population),
        'households': float(households),
        'median_income': float(median_income),
    }
    # Fail with a clear message instead of dropping the only row and letting
    # the scaler or model raise on empty/NaN input.
    missing = [name for name, value in features.items() if np.isnan(value)]
    if missing:
        raise ValueError("Cannot predict with missing feature value(s): " + ", ".join(missing))

    test_features = pd.DataFrame({name: [value] for name, value in features.items()})
    test_features_normalized = scaler.transform(test_features)
    pred = lin_reg.predict(test_features_normalized)[0]
    map_file = draw_map_customize(longitude, latitude, fig_id='test', fig_extension='png')
    return pred, map_file
### configure inputs/outputs
# The example rows are sampled from `housing`, so the upper bounds of the
# total_rooms, total_bedrooms and median_income sliders are taken from that
# same data; otherwise the examples fall outside the slider range.
# Count-valued sliders use step=1 so their defaults and the sampled examples
# sit on the step grid.
set_longitude = gr.inputs.Slider(-124.350000, -114.310000, step=0.5, default=-120, label = 'Longitude')
set_latitude = gr.inputs.Slider(32, 41, step=0.5, default=33, label = 'Latitude')
set_housing_median_age = gr.inputs.Slider(1, 52, step=1, default=10, label = 'Housing_median_age (Year)')
set_total_rooms = gr.inputs.Slider(1, int(np.ceil(housing['total_rooms'].max())), step=1, default=12, label = 'Total_rooms')
set_total_bedrooms = gr.inputs.Slider(1, int(np.ceil(housing['total_bedrooms'].max())), step=1, default=9, label = 'Total_bedrooms')
set_population = gr.inputs.Slider(3, 35682, step=1, default=10, label = 'Population')
set_households = gr.inputs.Slider(1, 6082, step=1, default=10, label = 'Households')
set_median_income = gr.inputs.Slider(0, float(np.ceil(housing['median_income'].max())), step=0.5, default=10, label = 'Median_income')

# Outputs: the prediction as text and the generated map as an image.
set_label = gr.outputs.Textbox(label="Predicted Housing Prices")
set_out_images = gr.outputs.Image(label="Visualize your location")

### configure gradio, detailed can be found at https://www.gradio.app/docs/#i_slider
interface = gr.Interface(fn=predict_price,
                         inputs=[set_longitude, set_latitude, set_housing_median_age, set_total_rooms,
                                 set_total_bedrooms, set_population, set_households, set_median_income],
                         outputs=[set_label, set_out_images],
                         examples_per_page = 2,
                         examples = get_sample_data(10),
                         title="Housing Price Prediction",
                         description= "Click examples below for a quick demo",
                         theme = 'huggingface',
                         layout = 'vertical'
                         )
interface.launch(debug=True)