Spaces:

Nathanotal
/

stockholmHousingValuation

Runtime error

App Files Files Community

stockholmHousingValuation / app.py

Nathanotal

asdf

9f26ac9 about 2 years ago

raw

history blame

7.87 kB

	import gradio as gr
	import numpy as np
	from PIL import Image
	import requests
	import pandas as pd
	import matplotlib.pyplot as plt
	import numpy as np
	import joblib
	import hopsworks
	from tqdm import tqdm
	import xgboost as xgb
	from geopy.geocoders import Nominatim
	from datetime import date
	from datetime import timedelta

	# Column names, in order, of every row that sthlm() assembles for the model.
	# The same layout written as a ';'-separated header:
	# area;streetName;number;sqm;rooms;soldDate;monthlyFee;monthlyCost;floor;yearBuilt;brf;agency;lat;lon;gdp;unemployment;interestRate
	# TODO: Remove brf
	columnHeaders = [
	    'area', 'streetName', 'number', 'sqm', 'rooms', 'soldDate',
	    'monthlyFee', 'monthlyCost', 'floor', 'yearBuilt', 'brf', 'agency',
	    'lat', 'lon', 'gdp', 'unemployment', 'interestRate',
	]

	# (min, max) pair per numerical feature, extracted from the data.
	# normalizeData() uses these pairs to scale each feature into [0, 1].
	featureToMinMax = {
	    'sqm': (10, 800),
	    'rooms': (1, 20),
	    'monthlyFee': (0, 60000),
	    'monthlyCost': (0, 20000),
	    'floor': (-3, 35),
	    'yearBuilt': (1850, 2023),
	    'lat': (58.8, 60.2),
	    'lon': (17.5, 19.1),
	    'gdp': (505.1, 630.14),
	    'unemployment': (6.36, 8.66),
	    'interestRate': (-0.5, 2.64),
	    'number': (0, 300),
	    'soldDate': (2010, 2025),
	}

	def downloadModel():
	    """Download the saved XGBoost model from the Hopsworks model registry and load it.

	    The Hopsworks API key is taken from the ``HOPSWORKS_API_KEY`` environment
	    variable instead of being embedded in the source file.

	    Returns:
	        The object that joblib loads from ``xgboost_model.pkl`` inside the
	        downloaded directory of registry model "xgboost_model", version 5.
	    """
	    import os

	    # SECURITY: an API key used to be hard-coded on this line. It has been
	    # published with the file and should be revoked and replaced.
	    # NOTE(review): when the variable is unset, None is passed and
	    # hopsworks.login presumably falls back to its own key discovery — confirm.
	    project = hopsworks.login(api_key_value=os.environ.get('HOPSWORKS_API_KEY'))
	    mr = project.get_model_registry()
	    registeredModel = mr.get_model("xgboost_model", version=5)
	    model_path = registeredModel.download()

	    # joblib.load un-pickles the file; only load artifacts from a trusted registry.
	    return joblib.load(os.path.join(model_path, "xgboost_model.pkl"))

	def getAddressInfo(streetName, number):
	    """Geocode a street address, returning ``(None, None)`` when it is unknown.

	    Args:
	        streetName: Street name; passed through cleanAddress() before lookup.
	        number: Street number, forwarded unchanged to the geocoding helper.

	    Returns:
	        Whatever getCoordinatesFromAddress() returns for the cleaned street
	        name, or ``(None, None)`` if it raises AddressNotFound.
	    """
	    cleanedStreet = cleanAddress(streetName)
	    try:
	        coordinates = getCoordinatesFromAddress(cleanedStreet, number)
	    except AddressNotFound:
	        coordinates = (None, None)
	    return coordinates

	def cleanAddress(x):
	    """Strip characters from a street name that would confuse the geocoder.

	    Deletes ASCII hyphens, zero-width spaces (U+200B), non-breaking spaces
	    (U+00A0), non-breaking hyphens (U+2011), soft hyphens (U+00AD) and
	    zero-width non-joiners (U+200C), then trims surrounding whitespace.

	    Args:
	        x: The raw street name.

	    Returns:
	        The cleaned street name.
	    """
	    # Mapping every unwanted code point to None deletes it in a single pass.
	    unwanted = dict.fromkeys(map(ord, '-\u200b\u00a0\u2011\xad\u200c'))
	    # str.strip() returns a new string, so its result has to be the return
	    # value; calling it as a bare statement leaves the whitespace in place.
	    return x.translate(unwanted).strip()

	class AddressNotFound(Exception):
	    """Raised by getCoordinatesFromAddress() when the geocoder returns no match."""

	def getCoordinatesFromAddress(streetName, number):
	    """Look up the coordinates of a Stockholm address through a Nominatim server.

	    Args:
	        streetName: Street name used verbatim in the query.
	        number: Street number; converted with ``int(float(number))``. A value
	            that converts to 0 means the query is made without a number.

	    Returns:
	        ``(lat, lon)`` rounded to 6 decimals (accuracy of <1 meter).

	    Raises:
	        AddressNotFound: If the geocoder returns no location for the query.
	    """
	    host, port = '165.227.162.37', '8080'
	    contactEmail = 'nathan.allard@gmail.com'
	    geocoder = Nominatim(user_agent=contactEmail, domain=host + ':' + port, scheme='http', timeout=10)

	    houseNumber = str(int(float(number)))
	    if houseNumber == '0':
	        query = f'{streetName}, Stockholm'
	    else:
	        query = f'{streetName} {houseNumber}, Stockholm'

	    match = geocoder.geocode(query)
	    if match is None:
	        raise AddressNotFound

	    return round(match.latitude, 6), round(match.longitude, 6)

	def getFinancialInfo(date):
	    """Return the macro-economic features ``(gdp, unemployment, interestRate)``.

	    This is a placeholder: the same constants are returned for every date.

	    Args:
	        date: The date the figures are requested for. Currently ignored.

	    Returns:
	        The tuple ``(600.0, 7.0, 0)``.
	    """
	    # TODO: look up real figures for `date` instead of these fixed values.
	    return 600.0, 7.0, 0

	def dateToFloat(date):
	    """Convert a ``YYYY-MM-DD`` date (optionally followed by a time) to a float.

	    The value is ``year + month / 12 + day / 365``, an approximation of the
	    fractional year.
	    NOTE(review): presumably this must stay identical to the transformation
	    used when the model was trained — confirm before changing the formula.

	    Args:
	        date: Anything whose ``str()`` is ``YYYY-MM-DD`` or ``YYYY-MM-DD <time>``.

	    Returns:
	        The date as a float.
	    """
	    yearText, monthText, remainder = str(date).split('-')
	    # Discard a trailing time component such as " 00:00:00".
	    dayText = remainder.partition(' ')[0]
	    return int(yearText) + int(monthText) / 12 + int(dayText) / 365

	def normalize(x, minVal, maxVal, feature):
	    """Min-max scale ``x`` into the range [0, 1], clamping values outside it.

	    Args:
	        x: The value to scale; converted with ``float()``.
	        minVal: Value that maps to 0.
	        maxVal: Value that maps to 1.
	        feature: Name of the feature. Not used by the computation.

	    Returns:
	        ``(x - minVal) / (maxVal - minVal)``, or 0 / 1 when that falls below
	        0 / above 1.
	    """
	    scaled = (float(x) - minVal) / (maxVal - minVal)
	    if scaled < 0:
	        return 0
	    if scaled > 1:
	        return 1
	    return scaled

	def normalizeData(df):
	    """Scale every feature listed in ``featureToMinMax`` to a value between 0 and 1.

	    The scaling is done by hand so that the UI can transform its input the
	    same way as the data. ``soldDate`` is first converted with dateToFloat().

	    Args:
	        df: DataFrame containing every column named in ``featureToMinMax``.
	            It is modified in place.

	    Returns:
	        The same DataFrame object, with the listed columns normalized.
	    """
	    print('Normalizing data...')
	    for feature, (lowerBound, upperBound) in tqdm(featureToMinMax.items()):
	        if feature == 'soldDate':
	            # Dates have to become numbers before they can be scaled.
	            df[feature] = df[feature].apply(dateToFloat)

	        df[feature] = df[feature].apply(normalize, args=(lowerBound, upperBound, feature))

	    return df

	def parsePrice(price):
	    """Convert a normalized price prediction back to SEK and format it.

	    The inverse of min-max scaling ``(x - MIN) / (MAX - MIN)`` is
	    ``x * (MAX - MIN) + MIN``. Multiplying by MAX alone maps a prediction of
	    1.0 to MAX + MIN, which is above the stated maximum.
	    NOTE(review): this assumes the training target was min-max scaled with
	    these bounds, like the input features — confirm against the training code.

	    Args:
	        price: Normalized price, nominally in [0, 1].

	    Returns:
	        The price as a string such as ``'150000 SEK'`` (truncated to whole SEK).
	    """
	    PRICE_MIN, PRICE_MAX = 1.5e5, 7e7
	    # float() promotes NumPy scalars so the arithmetic runs in double precision.
	    priceInSek = float(price) * (PRICE_MAX - PRICE_MIN) + PRICE_MIN
	    return f'{int(priceInSek)} SEK'

	def xgbFix(df):
	    """Cast the columns of ``df`` to the dtypes the prediction step works with.

	    The four text columns become pandas ``category`` columns and the thirteen
	    numerical columns become ``float``.

	    Args:
	        df: DataFrame containing all of the columns listed below. It is
	            modified in place.

	    Returns:
	        The same DataFrame object with converted dtypes.
	    """
	    categoricalColumns = ["area", "streetName", "brf", "agency"]
	    floatColumns = [
	        "number", "sqm", "rooms", "monthlyFee",
	        "monthlyCost", "floor", "yearBuilt", "gdp", "unemployment",
	        "interestRate", "lat", "lon", "soldDate",
	    ]

	    for columns, targetType in ((categoricalColumns, "category"), (floatColumns, float)):
	        df[columns] = df[columns].astype(targetType)
	    return df

	# Fetch and load the model once, when the module is imported; xgboostPred()
	# reads this module-level object for every prediction.
	model = downloadModel()

	def xgboostPred(df, explanation=None):
	    """Predict a normalized price for every row of ``df`` with the loaded model.

	    Args:
	        df: DataFrame holding the columns 'area', 'streetName', 'brf' and
	            'agency' plus the numerical features. It is not modified.
	        explanation: Unused. It now defaults to None so that the one-argument
	            call made by sthlm() no longer raises a TypeError.

	    Returns:
	        The result of ``model.predict`` on a 2-D array with one row per row of
	        ``df``. For a single-row frame this equals the previous behaviour,
	        which only ever predicted row 0.
	    """
	    # Drop categorical features
	    numericFeatures = df.drop(['area', 'streetName', 'brf', 'agency'], axis=1)

	    # sthlm() passes one row per sale date and pairs each prediction with a
	    # date, so every row is predicted rather than only the first.
	    return model.predict(numericFeatures.to_numpy())

	def autoPred():
	    """Placeholder that does nothing and returns None."""

	def getDates():
	    """Build the sale dates to price the apartment at, with a label for each.

	    Returns:
	        A dict mapping ``YYYY-MM-DD`` strings to a human-readable label, in
	        this order: today, +30 days, +365 days, -365 days, -3*365 days and
	        the fixed date 2022-02-24.
	    """
	    today = date.today()
	    # (offset in days relative to today, label)
	    dayOffsets = (
	        (0, 'today'),
	        (30, 'in a month'),
	        (365, 'in a year'),
	        (-365, 'last year'),
	        (-365 * 3, 'three years ago'),
	    )

	    dateToExplanation = {}
	    for offset, label in dayOffsets:
	        shifted = today + timedelta(days=offset)
	        dateToExplanation[shifted.strftime("%Y-%m-%d")] = label

	    # Fixed reference date, added last.
	    dateToExplanation['2022-02-24'] = 'before Russia invaded Ukraine'

	    return dateToExplanation


	def sthlm(streetName, area, number, sqm, rooms, monthlyFee, monthlyCost, floor, yearBuilt):
	    """Predict the price of a Stockholm apartment at several sale dates.

	    Args:
	        streetName: Street name of the apartment; also used for geocoding.
	        area: Area of the apartment (a categorical feature).
	        number: Street number; also used for geocoding.
	        sqm: Living area in square metres.
	        rooms: Number of rooms.
	        monthlyFee: Value for the 'monthlyFee' feature.
	        monthlyCost: Value for the 'monthlyCost' feature.
	        floor: Floor the apartment is on.
	        yearBuilt: Construction year.

	    Returns:
	        A ``(predictions, error)`` pair of strings. On success ``predictions``
	        has one line per predicted date and ``error`` is empty; when the
	        address cannot be geocoded ``predictions`` is empty and ``error``
	        explains why.
	    """
	    lat, lon = getAddressInfo(streetName, number)

	    # Without coordinates there is nothing to predict on.
	    if lat is None or lon is None:
	        return '', 'Address not found in the OpenStreetMap dataset (Nominatim), please try another address'

	    agency = 'Notar'  # Make fun if categorical works
	    brf = 'BRF Kartboken 1'  # TODO: remove
	    dates = getDates()

	    # One row per sale date. The rows are collected in a list and the frame is
	    # built once, because DataFrame.append was removed in pandas 2.0.
	    rows = []
	    for soldDate in dates:
	        gdp, unemployment, interestRate = getFinancialInfo(soldDate)
	        rows.append([area, streetName, number, sqm, rooms, soldDate, monthlyFee, monthlyCost,
	                     floor, yearBuilt, brf, agency, lat, lon, gdp, unemployment, interestRate])
	    input_variables = pd.DataFrame(rows, columns=columnHeaders)

	    df = normalizeData(input_variables)
	    df = xgbFix(df)

	    # dict views cannot be indexed, so take a list of the labels up front.
	    explanations = list(dates.values())
	    # The second argument is passed explicitly so the call is valid whether or
	    # not xgboostPred() gives that parameter a default.
	    pricePred = xgboostPred(df, explanations)

	    # zip() stops at the shorter sequence, so fewer predictions than dates
	    # simply yields fewer lines instead of an IndexError.
	    result = [
	        f'Predicted price of the apartment {explanation}: {parsePrice(pred)}'
	        for explanation, pred in zip(explanations, pricePred)
	    ]

	    return '\n'.join(result), ''



	# Features the user fills in. The resulting field order (streetName, area,
	# then the numerical features) matches the parameter order of sthlm().
	numericalInputs = ['number', 'sqm', 'rooms', 'monthlyFee', 'monthlyCost', 'floor', 'yearBuilt']
	categoricalInputs = ['area']
	catToInput = {
	    'feature': ['Bromma', 'Abrahamsberg', 'Akalla']
	}

	# The form starts with a free-text field for the street name.
	inputs = [gr.inputs.Textbox(lines=1, label='streetName')]

	# One dropdown per categorical feature.
	# NOTE(review): the choices are looked up under the literal key 'feature' and
	# the default "a" is not one of them — confirm both are intended.
	for feature in categoricalInputs:
	    inputs.append(
	        gr.inputs.Dropdown(choices=catToInput.get('feature'), default="a", label=feature)
	    )

	# One number field per numerical feature, bounded by the range used for normalization.
	for feature in numericalInputs:
	    minVal, maxVal = featureToMinMax[feature]
	    inputs.append(
	        gr.inputs.Number(default=0, label=feature, minimum=minVal, maximum=maxVal)
	    )



	# Wire the form to sthlm(): the first text output receives the predictions,
	# the second receives the error message.
	demo = gr.Interface(
	    fn=sthlm,
	    inputs=inputs,
	    outputs=['text', 'text'],
	    title="Stockholm Housing Valuation",
	    description="Predict the price of an apartment in Stockholm",
	    allow_flagging="never",
	)

	demo.launch()