# Spaces: Runtime error (status banner captured with this file; not part of the program)
import gradio as gr | |
import numpy as np | |
from PIL import Image | |
import requests | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import joblib | |
import hopsworks | |
from tqdm import tqdm | |
import xgboost as xgb | |
from geopy.geocoders import Nominatim | |
from datetime import date | |
from datetime import timedelta | |
# Column layout of one record, in dataset order.
# TODO: Remove brf
columnHeaders = (
    'area;streetName;number;sqm;rooms;soldDate;monthlyFee;monthlyCost;'
    'floor;yearBuilt;brf;agency;lat;lon;gdp;unemployment;interestRate'
).split(';')

# (min, max) bounds used to scale each numerical feature into [0, 1].
# The bounds were extracted from the data.
featureToMinMax = dict(
    sqm=(10, 800),
    rooms=(1, 20),
    monthlyFee=(0, 60000),
    monthlyCost=(0, 20000),
    floor=(-3, 35),
    yearBuilt=(1850, 2023),
    lat=(58.8, 60.2),
    lon=(17.5, 19.1),
    gdp=(505.1, 630.14),
    unemployment=(6.36, 8.66),
    interestRate=(-0.5, 2.64),
    number=(0, 300),
    soldDate=(2010, 2025),
)
def downloadModel():
    """Download the registered XGBoost model from Hopsworks and load it.

    The Hopsworks API key is read from the ``HOPSWORKS_API_KEY`` environment
    variable instead of being embedded in the source, so the secret never
    ends up in version control.

    Returns:
        The object unpickled from ``xgboost_model.pkl`` (version 5 of the
        ``xgboost_model`` entry in the project's model registry).

    Raises:
        RuntimeError: If ``HOPSWORKS_API_KEY`` is not set or is empty.
    """
    import os

    api_key = os.environ.get('HOPSWORKS_API_KEY')
    if not api_key:
        # Fail fast with a clear message rather than falling back to an
        # interactive login prompt.
        raise RuntimeError(
            'HOPSWORKS_API_KEY environment variable is not set; '
            'it is required to download the model from Hopsworks'
        )

    project = hopsworks.login(api_key_value=api_key)
    registry = project.get_model_registry()
    registered_model = registry.get_model("xgboost_model", version=5)
    model_dir = registered_model.download()
    # NOTE(review): joblib.load unpickles the file; only load artifacts
    # from a registry you trust.
    return joblib.load(os.path.join(model_dir, "xgboost_model.pkl"))
def getAddressInfo(streetName, number):
    """Look up the coordinates of a street address.

    The street name is cleaned before it is geocoded.

    Returns:
        A ``(lat, lon)`` tuple, or ``(None, None)`` when the address is not
        found.
    """
    cleaned_street = cleanAddress(streetName)
    try:
        coordinates = getCoordinatesFromAddress(cleaned_street, number)
    except AddressNotFound:
        coordinates = (None, None)
    return coordinates
def cleanAddress(x):
    """Return the street name with hyphens and invisible characters removed.

    The following characters are deleted wherever they occur: ``-``,
    zero-width space (U+200B), non-breaking space (U+00A0), non-breaking
    hyphen (U+2011), soft hyphen (U+00AD) and zero-width non-joiner
    (U+200C). Leading and trailing whitespace is then stripped.

    Args:
        x: The street name as a string.

    Returns:
        The cleaned street name.
    """
    unwanted = str.maketrans('', '', '-\u200b\u00a0\u2011\xad\u200c')
    # str.strip returns a new string; its result has to be the return value
    # for the whitespace trimming to take effect.
    return x.translate(unwanted).strip()
class AddressNotFound(Exception):
    """Raised when the geocoder returns no match for an address."""
def getCoordinatesFromAddress(streetName, number):
    """Geocode a Stockholm street address with the Nominatim server.

    Args:
        streetName: Street name to search for.
        number: House number; anything ``float()`` accepts. A value of 0
            means the query is made on the street name alone.

    Returns:
        ``(lat, lon)`` rounded to 6 decimals (accuracy of <1 meter).

    Raises:
        AddressNotFound: If the geocoder returns no result.
    """
    host, port = '165.227.162.37', '8080'
    contact = 'nathan.allard@gmail.com'
    geocoder = Nominatim(
        user_agent=contact,
        domain=host + ':' + port,
        scheme='http',
        timeout=10,
    )

    # Collapse values such as 12.0 or '12.0' to the plain string '12'.
    house_number = str(int(float(number)))
    if house_number == '0':
        query = f'{streetName}, Stockholm'
    else:
        query = f'{streetName} {house_number}, Stockholm'

    match = geocoder.geocode(query)
    if match is None:
        raise AddressNotFound
    return round(match.latitude, 6), round(match.longitude, 6)
def getFinancialInfo(date):
    """Return fixed ``(gdp, unemployment, interestRate)`` values.

    The ``date`` argument is accepted but not used; the same three
    constants are returned for every date.
    """
    fixed_gdp = 600.0
    fixed_unemployment = 7.0
    fixed_interest_rate = 0
    return fixed_gdp, fixed_unemployment, fixed_interest_rate
def dateToFloat(date):
    """Convert a ``YYYY-MM-DD`` date (optionally followed by a time) to a float.

    The value is ``year + month / 12 + day / 365``. Anything after the first
    space in the day field (for example a time of day) is ignored.
    """
    year_text, month_text, remainder = str(date).split('-')
    day_text = remainder.split(' ')[0]
    year_part = int(year_text)
    month_part = int(month_text) / 12
    day_part = int(day_text) / 365
    return year_part + month_part + day_part
def normalize(x, minVal, maxVal, feature):
    """Min-max scale ``x`` against ``[minVal, maxVal]`` and clamp to [0, 1].

    ``feature`` is accepted but not used by the computation.
    """
    scaled = (float(x) - minVal) / (maxVal - minVal)
    if scaled < 0:
        return 0
    if scaled > 1:
        return 1
    return scaled
def normalizeData(df):
    """Scale every column listed in ``featureToMinMax`` into [0, 1], in place.

    This is done by hand so the UI can transform its input the same way as
    the data. The ``soldDate`` column is first converted to a float with
    ``dateToFloat``.

    Returns:
        The same ``df`` object, modified.
    """
    print('Normalizing data...')
    for feature, (lower, upper) in tqdm(featureToMinMax.items()):
        if feature == 'soldDate':
            df[feature] = df[feature].apply(dateToFloat)
        df[feature] = df[feature].apply(
            lambda value: normalize(value, lower, upper, feature)
        )
    return df
def parsePrice(price):
    """Convert a normalized price prediction back to a SEK amount string.

    Inverts min-max scaling: ``price * (MAX - MIN) + MIN``, so 0 maps to
    the minimum price and 1 to the maximum.

    NOTE(review): this assumes the price target was scaled with the same
    min-max formula that ``normalize`` applies to the features -- confirm
    against the training pipeline.

    Args:
        price: Normalized price; anything usable in float arithmetic.

    Returns:
        The price truncated to a whole number, followed by ``' SEK'``.
    """
    featureToMinMaxPrice = {
        'price': (1.5e5, 7e7)
    }
    min_price, max_price = featureToMinMaxPrice['price']
    # The scale factor is the range (max - min), not max alone; otherwise a
    # normalized value of 1 would map to max + min.
    sek = price * (max_price - min_price) + min_price
    return f'{int(sek)} SEK'
def xgbFix(df):
    """Cast the columns of ``df`` to the dtypes expected downstream, in place.

    The text columns become pandas ``category`` columns and every other
    feature column becomes ``float``.

    Returns:
        The same ``df`` object, modified.
    """
    category_columns = ["area", "streetName", "brf", "agency"]
    float_columns = [
        "number", "sqm", "rooms", "monthlyFee", "monthlyCost", "floor",
        "yearBuilt", "gdp", "unemployment", "interestRate", "lat", "lon",
        "soldDate",
    ]

    as_category = df[category_columns].astype("category")
    df[category_columns] = as_category
    as_float = df[float_columns].astype(float)
    df[float_columns] = as_float
    return df
# Loaded once at import time so every prediction reuses the same model.
model = downloadModel()


def xgboostPred(df, explanation=None):
    """Predict a (normalized) price for every row of ``df``.

    The categorical columns are dropped and the remaining columns are
    passed to the model as a 2-D float array, one row per prediction.

    Args:
        df: Frame containing at least the ``area``, ``streetName``, ``brf``
            and ``agency`` columns plus the numerical feature columns.
        explanation: Unused; optional so the function can be called with
            the frame alone.

    Returns:
        Whatever ``model.predict`` returns for the feature matrix
        (one prediction per input row).
    """
    # Drop categorical features
    numeric = df.drop(['area', 'streetName', 'brf', 'agency'], axis=1)
    # Score all rows, not just the first, so callers that submit several
    # rows get one prediction back for each.
    features = numeric.to_numpy(dtype=float)
    return model.predict(features)
def autoPred():
    """Placeholder with no implementation; does nothing and returns None."""
    return None
def getDates():
    """Map a set of reference dates to human-readable labels.

    The dates are expressed relative to today (using 30-day months and
    365-day years), plus the fixed date 2022-02-24.

    Returns:
        A dict from ``YYYY-MM-DD`` strings to label strings, in the order:
        today, in a month, in a year, last year, three years ago, and the
        fixed date.
    """
    today = date.today()
    day_offsets = (
        (0, 'today'),
        (30, 'in a month'),
        (365, 'in a year'),
        (-365, 'last year'),
        (-365 * 3, 'three years ago'),
    )
    dateToExplanation = {
        (today + timedelta(days=offset)).strftime("%Y-%m-%d"): label
        for offset, label in day_offsets
    }
    dateToExplanation['2022-02-24'] = 'before Russia invaded Ukraine'
    return dateToExplanation
def sthlm(streetName, area, number, sqm, rooms, monthlyFee, monthlyCost, floor, yearBuilt):
    """Predict the apartment's price for each reference date from getDates.

    The address is geocoded, one feature row is built per reference date,
    the rows are normalized and cast, and each row is scored by the model.

    Args:
        streetName: Street name of the apartment.
        area: Area name.
        number: House number.
        sqm, rooms, monthlyFee, monthlyCost, floor, yearBuilt: Numerical
            apartment features.

    Returns:
        A ``(predictions, error)`` tuple of strings. On success
        ``predictions`` holds one line per reference date and ``error`` is
        empty; when the address cannot be geocoded ``predictions`` is empty
        and ``error`` explains why.
    """
    lat, lon = getAddressInfo(streetName, number)
    if lat is None or lon is None:
        return '', 'Address not found in the OpenStreetMap dataset (Nominatim), please try another address'

    # Fixed values for the categorical columns the form does not ask for;
    # xgboostPred drops these columns before predicting.
    agency = 'Notar'
    brf = 'BRF Kartboken 1'  # TODO: remove

    dates = getDates()
    rows = []
    for soldDate in dates:
        gdp, unemployment, interestRate = getFinancialInfo(soldDate)
        rows.append([area, streetName, number, sqm, rooms, soldDate, monthlyFee,
                     monthlyCost, floor, yearBuilt, brf, agency, lat, lon,
                     gdp, unemployment, interestRate])
    # DataFrame.append was removed in pandas 2.0; building the frame in one
    # go also gives every row a unique index.
    input_variables = pd.DataFrame(rows, columns=columnHeaders)

    df = normalizeData(input_variables)
    df = xgbFix(df)

    result = []
    # dict views cannot be indexed, so walk the labels directly. Each row is
    # scored on its own so every prediction is paired unambiguously with
    # its label.
    for position, explanation in enumerate(dates.values()):
        pred = xgboostPred(df.iloc[[position]], explanation)[0]
        result.append(f'Predicted price of the apartment {explanation}: {parsePrice(pred)}')
    return '\n'.join(result), ''
# Features from the sthlm dataset that the user fills in through the form.
numericalInputs = ['number', 'sqm', 'rooms', 'monthlyFee', 'monthlyCost', 'floor', 'yearBuilt']
categoricalInputs = ['area']

# Choices offered by the dropdowns.
catToInput = {
    'feature': ['Bromma', 'Abrahamsberg', 'Akalla']
}

# Build the input form. The resulting order (streetName, area, then the
# numerical inputs) mirrors the parameter order of sthlm.
inputs = [gr.inputs.Textbox(lines=1, label='streetName')]

for feature in categoricalInputs:
    inputs.append(
        gr.inputs.Dropdown(
            choices=catToInput.get('feature'),
            default="a",
            label=feature,
        )
    )

for feature in numericalInputs:
    # Bound each number field by the same limits used for normalization.
    minVal, maxVal = featureToMinMax[feature]
    inputs.append(
        gr.inputs.Number(default=0, label=feature, minimum=minVal, maximum=maxVal)
    )
# Wire the form to sthlm; the two text outputs receive the two strings
# that sthlm returns.
demo = gr.Interface(
    fn=sthlm,
    inputs=inputs,
    outputs=['text', 'text'],
    title="Stockholm Housing Valuation",
    description="Predict the price of an apartment in Stockholm",
    allow_flagging="never",
)

# Start serving the app.
demo.launch()