"""Gradio app that predicts Stockholm apartment prices with an XGBoost model
pulled from the Hopsworks model registry."""

import gradio as gr
import numpy as np
from PIL import Image
import requests
import pandas as pd
import matplotlib.pyplot as plt
import joblib
import hopsworks
from tqdm import tqdm

# Column order expected by the trained model.
columnHeaders = ['area', 'streetName', 'number', 'sqm', 'rooms', 'soldDate',
                 'monthlyFee', 'monthlyCost', 'floor', 'yearBuilt', 'agency',
                 'lat', 'lon', 'gdp', 'unemployment', 'interestRate']


def downloadModel():
    """Download the saved XGBoost model (version 3) from the Hopsworks
    model registry and load it with joblib.

    Returns:
        The unpickled XGBoost model object.
    """
    project = hopsworks.login()
    mr = project.get_model_registry()
    temp = mr.get_model("xgboost_model", version=3)
    model_path = temp.download()
    xgb_model = joblib.load(model_path + "/xgboost_model.pkl")
    print(xgb_model)
    return xgb_model


def getAddressInfo(streetName, number):
    """Geocode a street name and number to (lat, lon).

    Placeholder implementation: always returns (None, None).
    """
    address = getAddress(streetName, number)
    ...
    lat = None
    lon = None
    return lat, lon


def getAddress(streetName, number):
    """Resolve a full address for the given street and number.

    Placeholder implementation: always returns None.
    """
    ...
    return None


def getFinancialInfo(soldDate=None):
    """Look up macro indicators (GDP, unemployment, interest rate) for the
    given sold date.

    BUG FIX: this was defined with no parameters, but `sthlm` calls it as
    `getFinancialInfo(soldDate)`, which raised a TypeError. The parameter is
    accepted (optional, so any existing no-arg callers still work).

    Placeholder implementation: always returns (None, None, None).
    """
    ...
    gdp, unemployment, interestRate = None, None, None
    return gdp, unemployment, interestRate


def dateToFloat(date):
    """Convert a 'YYYY-MM-DD[ HH:MM:SS]' date to a float year,
    e.g. '2021-06-15' -> 2021 + 6/12 + 15/365."""
    year, month, day = str(date).split('-')
    day = day.split(' ')[0]  # drop a trailing time component, if any
    return int(year) + int(month) / 12 + int(day) / 365


def normalize(x, minVal, maxVal, feature):
    """Min-max normalize x into [0, 1], clamping out-of-range values.

    A 0 lat/lon encodes 'coordinates unknown' and is passed through
    unchanged. Not fantastic, but it suffices.
    """
    if feature in ['lat', 'lon'] and x == 0:
        return 0
    res = (x - minVal) / (maxVal - minVal)
    return min(max(res, 0), 1)


def normalizeData(df):
    """Min-max normalize the numeric feature columns of `df` in place.

    Done manually (rather than with a fitted scaler) so the UI can transform
    user input exactly the same way the training data was transformed.

    Returns:
        The same DataFrame, with known numeric columns scaled to [0, 1].
    """
    # Min/max per feature, extracted from the training data.
    featureToMinMax = {
        'sqm': (10, 800),
        'rooms': (1, 20),
        'monthlyFee': (0, 60000),
        'monthlyCost': (0, 20000),
        'floor': (-3, 35),
        'yearBuilt': (1850, 2023),
        'lat': (58.8, 60.2),
        'lon': (17.5, 19.1),
        'gdp': (505.1, 630.14),
        'unemployment': (6.36, 8.66),
        'interestRate': (-0.5, 2.64),
        'price': (1.5e5, 7e7),
        'number': (0, 300),
        'soldDate': (2010, 2025),
    }

    print('Normalizing data...')
    for feature, (minVal, maxVal) in tqdm(featureToMinMax.items()):
        # BUG FIX: skip features absent from df — e.g. 'price' exists in the
        # training data but not in the single-row inference frame, which
        # previously raised a KeyError on every prediction. (Also renamed the
        # locals: the originals shadowed the min/max builtins that
        # `normalize` relies on.)
        if feature not in df.columns:
            continue
        if feature == 'soldDate':
            df[feature] = df[feature].apply(dateToFloat)
        df[feature] = df[feature].apply(
            lambda x: normalize(x, minVal, maxVal, feature))
    return df


model = downloadModel()


def sthlm(area, streetName, number, sqm, rooms, monthlyFee, monthlyCost,
          floor, yearBuilt):
    """Gradio callback: build a one-row DataFrame from the form inputs,
    normalize it like the training data, and return the model's prediction.
    """
    soldDate = '2021-01-01'  # TODO: use the current date instead
    agency = None
    lat, lon = getAddressInfo(streetName, number)
    gdp, unemployment, interestRate = getFinancialInfo(soldDate)

    # Create a dataframe from the input values, in the column order the
    # model was trained on.
    input_variables = pd.DataFrame(
        [[area, streetName, number, sqm, rooms, soldDate, monthlyFee,
          monthlyCost, floor, yearBuilt, agency, lat, lon, gdp,
          unemployment, interestRate]],
        columns=columnHeaders)
    df = normalizeData(input_variables)

    # First row as a numpy array; 'res' is the list of predicted labels.
    input_list = df.iloc[0].to_numpy()
    res = model.predict(np.asarray(input_list).reshape(1, -1))
    print(res)
    # BUG FIX: previously returned a hard-coded 100, so the UI never showed
    # the prediction. NOTE(review): if the model was trained on normalized
    # prices, this value may still need de-normalizing with the 'price'
    # min/max — confirm against the training pipeline.
    return float(res[0])


# All features present in the sthlm dataset that the form collects.
numericalInputs = ['number', 'sqm', 'rooms', 'monthlyFee', 'monthlyCost',
                   'floor', 'yearBuilt']
categoricalInputs = ['area']

inputs = [gr.inputs.Textbox(lines=1, label='streetName')]

# NOTE(review): placeholder dropdown choices — replace with the real
# area values from the dataset.
catToInput = {
    'feature': ['a', 'b', 'c']
}

# Generate the input form.
for feature in numericalInputs:
    inputs.append(gr.inputs.Number(default=0, label=feature))
for feature in categoricalInputs:
    inputs.append(gr.inputs.Dropdown(
        choices=catToInput.get('feature'),
        default="a",
        label=feature))

# Create the interface. NOTE(review): `gr.inputs.*` is the legacy Gradio 2.x
# API, removed in Gradio 3+ (use gr.Textbox / gr.Number / gr.Dropdown there);
# kept as-is to match the installed version.
demo = gr.Interface(
    fn=sthlm,
    title="Stockholm Housing Valuation",
    description="Predict the price of an apartment in Stockholm",
    allow_flagging="never",
    inputs=inputs,
    outputs=['number'])

demo.launch()