Nathanotal committed on
Commit
725441e
1 Parent(s): 11d8589

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +144 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from PIL import Image
4
+ import requests
5
+ import pandas as pd
6
+ import matplotlib.pyplot as plt
7
+ import numpy as np
8
+ import joblib
9
+ import hopsworks
10
+ from tqdm import tqdm
11
+
12
+ # The Hopsworks login and model download are performed inside downloadModel() below
13
+
14
+
15
+
16
# Column order of the single-row DataFrame that sthlm() builds for the model.
columnHeaders = [
    'area',
    'streetName',
    'number',
    'sqm',
    'rooms',
    'soldDate',
    'monthlyFee',
    'monthlyCost',
    'floor',
    'yearBuilt',
    'agency',
    'lat',
    'lon',
    'gdp',
    'unemployment',
    'interestRate',
]
17
+
18
+
19
def downloadModel():
    """Download the model registered as ``xgboost_model`` and load it.

    Logs in to Hopsworks, fetches version 3 of ``xgboost_model`` from the
    project's model registry, downloads it and unpickles
    ``xgboost_model.pkl`` from the downloaded directory with joblib.

    Returns:
        The object stored in ``xgboost_model.pkl``.
    """
    # Local import so this function does not depend on the file-level imports.
    import os

    project = hopsworks.login()
    mr = project.get_model_registry()
    temp = mr.get_model("xgboost_model", version=3)
    model_path = temp.download()

    # Join with os.path instead of "+" so a trailing separator in model_path
    # (or a non-POSIX separator) cannot produce a malformed path.
    xgb_model = joblib.load(os.path.join(model_path, "xgboost_model.pkl"))
    print(xgb_model)
    return xgb_model
29
+
30
def getAddressInfo(streetName, number):
    """Return the ``(lat, lon)`` pair for a street address.

    Placeholder: the address is resolved through getAddress() but not used
    yet, and both coordinates are always returned as ``None``.
    """
    _address = getAddress(streetName, number)  # not consumed yet
    lat, lon = None, None
    return lat, lon
36
+
37
def getAddress(streetName, number):
    """Resolve a street name and number to an address.

    Placeholder: the lookup is not implemented, so ``None`` is always returned.
    """
    address = None
    return address
40
+
41
def getFinancialInfo(soldDate=None):
    """Return ``(gdp, unemployment, interestRate)`` for a sale date.

    Placeholder: no lookup is implemented yet, so all three values are
    ``None`` regardless of the date.

    Args:
        soldDate: Date the figures should refer to. Optional so that both
            ``getFinancialInfo()`` and ``getFinancialInfo(soldDate)`` work;
            sthlm() calls this function with the date, which previously
            raised ``TypeError`` because no parameter was declared.

    Returns:
        A 3-tuple ``(gdp, unemployment, interestRate)``.
    """
    gdp, unemployment, interestRate = None, None, None
    return gdp, unemployment, interestRate
45
+
46
def dateToFloat(date):
    """Convert a ``YYYY-MM-DD[ time]`` date into a fractional year.

    The value is ``year + month / 12 + day / 365``; anything after the first
    space in the day field (e.g. a time of day) is ignored.
    """
    pieces = str(date).split('-')
    year, month, day = pieces
    day = day.partition(' ')[0]  # strip a trailing time component, if any

    year_part = int(year)
    month_part = int(month) / 12
    day_part = int(day) / 365
    return year_part + month_part + day_part
50
+
51
def normalize(x, minVal, maxVal, feature):
    """Min-max scale ``x`` into the closed interval [0, 1].

    A value of exactly 0 for the 'lat' or 'lon' feature is returned as 0
    without scaling. Every other value is mapped with
    ``(x - minVal) / (maxVal - minVal)`` and clamped to [0, 1].
    """
    is_coordinate = feature in ['lat', 'lon']
    if is_coordinate and x == 0:
        return 0

    scaled = (x - minVal) / (maxVal - minVal)
    if scaled < 0:
        return 0
    if scaled > 1:
        return 1
    return scaled
58
+
59
def normalizeData(df):
    """Scale the known numerical columns of ``df`` to the range [0, 1].

    Done manually (rather than with a fitted scaler) so the UI can transform
    its input exactly the same way as the training data.

    Each feature in the table below is scaled with normalize() using its
    fixed (min, max) bounds. 'soldDate' is first converted to a fractional
    year with dateToFloat(). Features that are not columns of ``df`` are
    skipped; for example the frame built by the UI has no 'price' column,
    and indexing it unconditionally raised ``KeyError``.

    Args:
        df: DataFrame to normalize. It is modified in place.

    Returns:
        The same DataFrame object, with the listed columns rescaled.
    """
    featureToMinMax = {
        'sqm': (10, 800),
        'rooms': (1, 20),
        'monthlyFee': (0, 60000),
        'monthlyCost': (0, 20000),
        'floor': (-3, 35),
        'yearBuilt': (1850, 2023),
        'lat': (58.8, 60.2),
        'lon': (17.5, 19.1),
        'gdp': (505.1, 630.14),
        'unemployment': (6.36, 8.66),
        'interestRate': (-0.5, 2.64),
        'price': (1.5e5, 7e7),
        'number': (0, 300),
        'soldDate': (2010, 2025)
    }  # Extracted from the data

    # Normalize select numerical values to a value between 0 and 1
    print('Normalizing data...')
    # Unpack into minVal/maxVal rather than min/max to avoid shadowing builtins.
    for feature, (minVal, maxVal) in tqdm(featureToMinMax.items()):
        if feature not in df.columns:
            continue

        if feature == 'soldDate':
            df[feature] = df[feature].apply(dateToFloat)

        # The lambda is applied immediately, so capturing the loop variables is safe.
        df[feature] = df[feature].apply(
            lambda x: normalize(x, minVal, maxVal, feature))

    return df
89
+
90
+
91
# Fetched once at import time; sthlm() reads this module-level model on every request.
model = downloadModel()
92
+
93
def sthlm(area, streetName, number, sqm, rooms, monthlyFee, monthlyCost, floor, yearBuilt):
    """Gradio callback: run the form values through the module-level model.

    Builds a single-row DataFrame in ``columnHeaders`` order, normalizes it
    with normalizeData(), calls ``model.predict`` on that row and prints the
    result.

    Returns:
        The constant 100; the prediction itself is only printed.
    """
    soldDate = '2021-01-01'  # TODO
    agency = None
    lat, lon = getAddressInfo(streetName, number)
    gdp, unemployment, interestRate = getFinancialInfo(soldDate)

    # Parse the input so we can run it through the model:
    # one row, with values in the same order as columnHeaders.
    row = [
        area, streetName, number, sqm, rooms, soldDate, monthlyFee, monthlyCost,
        floor, yearBuilt, agency, lat, lon, gdp, unemployment, interestRate,
    ]
    input_variables = pd.DataFrame([row], columns=columnHeaders)

    df = normalizeData(input_variables)

    # First (only) row as a (1, n_features) array for predict()
    features = df.iloc[0].to_numpy().reshape(1, -1)

    # 'res' is a list of predictions returned as the label.
    res = model.predict(features)

    print(res)

    # NOTE(review): the prediction in `res` is discarded and a placeholder is
    # returned - confirm how `res` should be mapped back to a price.
    return 100
117
+
118
+
119
# Form fields exposed in the UI, grouped by the kind of widget they use
numericalInputs = ['number', 'sqm','rooms', 'monthlyFee','monthlyCost','floor','yearBuilt']
categoricalInputs = ['area']
# Placeholder dropdown choices shared by every categorical field
catToInput = {
    'feature': ['a', 'b', 'c']
}

# Generate the input form.
# Gradio passes the component values to `fn` positionally, so the components
# must be listed in the order of sthlm()'s parameters:
# area, streetName, number, sqm, rooms, monthlyFee, monthlyCost, floor, yearBuilt.
# The dropdown for 'area' therefore comes first (it used to be appended last,
# which shifted every argument by one position).
# Components use the top-level gr.* classes; the legacy gr.inputs.* namespace
# is deprecated in Gradio 3 and removed in Gradio 4.
inputs = []

for feature in categoricalInputs:
    inputs.append(gr.Dropdown(
        choices=catToInput.get('feature'), value="a", label=feature))

inputs.append(gr.Textbox(lines=1, label='streetName'))

for feature in numericalInputs:
    inputs.append(gr.Number(value=0, label=feature))

# Create the interface
demo = gr.Interface(
    fn=sthlm,
    title="Stockholm Housing Valuation",
    description="Predict the price of an apartment in Stockholm",
    allow_flagging="never",
    inputs=inputs,
    outputs=['number'])

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ hopsworks
2
+ joblib
3
+ gradio
4
+ numpy
5
+ pandas
6
+ requests
7
+ scikit-learn
8
+ tqdm