Nathanotal committed on
Commit
5e80c1b
1 Parent(s): 12299c2

fix financial data

Browse files
app.py CHANGED
@@ -76,6 +76,57 @@ def getAddressInfo(streetName, number):
76
  except AddressNotFound:
77
  return None, None
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def cleanAddress(x):
80
  # Remove "-" from the street
81
  x = ''.join(x.split('-'))
@@ -227,21 +278,22 @@ def sthlm(streetName, number, sqm, rooms, monthlyFee, monthlyCost, floor, yearBu
227
  columns=columnHeaders)
228
 
229
  for soldDate in dates.keys():
230
- gdp, unemployment, interestRate = getFinancialInfo(soldDate)
231
 
232
  # Parse the input so we can run it through the model
233
  # Create a dataframe from the input values
234
  input_variables = input_variables.append(
235
  pd.DataFrame(
236
- [[streetName,number,sqm,rooms,soldDate,monthlyFee,monthlyCost,floor,yearBuilt,brf,agency,lat,lon,gdp,unemployment,interestRate]], columns=columnHeaders))
237
-
238
- df = normalizeData(input_variables)
 
239
  df = xgbFix(df)
240
 
241
  pricePred = None
242
  if auto:
243
  # pricePred = autoPred(df)
244
- None
245
  else:
246
  pricePred = xgboostPred(df)
247
 
@@ -258,7 +310,7 @@ def sthlm(streetName, number, sqm, rooms, monthlyFee, monthlyCost, floor, yearBu
258
  if diff > 0:
259
  result.append(f'If the apartment would be sold {explanation} it would have been worth more: {parsePrice(pred)} (+{parsePrice(diff)})')
260
  else:
261
- result.append(f'If the apartment would be sold {explanation} it would have been worth less: {parsePrice(pred)} (-{parsePrice(diff)})')
262
 
263
  return '\n'.join(result), ''
264
 
 
76
  except AddressNotFound:
77
  return None, None
78
 
79
+ # Adds the financial data to the apartment data
80
def populateApartmentData(aptDf):
    """Attach GDP, unemployment and interest-rate columns to the apartment rows.

    Each of the three CSV files under ``./data`` is read, passed through
    ``interpolateTime`` and then queried once per row with
    ``getValueFromTime`` using that row's ``soldDate``.

    Args:
        aptDf: DataFrame with a ``soldDate`` column. It is modified in
            place: the columns ``gdp``, ``unemployment`` and
            ``interestRate`` are added (or overwritten).

    Returns:
        The same ``aptDf`` object, now carrying the three extra columns.
    """
    print('Populating with financial data...')

    # Target column -> source file. Dict order is the processing order.
    sources = {
        'gdp': './data/historicalGDP.csv',
        'unemployment': './data/historicalUnemployment.csv',
        'interestRate': './data/historicalInterest.csv',
    }

    # Work in stages (read all, interpolate all, then assign) so that a
    # missing or malformed file fails before aptDf has been touched.
    rawFrames = {
        column: pd.read_csv(path, sep=';') for column, path in sources.items()
    }
    monthlyFrames = {
        column: interpolateTime(frame) for column, frame in rawFrames.items()
    }

    for column, lookupDf in monthlyFrames.items():
        aptDf[column] = aptDf['soldDate'].apply(getValueFromTime, args=(lookupDf,))
    return aptDf
92
+
93
def interpolateTime(df):
    """Resample a dated table to month starts and fill the missing months.

    Args:
        df: DataFrame with a ``date`` column that ``pd.to_datetime`` can
            parse and a ``value`` column (needed by ``fixChange``). The
            frame passed in is left unmodified.

    Returns:
        A new DataFrame indexed by month-start dates (``'MS'``). Rows that
        fall in the same month are averaged, months without data are filled
        by time-based interpolation, and ``change`` is recomputed by
        ``fixChange``.
    """
    # assign() returns a copy, so the caller's 'date' column is not
    # overwritten with parsed timestamps as a side effect.
    indexed = df.assign(date=pd.to_datetime(df['date'])).set_index('date')
    monthly = indexed.resample('MS').mean()
    filled = monthly.interpolate(method='time')
    return fixChange(filled)
99
+
100
def getValueFromTime(datetime, dataDf):
    """Return the ``value`` recorded for the month that contains ``datetime``.

    The date is snapped to the first day of its month and looked up in
    ``dataDf``. If that month is missing, the first day of the following
    month is tried (rolling over to January of the next year after
    December). If neither is present, the requested date is printed and
    0 is returned.

    Args:
        datetime: Date-like object supporting ``replace(day=..., month=...,
            year=...)`` and ``.month`` / ``.year`` (e.g. ``pd.Timestamp``).
        dataDf: DataFrame indexed by month-start dates with a ``value``
            column.

    Returns:
        The matching entry of ``dataDf['value']``, or 0 when no month
        could be matched.
    """
    monthStart = datetime.replace(day=1)

    # day is already 1, so moving the month can never produce an invalid date.
    if monthStart.month == 12:
        followingMonth = monthStart.replace(year=monthStart.year + 1, month=1)
    else:
        followingMonth = monthStart.replace(month=monthStart.month + 1)

    for candidate in (monthStart, followingMonth):
        try:
            return dataDf.loc[candidate, 'value']
        except KeyError:
            continue

    # Nothing matched: report the offending date and fall back to 0.
    print(datetime)
    return 0
113
+
114
def fixChange(df):
    """Recompute the ``change`` column from consecutive ``value`` rows.

    Args:
        df: DataFrame with a ``value`` column. It is modified in place; any
            existing ``change`` column is overwritten.

    Returns:
        The same DataFrame, where ``change`` is each row's ``value`` minus
        the previous row's ``value``, with missing differences set to 0.
    """
    # diff() yields NaN wherever there is no usable previous value (always
    # the first row); those entries are replaced with 0.
    df['change'] = df['value'].diff().fillna(0)
    return df
129
+
130
  def cleanAddress(x):
131
  # Remove "-" from the street
132
  x = ''.join(x.split('-'))
 
278
  columns=columnHeaders)
279
 
280
  for soldDate in dates.keys():
281
+ # gdp, unemployment, interestRate = getFinancialInfo(soldDate)
282
 
283
  # Parse the input so we can run it through the model
284
  # Create a dataframe from the input values
285
  input_variables = input_variables.append(
286
  pd.DataFrame(
287
+ [[streetName,number,sqm,rooms,soldDate,monthlyFee,monthlyCost,floor,yearBuilt,brf,agency,lat,lon]], columns=columnHeaders))
288
+
289
+ df = populateApartmentData(input_variables)
290
+ df = normalizeData(df)
291
  df = xgbFix(df)
292
 
293
  pricePred = None
294
  if auto:
295
  # pricePred = autoPred(df)
296
+ '', 'Autogluon is not working right now, please try again later'
297
  else:
298
  pricePred = xgboostPred(df)
299
 
 
310
  if diff > 0:
311
  result.append(f'If the apartment would be sold {explanation} it would have been worth more: {parsePrice(pred)} (+{parsePrice(diff)})')
312
  else:
313
+ result.append(f'If the apartment would be sold {explanation} it would have been worth less: {parsePrice(pred)} ({parsePrice(diff)})')
314
 
315
  return '\n'.join(result), ''
316
 
data/historicalGDP.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ date;value;change
2
+ 2025-01-01;631.64;1.70
3
+ 2024-01-01;629.94;1.00
4
+ 2023-01-01;628.94;-1.20
5
+ 2022-01-01;630.14;2.70
6
+ 2021-01-01;627.44;4.80
7
+ 2020-01-01;541.49;-2.94
8
+ 2019-01-01;533.88;1.99
9
+ 2018-01-01;555.46;1.95
10
+ 2017-01-01;541.02;2.57
11
+ 2016-01-01;515.65;2.07
12
+ 2015-01-01;505.10;4.49
13
+ 2014-01-01;581.96;2.66
14
+ 2013-01-01;586.84;1.19
15
+ 2012-01-01;552.48;-0.59
data/historicalInterest.csv ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ date;value
2
+ 2025-12-23;2.84
3
+ 2025-11-23;2.84
4
+ 2025-10-23;2.84
5
+ 2025-09-23;2.84
6
+ 2025-08-23;2.84
7
+ 2025-07-23;2.84
8
+ 2025-06-23;2.84
9
+ 2025-05-23;2.84
10
+ 2025-04-23;2.84
11
+ 2025-03-23;2.84
12
+ 2025-02-23;2.84
13
+ 2025-01-23;2.84
14
+ 2024-12-23;2.84
15
+ 2024-11-23;2.84
16
+ 2024-10-23;2.84
17
+ 2024-09-23;2.84
18
+ 2024-08-23;2.84
19
+ 2024-07-23;2.84
20
+ 2024-06-23;2.84
21
+ 2024-05-23;2.84
22
+ 2024-04-23;2.84
23
+ 2024-03-23;2.84
24
+ 2024-02-23;2.84
25
+ 2024-01-23;2.84
26
+ 2023-12-23;2.84
27
+ 2023-11-23;2.84
28
+ 2023-10-23;2.84
29
+ 2023-09-23;2.64
30
+ 2023-08-23;2.64
31
+ 2023-07-23;2.64
32
+ 2023-06-23;2.64
33
+ 2023-05-23;2.64
34
+ 2023-04-23;2.64
35
+ 2023-03-23;2.64
36
+ 2023-02-23;2.64
37
+ 2023-01-23;2.64
38
+ 2022-12-26;2.64
39
+ 2022-11-30;2.50
40
+ 2022-09-21;1.75
41
+ 2022-07-06;0.75
42
+ 2022-06-08;0.25
43
+ 2022-05-04;0.25
44
+ 2022-02-16;0.00
45
+ 2021-12-01;0.00
46
+ 2021-09-22;0.00
47
+ 2021-07-07;0.00
48
+ 2021-04-28;0.00
49
+ 2021-02-17;0.00
50
+ 2020-12-02;0.00
51
+ 2020-09-23;0.00
52
+ 2020-07-08;0.00
53
+ 2020-04-29;0.00
54
+ 2020-03-18;0.00
55
+ 2020-02-19;0.00
56
+ 2020-01-08;0.00
57
+ 2019-10-30;-0.25
58
+ 2019-09-11;-0.25
59
+ 2019-07-10;-0.25
60
+ 2019-05-08;-0.25
61
+ 2019-02-20;-0.25
62
+ 2019-01-09;-0.25
63
+ 2018-10-31;-0.50
64
+ 2018-09-12;-0.50
65
+ 2018-07-04;-0.50
66
+ 2018-05-02;-0.50
67
+ 2018-02-21;-0.50
68
+ 2018-01-03;-0.50
69
+ 2017-11-01;-0.50
70
+ 2017-09-13;-0.50
71
+ 2017-07-05;-0.50
72
+ 2017-05-03;-0.50
73
+ 2017-02-22;-0.50
74
+ 2016-12-28;-0.50
75
+ 2016-11-02;-0.50
76
+ 2016-09-14;-0.50
77
+ 2016-07-13;-0.50
78
+ 2016-04-27;-0.50
79
+ 2016-02-17;-0.50
80
+ 2015-12-16;-0.35
81
+ 2015-11-04;-0.35
82
+ 2015-09-09;-0.35
83
+ 2015-07-08;-0.35
84
+ 2015-05-06;-0.25
85
+ 2015-03-25;-0.25
86
+ 2015-02-18;-0.10
87
+ 2014-12-17;0.00
88
+ 2014-10-29;0.00
89
+ 2014-09-10;0.25
90
+ 2014-07-09;0.25
91
+ 2014-04-16;0.75
92
+ 2014-02-19;0.75
93
+ 2013-12-18;0.75
94
+ 2013-10-30;1.00
95
+ 2013-09-11;1.00
96
+ 2013-07-10;1.00
97
+ 2013-04-24;1.00
98
+ 2013-02-20;1.00
99
+ 2012-12-19;1.00
100
+ 2012-10-31;1.25
101
+ 2012-09-12;1.25
102
+ 2012-07-11;1.50
103
+ 2012-04-25;1.50
104
+ 2012-02-22;1.50
data/historicalUnemployment.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ date;value;change
2
+ 2025-01-01;8.30;0.00
3
+ 2024-01-01;8.30;0.40
4
+ 2023-01-01;7.90;0.80
5
+ 2022-01-01;7.10;-1.56
6
+ 2021-01-01;8.66;0.37
7
+ 2020-01-01;8.29;1.46
8
+ 2019-01-01;6.83;0.47
9
+ 2018-01-01;6.36;-0.36
10
+ 2017-01-01;6.72;-0.27
11
+ 2016-01-01;6.99;-0.44
12
+ 2015-01-01;7.43;-0.52
13
+ 2014-01-01;7.95;-0.10
14
+ 2013-01-01;8.05;0.07
15
+ 2012-01-01;7.98;0.18