sivakornchong
committed on
Commit
•
7c9d88c
1
Parent(s):
9157140
add code to calculate df
Browse files- .gitattributes +1 -0
- app.py +2 -1
- main.py +29 -3
- main_old.py → src_retired/main_old.py +0 -0
- src_retired/sample.py +32 -0
- test.ipynb +154 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
finalized_model.sav filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
finalized_model.sav filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.json filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -17,7 +17,8 @@ iface = gr.Interface(
|
|
17 |
gr.inputs.Dropdown(choices=room_list, type="value", default=None, label='Room')
|
18 |
],
|
19 |
outputs= [
|
20 |
-
gr.
|
|
|
21 |
]
|
22 |
)
|
23 |
iface.launch()
|
|
|
17 |
gr.inputs.Dropdown(choices=room_list, type="value", default=None, label='Room')
|
18 |
],
|
19 |
outputs= [
|
20 |
+
gr.Textbox(type="text", label='Predicted House Price ($)'),
|
21 |
+
gr.Dataframe(row_count = (10, "dynamic"), col_count=(4, "fixed"), label="Past transactions")
|
22 |
]
|
23 |
)
|
24 |
iface.launch()
|
main.py
CHANGED
@@ -7,6 +7,29 @@ import pandas as pd
|
|
7 |
import datetime
|
8 |
from datetime import datetime
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def main_fn(Postal_,age_,town_,storey_,room_):
|
11 |
#Load model
|
12 |
filename = 'finalized_model2.sav'
|
@@ -91,9 +114,12 @@ def main_fn(Postal_,age_,town_,storey_,room_):
|
|
91 |
RPI = float(RPI_dict[formatted_quarter])
|
92 |
price = resale_adj_price*(RPI/133.9)
|
93 |
|
94 |
-
|
|
|
|
|
|
|
95 |
|
96 |
if __name__ == "__main__":
|
97 |
Postal_,age_,town_,storey_,room_ = 680705, 30, 'CHOA CHU KANG', 12, '5 ROOM'
|
98 |
-
|
99 |
-
print(
|
|
|
7 |
import datetime
|
8 |
from datetime import datetime
|
9 |
|
10 |
+
def findlast10(postal):
    """Return up to the ten most recent recorded transactions for a postal code.

    Reads ``data/data_features.json`` (JSON Lines), keeps the rows whose
    ``Postal`` value equals ``postal``, orders them newest first and replaces
    the numeric ``storey_height`` code with its storey-range label.

    Args:
        postal: Postal code to look up. It is compared as text, so either an
            int or a str may be passed.

    Returns:
        A DataFrame with the columns ``transaction``, ``area``,
        ``storey_height`` and ``resale_price``. It has at most ten rows and
        is empty when no row matches ``postal``.

    Raises:
        KeyError: If a ``storey_height`` code falls outside 1..17.
    """
    df = pd.read_json("data/data_features.json", lines=True)

    # Compare as text on both sides so the match does not depend on the dtype
    # that read_json happened to infer for the Postal column.
    df_filtered = df[df['Postal'].astype(str) == str(postal)]

    # Sorting on the year alone leaves rows of the same year in arbitrary
    # order, so head(10) could keep an earlier month and drop a later one.
    # 'transaction' breaks the tie by month.
    # NOTE(review): assumes 'transaction' holds "YYYY-MM" strings, as the
    # notebook output suggests -- confirm against the data file.
    df_output = (
        df_filtered
        .sort_values(by=['transaction_yr', 'transaction'], ascending=False)
        .head(10)
        .reset_index(drop=True)
    )

    # Code 1 -> '01 TO 03', code 2 -> '04 TO 06', ..., code 17 -> '49 TO 51'.
    storey_labels = {
        code: f"{3 * code - 2:02d} TO {3 * code:02d}" for code in range(1, 18)
    }
    df_output['storey_height'] = df_output['storey_height'].map(
        lambda code: storey_labels[code]
    )

    return df_output[['transaction', 'area', 'storey_height', 'resale_price']]
|
31 |
+
|
32 |
+
|
33 |
def main_fn(Postal_,age_,town_,storey_,room_):
|
34 |
#Load model
|
35 |
filename = 'finalized_model2.sav'
|
|
|
114 |
RPI = float(RPI_dict[formatted_quarter])
|
115 |
price = resale_adj_price*(RPI/133.9)
|
116 |
|
117 |
+
# Obtain the last 10 transactions with the same postal address
|
118 |
+
df = findlast10(Postal_input)
|
119 |
+
|
120 |
+
return (int(price), df)
|
121 |
|
122 |
if __name__ == "__main__":
|
123 |
Postal_,age_,town_,storey_,room_ = 680705, 30, 'CHOA CHU KANG', 12, '5 ROOM'
|
124 |
+
item = main_fn(Postal_,age_,town_,storey_,room_)
|
125 |
+
print(item)
|
main_old.py → src_retired/main_old.py
RENAMED
File without changes
|
src_retired/sample.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import requests
|
3 |
+
from misc import nearest_mrt
|
4 |
+
import time
|
5 |
+
import pickle
|
6 |
+
import os
|
7 |
+
import pandas as pd
|
8 |
+
|
9 |
+
|
10 |
+
filename = 'finalized_model.sav'

# Stop right away when the model file is absent: every statement below needs
# `model`, and carrying on would only fail later with a NameError.
if not os.path.exists(filename):
    print('failed loading model')
    raise SystemExit(1)

# NOTE: unpickling executes code stored in the file; only load model files
# that come from a trusted source.
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)
print('loaded model')

# Extract feature names from the loaded model.
feature_names = model.feature_names

# Entries 5..30 of the feature list are treated as the per-town features;
# the first five characters of each name are dropped.
# NOTE(review): presumably that strips a "town_"-style prefix -- confirm
# against the model's actual feature names.
site_names = feature_names[5:31]
town = [name[5:] for name in site_names]

print(town)
print(len(town))
print(len(feature_names))
|
test.ipynb
CHANGED
@@ -402,6 +402,160 @@
|
|
402 |
"price = resale_adj_price*(RPI/133.9) \n",
|
403 |
"price"
|
404 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
405 |
}
|
406 |
],
|
407 |
"metadata": {
|
|
|
402 |
"price = resale_adj_price*(RPI/133.9) \n",
|
403 |
"price"
|
404 |
]
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"cell_type": "code",
|
408 |
+
"execution_count": null,
|
409 |
+
"id": "05a4a4c1-cbe7-4623-ace3-2b6c61f1575c",
|
410 |
+
"metadata": {},
|
411 |
+
"outputs": [],
|
412 |
+
"source": []
|
413 |
+
},
|
414 |
+
{
|
415 |
+
"cell_type": "markdown",
|
416 |
+
"id": "686ef72d-5f24-4ed9-bf0f-4fcbc5b3138a",
|
417 |
+
"metadata": {},
|
418 |
+
"source": [
|
419 |
+
"## Find last 10 as dataframe"
|
420 |
+
]
|
421 |
+
},
|
422 |
+
{
|
423 |
+
"cell_type": "code",
|
424 |
+
"execution_count": 106,
|
425 |
+
"id": "7e9134ec-a778-4ca7-accf-4f500c1a493b",
|
426 |
+
"metadata": {},
|
427 |
+
"outputs": [
|
428 |
+
{
|
429 |
+
"data": {
|
430 |
+
"text/html": [
|
431 |
+
"<div>\n",
|
432 |
+
"<style scoped>\n",
|
433 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
434 |
+
" vertical-align: middle;\n",
|
435 |
+
" }\n",
|
436 |
+
"\n",
|
437 |
+
" .dataframe tbody tr th {\n",
|
438 |
+
" vertical-align: top;\n",
|
439 |
+
" }\n",
|
440 |
+
"\n",
|
441 |
+
" .dataframe thead th {\n",
|
442 |
+
" text-align: right;\n",
|
443 |
+
" }\n",
|
444 |
+
"</style>\n",
|
445 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
446 |
+
" <thead>\n",
|
447 |
+
" <tr style=\"text-align: right;\">\n",
|
448 |
+
" <th></th>\n",
|
449 |
+
" <th>transaction</th>\n",
|
450 |
+
" <th>area</th>\n",
|
451 |
+
" <th>storey_height</th>\n",
|
452 |
+
" <th>resale_price</th>\n",
|
453 |
+
" </tr>\n",
|
454 |
+
" </thead>\n",
|
455 |
+
" <tbody>\n",
|
456 |
+
" <tr>\n",
|
457 |
+
" <th>0</th>\n",
|
458 |
+
" <td>2023-04</td>\n",
|
459 |
+
" <td>114.0</td>\n",
|
460 |
+
" <td>07 TO 09</td>\n",
|
461 |
+
" <td>510000</td>\n",
|
462 |
+
" </tr>\n",
|
463 |
+
" <tr>\n",
|
464 |
+
" <th>1</th>\n",
|
465 |
+
" <td>2022-06</td>\n",
|
466 |
+
" <td>132.0</td>\n",
|
467 |
+
" <td>10 TO 12</td>\n",
|
468 |
+
" <td>585000</td>\n",
|
469 |
+
" </tr>\n",
|
470 |
+
" <tr>\n",
|
471 |
+
" <th>2</th>\n",
|
472 |
+
" <td>2022-12</td>\n",
|
473 |
+
" <td>109.0</td>\n",
|
474 |
+
" <td>01 TO 03</td>\n",
|
475 |
+
" <td>470000</td>\n",
|
476 |
+
" </tr>\n",
|
477 |
+
" <tr>\n",
|
478 |
+
" <th>3</th>\n",
|
479 |
+
" <td>2021-03</td>\n",
|
480 |
+
" <td>121.0</td>\n",
|
481 |
+
" <td>07 TO 09</td>\n",
|
482 |
+
" <td>455000</td>\n",
|
483 |
+
" </tr>\n",
|
484 |
+
" <tr>\n",
|
485 |
+
" <th>4</th>\n",
|
486 |
+
" <td>2020-02</td>\n",
|
487 |
+
" <td>109.0</td>\n",
|
488 |
+
" <td>07 TO 09</td>\n",
|
489 |
+
" <td>329000</td>\n",
|
490 |
+
" </tr>\n",
|
491 |
+
" <tr>\n",
|
492 |
+
" <th>5</th>\n",
|
493 |
+
" <td>2019-05</td>\n",
|
494 |
+
" <td>114.0</td>\n",
|
495 |
+
" <td>04 TO 06</td>\n",
|
496 |
+
" <td>330000</td>\n",
|
497 |
+
" </tr>\n",
|
498 |
+
" <tr>\n",
|
499 |
+
" <th>6</th>\n",
|
500 |
+
" <td>2017-09</td>\n",
|
501 |
+
" <td>109.0</td>\n",
|
502 |
+
" <td>10 TO 12</td>\n",
|
503 |
+
" <td>355000</td>\n",
|
504 |
+
" </tr>\n",
|
505 |
+
" </tbody>\n",
|
506 |
+
"</table>\n",
|
507 |
+
"</div>"
|
508 |
+
],
|
509 |
+
"text/plain": [
|
510 |
+
" transaction area storey_height resale_price\n",
|
511 |
+
"0 2023-04 114.0 07 TO 09 510000\n",
|
512 |
+
"1 2022-06 132.0 10 TO 12 585000\n",
|
513 |
+
"2 2022-12 109.0 01 TO 03 470000\n",
|
514 |
+
"3 2021-03 121.0 07 TO 09 455000\n",
|
515 |
+
"4 2020-02 109.0 07 TO 09 329000\n",
|
516 |
+
"5 2019-05 114.0 04 TO 06 330000\n",
|
517 |
+
"6 2017-09 109.0 10 TO 12 355000"
|
518 |
+
]
|
519 |
+
},
|
520 |
+
"execution_count": 106,
|
521 |
+
"metadata": {},
|
522 |
+
"output_type": "execute_result"
|
523 |
+
}
|
524 |
+
],
|
525 |
+
"source": [
|
526 |
+
"df = pd.read_json(\"data/data_features.json\", lines=True)\n",
|
527 |
+
"\n",
|
528 |
+
"Postal_input= 680705\n",
|
529 |
+
"df_filtered = df[df['Postal']==str(Postal_input)]\n",
|
530 |
+
"\n",
|
531 |
+
"df_output=df_filtered.sort_values(by='transaction_yr', ascending=False).head(10).reset_index(drop=True)\n",
|
532 |
+
"\n",
|
533 |
+
"storey_dict = {\n",
|
534 |
+
" '01 TO 03': 1, '04 TO 06': 2, '07 TO 09': 3, '10 TO 12': 4,\n",
|
535 |
+
" '13 TO 15': 5, '16 TO 18': 6, '19 TO 21': 7, '22 TO 24': 8,\n",
|
536 |
+
" '25 TO 27': 9, '28 TO 30': 10, '31 TO 33': 11, '34 TO 36': 12,\n",
|
537 |
+
" '37 TO 39': 13, '40 TO 42': 14, '43 TO 45': 15, '46 TO 48': 16,\n",
|
538 |
+
" '49 TO 51': 17\n",
|
539 |
+
"}\n",
|
540 |
+
"\n",
|
541 |
+
"# Swap keys and values using dictionary comprehension\n",
|
542 |
+
"swapped_dict = {value: key for key, value in storey_dict.items()}\n",
|
543 |
+
"\n",
|
544 |
+
"df_output['storey_height']=df_output['storey_height'].apply(lambda x: swapped_dict[x])\n",
|
545 |
+
"df_out = df_output[['transaction','area','storey_height','resale_price']]\n",
|
546 |
+
"\n",
|
547 |
+
"df_out"
|
548 |
+
]
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"cell_type": "code",
|
552 |
+
"execution_count": 96,
|
553 |
+
"id": "667f879d-a891-4bf2-9f62-ffd96f3e809f",
|
554 |
+
"metadata": {},
|
555 |
+
"outputs": [],
|
556 |
+
"source": [
|
557 |
+
"\n"
|
558 |
+
]
|
559 |
}
|
560 |
],
|
561 |
"metadata": {
|