File size: 4,176 Bytes
ed2b5ab
 
 
 
 
2a7ee55
b9ed1ac
 
2a7ee55
7c9d88c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5638308
b9ed1ac
 
 
2a7ee55
 
 
 
 
b9ed1ac
 
2a7ee55
 
 
 
 
 
 
 
 
 
 
 
b9ed1ac
 
 
2a7ee55
2304f1b
2a7ee55
2304f1b
2a7ee55
 
9157140
b9ed1ac
2a7ee55
 
a392acb
420f797
2a7ee55
 
 
 
 
 
 
 
2304f1b
2a7ee55
 
 
 
b9ed1ac
 
2a7ee55
b9ed1ac
 
2a7ee55
b9ed1ac
2304f1b
2a7ee55
 
b9ed1ac
 
 
 
 
2a7ee55
b9ed1ac
2a7ee55
 
 
b9ed1ac
2a7ee55
 
b9ed1ac
 
 
 
 
 
 
 
 
2a7ee55
7c9d88c
 
 
 
2a7ee55
 
 
7c9d88c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import json
import requests
from misc import nearest_mrt
import pickle
import os
import pandas as pd
import datetime
from datetime import datetime

def _storey_band_label(band):
    """Convert an encoded storey band into its ``'LL TO HH'`` label.

    Band ``k`` covers storeys ``3k - 2`` to ``3k`` (1 -> ``'01 TO 03'``,
    17 -> ``'49 TO 51'``). The label is computed rather than looked up in a
    fixed table so that bands above 17 are also supported.

    Raises:
        KeyError: if ``band`` is not a whole number >= 1.
    """
    try:
        index = int(band)
    except (TypeError, ValueError):
        # NaN, None or non-numeric text: there is no such band.
        raise KeyError(band) from None
    if index < 1 or index != band:
        raise KeyError(band)
    top_storey = 3 * index
    return f"{top_storey - 2:02d} TO {top_storey:02d}"


def findlast10(postal):
    """Return up to ten of the most recent transactions for a postal code.

    Reads ``data/data_features.json`` (JSON lines), keeps the rows whose
    ``Postal`` matches ``postal``, orders them by ``transaction_yr``
    (newest first) and keeps at most ten.

    Args:
        postal: Postal code as an int or a string. Both sides of the
            comparison are converted to text and left-padded with zeros to
            six characters, so the match works whether pandas inferred the
            column as numbers or strings, and whether or not a leading zero
            was lost along the way (e.g. by ``int("081001")``).

    Returns:
        A DataFrame with the columns ``transaction``, ``area``,
        ``storey_height`` (as an ``'LL TO HH'`` label) and ``resale_price``.
        It is empty when no row matches.

    Raises:
        KeyError: if a matching row holds a storey band that is not a whole
            number >= 1.
    """
    df = pd.read_json("data/data_features.json", lines=True)

    wanted = str(postal).zfill(6)
    same_postal = df['Postal'].astype(str).str.zfill(6) == wanted

    recent = (
        df[same_postal]
        .sort_values(by='transaction_yr', ascending=False)
        .head(10)
        .reset_index(drop=True)
    )

    # Replace the numeric band used by the model with a readable range.
    recent['storey_height'] = recent['storey_height'].apply(_storey_band_label)

    return recent[['transaction', 'area', 'storey_height', 'resale_price']]


# NOTE(review): presumably the RPI of the reference quarter that the model's
# adjusted prices are expressed in -- confirm against the training notebook.
_BASE_RPI = 133.9


def _load_mrt_stations(path='data/mrt_list.json'):
    """Read MRT station names and coordinates from a JSON-lines file.

    Each non-blank line must be a JSON object with an ``MRT`` entry and a
    ``location`` sequence whose items are convertible to float.

    Returns:
        Two parallel lists: station names and coordinate tuples.
    """
    names = []
    coords = []
    with open(path, 'r') as file:
        for line in file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            item = json.loads(line)
            names.append(item['MRT'])
            coords.append(tuple(float(i) for i in item['location']))
    return names, coords


def _geocode_postal(search_term):
    """Look up ``search_term`` on OneMap and return the first search hit.

    Raises:
        requests.exceptions.RequestException: on timeout, connection
            failure or a non-success HTTP status.
        KeyError: if the response has no ``results`` entry.
        IndexError: if the search returned no hits.
    """
    query_string= 'https://www.onemap.gov.sg/api/common/elastic/search?searchVal={}&returnGeom=Y&getAddrDetails=Y&pageNum=1'.format(search_term)
    # Without a timeout a stalled connection would block the caller forever.
    resp = requests.get(query_string, timeout=10)
    resp.raise_for_status()
    data = json.loads(resp.content)
    print(query_string)
    print(data)
    results = data['results']
    if not results:
        raise IndexError(
            f'OneMap returned no results for postal code {search_term}'
        )
    return results[0]


def _rpi_for_quarter(rpi_by_quarter, year, quarter):
    """Return the RPI for ``{quarter}Q{year}``, or the latest earlier one.

    The table is keyed by labels such as ``'1Q2024'``. When the requested
    quarter has not been added to the table yet, the most recent quarter
    that is not later than the requested one is used instead, and a notice
    is printed.

    Raises:
        KeyError: if the table holds no usable quarter at or before the
            requested one.
    """
    wanted = f"{quarter}Q{year}"
    if wanted in rpi_by_quarter:
        return float(rpi_by_quarter[wanted])

    best = None
    for label in rpi_by_quarter:
        q_text, sep, y_text = str(label).partition('Q')
        if not (sep and q_text.isdigit() and y_text.isdigit()):
            # Header rows or malformed labels cannot be ordered; skip them.
            continue
        when = (int(y_text), int(q_text))
        if when <= (year, quarter) and (best is None or when > best[0]):
            best = (when, label)
    if best is None:
        raise KeyError(wanted)
    print(f'RPI for {wanted} not found; using {best[1]} instead')
    return float(rpi_by_quarter[best[1]])


def main_fn(Postal_,age_,town_,storey_,room_):
    """Estimate the resale price of a flat and list recent sales nearby.

    Builds a one-row feature frame for the pickled model, predicts an
    RPI-adjusted price, rescales it with the Resale Price Index of the
    current quarter and fetches recent transactions for the same postal code.

    Args:
        Postal_: Postal code; converted with ``int``.
        age_: Stored in the ``age_transation`` feature; converted with ``int``.
        town_: Stored as-is in the ``town`` feature.
        storey_: Storey number; converted to a band with ``(storey + 2) // 3``.
        room_: Stored as-is in the ``flat_num`` feature.

    Returns:
        ``(price, transactions)``: the estimated price truncated to an int,
        and the DataFrame returned by ``findlast10``.

    Raises:
        FileNotFoundError: if the model file is missing.
        ValueError: if a feature the code sets is not among the model's
            feature names, or a numeric argument cannot be converted.
        IndexError: if OneMap has no result for the postal code.
        KeyError: if no usable RPI entry exists.
    """
    #Load model
    filename = 'finalized_model2.sav'
    if not os.path.exists(filename):
        print('failed loading model')
        raise FileNotFoundError(f'model file not found: {filename}')
    # NOTE: unpickling executes arbitrary code; only load a trusted model file.
    with open(filename, 'rb') as model_file:
        model = pickle.load(model_file)
    print('loaded model')

    #extract feature names; every feature starts at 0 and is then filled in
    feature_names = model.feature_names_in_.tolist()
    row = [0]*len(feature_names)

    def set_feature(name, value):
        # list.index raises ValueError if the model does not know `name`.
        row[feature_names.index(name)] = value

    #Set up mrt_list
    mrt_name, mrt_loc = _load_mrt_stations()

    ##POSTAL
    Postal_input = int(Postal_)
    set_feature('Postal', Postal_input)

    ##DISTANCE TO MRT
    # int() drops leading zeros, so pad back to six digits for the search.
    search_term = f'{Postal_input:06d}'
    chosen_result = _geocode_postal(search_term)

    #Calculate the distance to nearest MRT
    distance_km, nearest_mr = nearest_mrt(chosen_result['LATITUDE'], chosen_result['LONGITUDE'], mrt_name, mrt_loc)
    set_feature('distance_mrt', distance_km)

    ##STOREY
    #Height is input, but then converted to the 3-storey band used by the model
    set_feature('storey_height', (int(storey_)+2)//3)

    ##Town
    set_feature("town", town_)

    ##Room
    set_feature("flat_num", room_)

    ##AGE / TRANSACTION YEAR (the transaction year is the current year)
    current_date = datetime.now()
    set_feature('age_transation', int(age_))
    set_feature('transaction_yr', current_date.year)

    # Create final_dataframe as input to model
    Actual_df = pd.DataFrame(dict(zip(feature_names, row)), index=[0])

    # Use model to predict adjusted price
    resale_adj_price = model.predict(Actual_df)[0]

    # Readjust back to actual price using the current quarter's RPI
    quarter = (current_date.month - 1) // 3 + 1
    RPI_pd = pd.read_csv('data/RPI_dict.csv', header=None)
    RPI_dict = dict(zip(RPI_pd[0], RPI_pd[1]))
    RPI = _rpi_for_quarter(RPI_dict, current_date.year, quarter)
    price = resale_adj_price*(RPI/_BASE_RPI)

    # Obtain the last 10 transactions with the same postal address
    df = findlast10(Postal_input)

    return (int(price), df)

if __name__ == "__main__":
    # Ad-hoc run with a sample flat, in the order main_fn expects:
    # postal code, age, town, storey, flat type.
    sample_args = (680705, 30, 'CHOA CHU KANG', 12, '5 ROOM')
    print(main_fn(*sample_args))