# sivakornchong's picture
# redirect model directory, change default values
# 4008330
import json
import requests
from misc import nearest_mrt
import pickle
import os
import pandas as pd
import datetime
from datetime import datetime
def findlast10(postal, data_path="data/data_features.json"):
    """Return up to ten of the most recent recorded transactions for a postal code.

    The JSON-lines file at *data_path* is loaded, filtered to rows whose
    ``Postal`` matches *postal*, ordered by ``transaction_yr`` (newest first)
    and truncated to ten rows.  The numeric ``storey_height`` code is turned
    back into its storey-range label (code ``k`` -> storeys ``3k-2 TO 3k``).

    Args:
        postal: Postal code to look up; an int or a string of digits.
        data_path: JSON-lines file with one transaction per line.  It must
            provide the columns ``Postal``, ``transaction_yr``,
            ``transaction``, ``area``, ``storey_height`` and ``resale_price``.

    Returns:
        A DataFrame with the columns ``transaction``, ``area``,
        ``storey_height`` and ``resale_price``; empty when nothing matches.
        Storey codes outside 1..17 are reported as NaN instead of raising.
    """
    df = pd.read_json(data_path, lines=True)

    # read_json may infer an integer dtype for all-digit postal codes, so
    # compare as text on both sides rather than relying on the stored dtype.
    same_postal = df["Postal"].astype(str) == str(postal)

    # A stable sort keeps the file order among rows of the same year, which
    # makes the selected ten rows deterministic.
    recent = (
        df[same_postal]
        .sort_values(by="transaction_yr", ascending=False, kind="mergesort")
        .head(10)
        .reset_index(drop=True)
    )

    # Code 1 -> "01 TO 03", code 2 -> "04 TO 06", ... code 17 -> "49 TO 51".
    code_to_range = {
        code: f"{3 * code - 2:02d} TO {3 * code:02d}" for code in range(1, 18)
    }
    # Series.map leaves unknown codes as NaN rather than raising KeyError.
    recent["storey_height"] = recent["storey_height"].map(code_to_range)

    return recent[["transaction", "area", "storey_height", "resale_price"]]
def _load_model(path):
    """Unpickle and return the trained model stored at *path*."""
    if not os.path.exists(path):
        print("failed loading model")
        raise FileNotFoundError(f"model file not found: {path}")
    # NOTE: pickle.load executes arbitrary code from the file; only load
    # model files that ship with the application.
    with open(path, "rb") as fh:
        model = pickle.load(fh)
    print("loaded model")
    return model


def _load_mrt_stations(path):
    """Read the JSON-lines station list; return (names, [(float, float), ...])."""
    names = []
    locations = []
    with open(path, "r") as file:
        for line in file:
            item = json.loads(line)
            names.append(item["MRT"])
            locations.append(tuple(float(i) for i in item["location"]))
    return names, locations


def _geocode_postal(postal):
    """Return the first OneMap search result for *postal*."""
    query_string = (
        "https://www.onemap.gov.sg/api/common/elastic/search"
        f"?searchVal={postal}&returnGeom=Y&getAddrDetails=Y&pageNum=1"
    )
    # A timeout keeps a stalled connection from hanging the caller forever.
    resp = requests.get(query_string, timeout=10)
    resp.raise_for_status()
    data = json.loads(resp.content)
    print(query_string)
    print(data)
    results = data["results"]
    if not results:
        # Same exception type as indexing the empty list, with a usable message.
        raise IndexError(f"OneMap returned no results for postal code {postal}")
    return results[0]


def _rpi_for_quarter(rpi_dict, year, quarter):
    """Return the index for ``{quarter}Q{year}``, or for the latest earlier quarter.

    Raises KeyError (with the requested key) when no usable entry exists.
    """
    wanted = f"{quarter}Q{year}"
    if wanted in rpi_dict:
        return float(rpi_dict[wanted])

    # The requested quarter is not in the table: fall back to the most recent
    # quarter that is not later than the requested one.
    earlier = []
    for key in rpi_dict:
        if not isinstance(key, str):
            continue
        q_part, sep, y_part = key.strip().partition("Q")
        if sep and q_part.isdigit() and y_part.isdigit():
            period = (int(y_part), int(q_part))
            if period <= (year, quarter):
                earlier.append((period, key))
    if not earlier:
        raise KeyError(wanted)
    _, newest_key = max(earlier)
    return float(rpi_dict[newest_key])


def main_fn(Postal_, age_, town_, storey_, room_):
    """Predict a resale price and list recent transactions at the same address.

    Steps: load the pickled model from ``model/model.pkl``, build a single-row
    feature frame in the model's own feature order, predict an index-adjusted
    price, rescale it with the resale price index (RPI) read from
    ``data/RPI_dict.csv``, and fetch recent transactions via ``findlast10``.

    Args:
        Postal_: Postal code; converted with ``int()``.
        age_: Value stored in the ``age_transation`` feature; converted with
            ``int()``.
        town_: Town value, passed to the model unchanged.
        storey_: Storey number; converted with ``int()`` and bucketed into
            groups of three storeys (1-3 -> 1, 4-6 -> 2, ...).
        room_: Flat type value, passed to the model unchanged (``flat_num``).

    Returns:
        ``(price, transactions)`` where ``price`` is an int and
        ``transactions`` is the DataFrame returned by ``findlast10``.

    Raises:
        FileNotFoundError: the model file does not exist.
        IndexError: OneMap returned no match for the postal code.
        KeyError: the RPI table has no entry for the current or any earlier
            quarter.
    """
    model = _load_model("model/model.pkl")

    # The model dictates the column order; start from an all-zero row.
    feature_names = model.feature_names_in_.tolist()
    features = [0] * len(feature_names)

    def set_feature(name, value):
        features[feature_names.index(name)] = value

    mrt_name, mrt_loc = _load_mrt_stations("data/mrt_list.json")

    ## POSTAL
    Postal_input = int(Postal_)
    set_feature("Postal", Postal_input)

    ## DISTANCE TO MRT
    chosen_result = _geocode_postal(Postal_input)
    distance_km, _ = nearest_mrt(
        chosen_result["LATITUDE"], chosen_result["LONGITUDE"], mrt_name, mrt_loc
    )
    set_feature("distance_mrt", distance_km)

    ## STOREY
    # Convert the actual storey to the 3-storey bucket scale used by the model.
    set_feature("storey_height", (int(storey_) + 2) // 3)

    ## TOWN / ROOM
    set_feature("town", town_)
    set_feature("flat_num", room_)

    ## AGE / TRANSACTION YEAR (the current calendar year is used)
    current_date = datetime.now()
    set_feature("age_transation", int(age_))
    set_feature("transaction_yr", current_date.year)

    # Single-row frame keyed by feature name, as input to the model.
    Actual_df = pd.DataFrame(dict(zip(feature_names, features)), index=[0])
    resale_adj_price = model.predict(Actual_df)[0]

    # Rescale the adjusted prediction with the RPI of the current quarter.
    quarter = (current_date.month - 1) // 3 + 1
    RPI_pd = pd.read_csv("data/RPI_dict.csv", header=None)
    RPI_dict = dict(zip(RPI_pd[0], RPI_pd[1]))
    RPI = _rpi_for_quarter(RPI_dict, current_date.year, quarter)
    # NOTE(review): 133.9 is presumably the RPI of the period the training
    # prices were normalised to - confirm against the training code.
    rpi_base = 133.9
    price = resale_adj_price * (RPI / rpi_base)

    # Last transactions recorded for the same postal address.
    df = findlast10(Postal_input)
    return (int(price), df)
if __name__ == "__main__":
    # Manual smoke test: run one prediction for a sample flat and print the
    # (price, recent transactions) tuple returned by main_fn.
    sample_args = (680705, 30, "CHOA CHU KANG", 12, "5 ROOM")
    print(main_fn(*sample_args))