sivakornchong committed on
Commit
7c9d88c
1 Parent(s): 9157140

add code to calculate df

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  finalized_model.sav filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  finalized_model.sav filter=lfs diff=lfs merge=lfs -text
36
+ *.json filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -17,7 +17,8 @@ iface = gr.Interface(
17
  gr.inputs.Dropdown(choices=room_list, type="value", default=None, label='Room')
18
  ],
19
  outputs= [
20
- gr.outputs.Textbox(type="text", label='Predicted House Price ($)')
 
21
  ]
22
  )
23
  iface.launch()
 
17
  gr.inputs.Dropdown(choices=room_list, type="value", default=None, label='Room')
18
  ],
19
  outputs= [
20
+ gr.Textbox(type="text", label='Predicted House Price ($)'),
21
+ gr.Dataframe(row_count = (10, "dynamic"), col_count=(4, "fixed"), label="Past transactions")
22
  ]
23
  )
24
  iface.launch()
main.py CHANGED
@@ -7,6 +7,29 @@ import pandas as pd
7
  import datetime
8
  from datetime import datetime
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def main_fn(Postal_,age_,town_,storey_,room_):
11
  #Load model
12
  filename = 'finalized_model2.sav'
@@ -91,9 +114,12 @@ def main_fn(Postal_,age_,town_,storey_,room_):
91
  RPI = float(RPI_dict[formatted_quarter])
92
  price = resale_adj_price*(RPI/133.9)
93
 
94
- return int(price)
 
 
 
95
 
96
  if __name__ == "__main__":
97
  Postal_,age_,town_,storey_,room_ = 680705, 30, 'CHOA CHU KANG', 12, '5 ROOM'
98
- price = main_fn(Postal_,age_,town_,storey_,room_)
99
- print(price)
 
7
  import datetime
8
  from datetime import datetime
9
 
10
def findlast10(postal):
    """Return up to ten of the most recent transactions for a postal code.

    Reads ``data/data_features.json`` (JSON Lines), keeps the rows whose
    ``Postal`` value equals ``str(postal)``, orders them newest first and
    replaces the numeric ``storey_height`` code with its range label
    (for example ``3`` becomes ``'07 TO 09'``).

    Args:
        postal: Postal code to look up. It is converted with ``str`` before
            being compared against the ``Postal`` column.

    Returns:
        A DataFrame with the columns ``transaction``, ``area``,
        ``storey_height`` and ``resale_price`` holding at most ten rows,
        newest transaction first. The frame is empty when nothing matches.
    """
    df = pd.read_json("data/data_features.json", lines=True)
    df_filtered = df[df['Postal'] == str(postal)]

    # Sort on the year and then on the full ``transaction`` value
    # ("YYYY-MM" strings). Sorting on the year alone left rows of the same
    # year in arbitrary order, e.g. 2022-06 ahead of 2022-12.
    df_output = (
        df_filtered
        .sort_values(by=['transaction_yr', 'transaction'], ascending=False)
        .head(10)
        .reset_index(drop=True)
    )

    storey_dict = {
        '01 TO 03': 1, '04 TO 06': 2, '07 TO 09': 3, '10 TO 12': 4,
        '13 TO 15': 5, '16 TO 18': 6, '19 TO 21': 7, '22 TO 24': 8,
        '25 TO 27': 9, '28 TO 30': 10, '31 TO 33': 11, '34 TO 36': 12,
        '37 TO 39': 13, '40 TO 42': 14, '43 TO 45': 15, '46 TO 48': 16,
        '49 TO 51': 17
    }

    # Invert the mapping so a numeric code can be turned back into its label.
    code_to_label = {code: label for label, code in storey_dict.items()}

    # Codes without a known label are passed through unchanged instead of
    # raising KeyError and aborting the whole lookup.
    df_output['storey_height'] = df_output['storey_height'].map(
        lambda code: code_to_label.get(code, code)
    )

    return df_output[['transaction', 'area', 'storey_height', 'resale_price']]
31
+
32
+
33
  def main_fn(Postal_,age_,town_,storey_,room_):
34
  #Load model
35
  filename = 'finalized_model2.sav'
 
114
  RPI = float(RPI_dict[formatted_quarter])
115
  price = resale_adj_price*(RPI/133.9)
116
 
117
+ # Obtain the last 10 transactions with the same postal address
118
+ df = findlast10(Postal_input)
119
+
120
+ return (int(price), df)
121
 
122
  if __name__ == "__main__":
123
  Postal_,age_,town_,storey_,room_ = 680705, 30, 'CHOA CHU KANG', 12, '5 ROOM'
124
+ item = main_fn(Postal_,age_,town_,storey_,room_)
125
+ print(item)
main_old.py → src_retired/main_old.py RENAMED
File without changes
src_retired/sample.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import requests
3
+ from misc import nearest_mrt
4
+ import time
5
+ import pickle
6
+ import os
7
+ import pandas as pd
8
+
9
+
10
# Scratch script: load the pickled model and print the town names that are
# encoded in its feature list.
filename = 'finalized_model.sav'

# Check the very path that is opened below so the test and the load cannot
# disagree.
if not os.path.exists(filename):
    print('failed loading model')
    # Everything below needs `model`; stop here instead of failing later
    # with a NameError.
    raise SystemExit(1)

# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load model files from a trusted source.
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)
print('loaded model')

# Extract feature names
feature_names = model.feature_names

# Entries 5..30 of the feature list; the first five characters of each entry
# are dropped (presumably a "town_"-style prefix - confirm against the
# training code).
site_names = feature_names[5:31]
town = [site_name[5:] for site_name in site_names]

print(town)

print(len(town))
print(len(feature_names))
test.ipynb CHANGED
@@ -402,6 +402,160 @@
402
  "price = resale_adj_price*(RPI/133.9) \n",
403
  "price"
404
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  }
406
  ],
407
  "metadata": {
 
402
  "price = resale_adj_price*(RPI/133.9) \n",
403
  "price"
404
  ]
405
+ },
406
+ {
407
+ "cell_type": "code",
408
+ "execution_count": null,
409
+ "id": "05a4a4c1-cbe7-4623-ace3-2b6c61f1575c",
410
+ "metadata": {},
411
+ "outputs": [],
412
+ "source": []
413
+ },
414
+ {
415
+ "cell_type": "markdown",
416
+ "id": "686ef72d-5f24-4ed9-bf0f-4fcbc5b3138a",
417
+ "metadata": {},
418
+ "source": [
419
+ "## Find last 10 as dataframe"
420
+ ]
421
+ },
422
+ {
423
+ "cell_type": "code",
424
+ "execution_count": 106,
425
+ "id": "7e9134ec-a778-4ca7-accf-4f500c1a493b",
426
+ "metadata": {},
427
+ "outputs": [
428
+ {
429
+ "data": {
430
+ "text/html": [
431
+ "<div>\n",
432
+ "<style scoped>\n",
433
+ " .dataframe tbody tr th:only-of-type {\n",
434
+ " vertical-align: middle;\n",
435
+ " }\n",
436
+ "\n",
437
+ " .dataframe tbody tr th {\n",
438
+ " vertical-align: top;\n",
439
+ " }\n",
440
+ "\n",
441
+ " .dataframe thead th {\n",
442
+ " text-align: right;\n",
443
+ " }\n",
444
+ "</style>\n",
445
+ "<table border=\"1\" class=\"dataframe\">\n",
446
+ " <thead>\n",
447
+ " <tr style=\"text-align: right;\">\n",
448
+ " <th></th>\n",
449
+ " <th>transaction</th>\n",
450
+ " <th>area</th>\n",
451
+ " <th>storey_height</th>\n",
452
+ " <th>resale_price</th>\n",
453
+ " </tr>\n",
454
+ " </thead>\n",
455
+ " <tbody>\n",
456
+ " <tr>\n",
457
+ " <th>0</th>\n",
458
+ " <td>2023-04</td>\n",
459
+ " <td>114.0</td>\n",
460
+ " <td>07 TO 09</td>\n",
461
+ " <td>510000</td>\n",
462
+ " </tr>\n",
463
+ " <tr>\n",
464
+ " <th>1</th>\n",
465
+ " <td>2022-06</td>\n",
466
+ " <td>132.0</td>\n",
467
+ " <td>10 TO 12</td>\n",
468
+ " <td>585000</td>\n",
469
+ " </tr>\n",
470
+ " <tr>\n",
471
+ " <th>2</th>\n",
472
+ " <td>2022-12</td>\n",
473
+ " <td>109.0</td>\n",
474
+ " <td>01 TO 03</td>\n",
475
+ " <td>470000</td>\n",
476
+ " </tr>\n",
477
+ " <tr>\n",
478
+ " <th>3</th>\n",
479
+ " <td>2021-03</td>\n",
480
+ " <td>121.0</td>\n",
481
+ " <td>07 TO 09</td>\n",
482
+ " <td>455000</td>\n",
483
+ " </tr>\n",
484
+ " <tr>\n",
485
+ " <th>4</th>\n",
486
+ " <td>2020-02</td>\n",
487
+ " <td>109.0</td>\n",
488
+ " <td>07 TO 09</td>\n",
489
+ " <td>329000</td>\n",
490
+ " </tr>\n",
491
+ " <tr>\n",
492
+ " <th>5</th>\n",
493
+ " <td>2019-05</td>\n",
494
+ " <td>114.0</td>\n",
495
+ " <td>04 TO 06</td>\n",
496
+ " <td>330000</td>\n",
497
+ " </tr>\n",
498
+ " <tr>\n",
499
+ " <th>6</th>\n",
500
+ " <td>2017-09</td>\n",
501
+ " <td>109.0</td>\n",
502
+ " <td>10 TO 12</td>\n",
503
+ " <td>355000</td>\n",
504
+ " </tr>\n",
505
+ " </tbody>\n",
506
+ "</table>\n",
507
+ "</div>"
508
+ ],
509
+ "text/plain": [
510
+ " transaction area storey_height resale_price\n",
511
+ "0 2023-04 114.0 07 TO 09 510000\n",
512
+ "1 2022-06 132.0 10 TO 12 585000\n",
513
+ "2 2022-12 109.0 01 TO 03 470000\n",
514
+ "3 2021-03 121.0 07 TO 09 455000\n",
515
+ "4 2020-02 109.0 07 TO 09 329000\n",
516
+ "5 2019-05 114.0 04 TO 06 330000\n",
517
+ "6 2017-09 109.0 10 TO 12 355000"
518
+ ]
519
+ },
520
+ "execution_count": 106,
521
+ "metadata": {},
522
+ "output_type": "execute_result"
523
+ }
524
+ ],
525
+ "source": [
526
+ "df = pd.read_json(\"data/data_features.json\", lines=True)\n",
527
+ "\n",
528
+ "Postal_input= 680705\n",
529
+ "df_filtered = df[df['Postal']==str(Postal_input)]\n",
530
+ "\n",
531
+ "df_output=df_filtered.sort_values(by='transaction_yr', ascending=False).head(10).reset_index(drop=True)\n",
532
+ "\n",
533
+ "storey_dict = {\n",
534
+ " '01 TO 03': 1, '04 TO 06': 2, '07 TO 09': 3, '10 TO 12': 4,\n",
535
+ " '13 TO 15': 5, '16 TO 18': 6, '19 TO 21': 7, '22 TO 24': 8,\n",
536
+ " '25 TO 27': 9, '28 TO 30': 10, '31 TO 33': 11, '34 TO 36': 12,\n",
537
+ " '37 TO 39': 13, '40 TO 42': 14, '43 TO 45': 15, '46 TO 48': 16,\n",
538
+ " '49 TO 51': 17\n",
539
+ "}\n",
540
+ "\n",
541
+ "# Swap keys and values using dictionary comprehension\n",
542
+ "swapped_dict = {value: key for key, value in storey_dict.items()}\n",
543
+ "\n",
544
+ "df_output['storey_height']=df_output['storey_height'].apply(lambda x: swapped_dict[x])\n",
545
+ "df_out = df_output[['transaction','area','storey_height','resale_price']]\n",
546
+ "\n",
547
+ "df_out"
548
+ ]
549
+ },
550
+ {
551
+ "cell_type": "code",
552
+ "execution_count": 96,
553
+ "id": "667f879d-a891-4bf2-9f62-ffd96f3e809f",
554
+ "metadata": {},
555
+ "outputs": [],
556
+ "source": [
557
+ "\n"
558
+ ]
559
  }
560
  ],
561
  "metadata": {