{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/miniconda3/lib/python3.12/site-packages/threadpoolctl.py:1214: RuntimeWarning: \n",
      "Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at\n",
      "the same time. Both libraries are known to be incompatible and this\n",
      "can cause random crashes or deadlocks on Linux when loaded in the\n",
      "same Python program.\n",
      "Using threadpoolctl may cause crashes or deadlocks. For more\n",
      "information and possible workarounds, please see\n",
      "    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md\n",
      "\n",
      "  warnings.warn(msg, RuntimeWarning)\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import pickle\n",
    "\n",
    "# Load the saved list of numerical columns (stored as JSON). The encoding is\n",
    "# given explicitly so the read does not depend on the platform's default locale.\n",
    "with open('list_num_cols.txt', 'r', encoding='utf-8') as columns_file:\n",
    "    combined_columns = json.load(columns_file)\n",
    "\n",
    "# Load the saved model.\n",
    "# WARNING: pickle.load can execute arbitrary code while unpickling, so only\n",
    "# open a model.pkl that comes from a trusted source.\n",
    "with open('model.pkl', 'rb') as model_file:\n",
    "    lr = pickle.load(model_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Kita buka kembali daftar kolom numerik dan model yang tadi disimpan untuk dipakai pada tahap inference."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original Dummy Data:\n",
      "       Suburb  Rooms      Price  Distance  Bathroom  Car  Landsize  \\\n",
      "0  Abbotsford      2  1035000.0       2.5       1.0  0.0     156.0   \n",
      "1  Abbotsford      3  1465000.0       2.5       2.0  0.0     134.0   \n",
      "2  Abbotsford      4  1600000.0       2.5       1.0  2.0     120.0   \n",
      "3  Abbotsford      3  1876000.0       2.5       2.0  0.0     245.0   \n",
      "4  Abbotsford      2  1636000.0       2.5       1.0  2.0     256.0   \n",
      "\n",
      "   BuildingArea  YearBuilt  Propertycount  \n",
      "0          79.0     1900.0         4019.0  \n",
      "1         150.0     1900.0         4019.0  \n",
      "2         142.0     2014.0         4019.0  \n",
      "3         210.0     1910.0         4019.0  \n",
      "4         107.0     1890.0         4019.0  \n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Assuming df_data_dummy is your DataFrame with the data\n",
    "df_data_dummy = pd.DataFrame({\n",
    "\n",
    "    \"Suburb\": [\"Abbotsford\", \"Abbotsford\", \"Abbotsford\", \"Abbotsford\", \"Abbotsford\"],\n",
    "    \"Rooms\": [2, 3, 4, 3, 2],\n",
    "    \"Price\": [1035000.0, 1465000.0, 1600000.0, 1876000.0, 1636000.0],\n",
    "    \"Distance\": [2.5, 2.5, 2.5, 2.5, 2.5],\n",
    "    \"Bathroom\": [1.0, 2.0, 1.0, 2.0, 1.0],\n",
    "    \"Car\": [0.0, 0.0, 2.0, 0.0, 2.0],\n",
    "    \"Landsize\": [156.0, 134.0, 120.0, 245.0, 256.0],\n",
    "    \"BuildingArea\": [79.0, 150.0, 142.0, 210.0, 107.0],\n",
    "    \"YearBuilt\": [1900.0, 1900.0, 2014.0, 1910.0, 1890.0],\n",
    "    \"Propertycount\": [4019.0, 4019.0, 4019.0, 4019.0, 4019.0]\n",
    "\n",
    "})\n",
    "\n",
    "df_dummy_data = pd.DataFrame(df_data_dummy)\n",
    "print(\"Original Dummy Data:\")\n",
    "print(df_dummy_data)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Kita membuat dataset \"dummy\" baru dan memasukkannya ke dalam DataFrame bernama \"df_dummy_data\". Data ini nanti akan kita uji dengan model linear regression."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Suburb</th>\n",
       "      <th>Rooms</th>\n",
       "      <th>Price</th>\n",
       "      <th>Distance</th>\n",
       "      <th>Bathroom</th>\n",
       "      <th>Car</th>\n",
       "      <th>Landsize</th>\n",
       "      <th>BuildingArea</th>\n",
       "      <th>YearBuilt</th>\n",
       "      <th>Propertycount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abbotsford</td>\n",
       "      <td>2</td>\n",
       "      <td>1035000.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>156.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>1900.0</td>\n",
       "      <td>4019.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Abbotsford</td>\n",
       "      <td>3</td>\n",
       "      <td>1465000.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>134.0</td>\n",
       "      <td>150.0</td>\n",
       "      <td>1900.0</td>\n",
       "      <td>4019.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Abbotsford</td>\n",
       "      <td>4</td>\n",
       "      <td>1600000.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>142.0</td>\n",
       "      <td>2014.0</td>\n",
       "      <td>4019.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Abbotsford</td>\n",
       "      <td>3</td>\n",
       "      <td>1876000.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>245.0</td>\n",
       "      <td>210.0</td>\n",
       "      <td>1910.0</td>\n",
       "      <td>4019.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Abbotsford</td>\n",
       "      <td>2</td>\n",
       "      <td>1636000.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>256.0</td>\n",
       "      <td>107.0</td>\n",
       "      <td>1890.0</td>\n",
       "      <td>4019.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Suburb  Rooms      Price  Distance  Bathroom  Car  Landsize  \\\n",
       "0  Abbotsford      2  1035000.0       2.5       1.0  0.0     156.0   \n",
       "1  Abbotsford      3  1465000.0       2.5       2.0  0.0     134.0   \n",
       "2  Abbotsford      4  1600000.0       2.5       1.0  2.0     120.0   \n",
       "3  Abbotsford      3  1876000.0       2.5       2.0  0.0     245.0   \n",
       "4  Abbotsford      2  1636000.0       2.5       1.0  2.0     256.0   \n",
       "\n",
       "   BuildingArea  YearBuilt  Propertycount  \n",
       "0          79.0     1900.0         4019.0  \n",
       "1         150.0     1900.0         4019.0  \n",
       "2         142.0     2014.0         4019.0  \n",
       "3         210.0     1910.0         4019.0  \n",
       "4         107.0     1890.0         4019.0  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Last expression of the cell: renders the dummy listings as a table.\n",
    "df_dummy_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the columns named in combined_columns (loaded from\n",
    "# list_num_cols.txt). Presumably these are the features the model was\n",
    "# trained on - TODO confirm against the training notebook.\n",
    "df_dummy_data_new = df_dummy_data[combined_columns]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pilih kolom-kolom yang tersimpan di `combined_columns` dari data dummy, lalu simpan hasilnya ke variabel baru `df_dummy_data_new`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the unpickled model on the selected columns. lr is presumably a fitted\n",
    "# linear regression estimator (per the notebook text) - TODO confirm.\n",
    "predictions = lr.predict(df_dummy_data_new)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Selanjutnya kita membuat prediksi pada data dummy dengan model linear regression yang sudah dimuat."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1101277.02454045, 1665725.95948649, 1297970.1974852 ,\n",
       "       1639455.71625785, 1297855.42299958])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Last expression of the cell: displays the predicted values, one per row\n",
    "# of the dummy data.\n",
    "predictions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Keluarlah prediksi harga rumah untuk beberapa bulan ke depan. Semua harganya di atas AU$1 juta."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}