{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/miniconda3/lib/python3.12/site-packages/threadpoolctl.py:1214: RuntimeWarning: \n", "Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at\n", "the same time. Both libraries are known to be incompatible and this\n", "can cause random crashes or deadlocks on Linux when loaded in the\n", "same Python program.\n", "Using threadpoolctl may cause crashes or deadlocks. For more\n", "information and possible workarounds, please see\n", " https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md\n", "\n", " warnings.warn(msg, RuntimeWarning)\n" ] } ], "source": [
 "import json\n",
 "import pickle\n",
 "\n",
 "# Load the column list that was saved as JSON for reuse at inference time.\n",
 "# NOTE(review): the previous comment called these 'numerical columns' while the\n",
 "# variable is named combined_columns -- confirm which columns the file holds.\n",
 "# The encoding is pinned so the JSON text is decoded the same way on every\n",
 "# platform instead of depending on the locale default.\n",
 "with open('list_num_cols.txt', 'r', encoding='utf-8') as file_1:\n",
 "    combined_columns = json.load(file_1)\n",
 "\n",
 "# Load the saved model object.\n",
 "# SECURITY: pickle.load can execute arbitrary code while unpickling; only open\n",
 "# a model.pkl that you produced yourself or that comes from a trusted source.\n",
 "with open('model.pkl', 'rb') as file_2:\n",
 "    lr = pickle.load(file_2)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Kita akan coba buka yang kita tadi save untuk dipake untuk inference." 
] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Original Dummy Data:\n", " Suburb Rooms Price Distance Bathroom Car Landsize \\\n", "0 Abbotsford 2 1035000.0 2.5 1.0 0.0 156.0 \n", "1 Abbotsford 3 1465000.0 2.5 2.0 0.0 134.0 \n", "2 Abbotsford 4 1600000.0 2.5 1.0 2.0 120.0 \n", "3 Abbotsford 3 1876000.0 2.5 2.0 0.0 245.0 \n", "4 Abbotsford 2 1636000.0 2.5 1.0 2.0 256.0 \n", "\n", " BuildingArea YearBuilt Propertycount \n", "0 79.0 1900.0 4019.0 \n", "1 150.0 1900.0 4019.0 \n", "2 142.0 2014.0 4019.0 \n", "3 210.0 1910.0 4019.0 \n", "4 107.0 1890.0 4019.0 \n" ] } ], "source": [
 "import pandas as pd\n",
 "\n",
 "# Hand-written sample rows (five listings, all in the same suburb) that the\n",
 "# notebook later runs through the loaded linear regression.\n",
 "# NOTE(review): Price looks like the regression target -- confirm it is not\n",
 "# among the feature columns passed to the model at prediction time.\n",
 "df_data_dummy = pd.DataFrame({\n",
 "    \"Suburb\": [\"Abbotsford\", \"Abbotsford\", \"Abbotsford\", \"Abbotsford\", \"Abbotsford\"],\n",
 "    \"Rooms\": [2, 3, 4, 3, 2],\n",
 "    \"Price\": [1035000.0, 1465000.0, 1600000.0, 1876000.0, 1636000.0],\n",
 "    \"Distance\": [2.5, 2.5, 2.5, 2.5, 2.5],\n",
 "    \"Bathroom\": [1.0, 2.0, 1.0, 2.0, 1.0],\n",
 "    \"Car\": [0.0, 0.0, 2.0, 0.0, 2.0],\n",
 "    \"Landsize\": [156.0, 134.0, 120.0, 245.0, 256.0],\n",
 "    \"BuildingArea\": [79.0, 150.0, 142.0, 210.0, 107.0],\n",
 "    \"YearBuilt\": [1900.0, 1900.0, 2014.0, 1910.0, 1890.0],\n",
 "    \"Propertycount\": [4019.0, 4019.0, 4019.0, 4019.0, 4019.0],\n",
 "})\n",
 "\n",
 "# Work on an independent copy. Re-wrapping with pd.DataFrame(df_data_dummy)\n",
 "# can share the underlying data with the source frame, so in-place edits made\n",
 "# while preparing df_dummy_data could silently alter the raw rows as well.\n",
 "df_dummy_data = df_data_dummy.copy()\n",
 "\n",
 "# print() is kept so the cell's text output stays exactly as before.\n",
 "print(\"Original Dummy Data:\")\n",
 "print(df_dummy_data)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Kita akan membuat dataset \"dummy\" baru dan masukan ke dataframe dinamakan \"df_dummy_data\". Kita mau uji nanti dengan linear regression." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Suburb | \n", "Rooms | \n", "Price | \n", "Distance | \n", "Bathroom | \n", "Car | \n", "Landsize | \n", "BuildingArea | \n", "YearBuilt | \n", "Propertycount | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Abbotsford | \n", "2 | \n", "1035000.0 | \n", "2.5 | \n", "1.0 | \n", "0.0 | \n", "156.0 | \n", "79.0 | \n", "1900.0 | \n", "4019.0 | \n", "
1 | \n", "Abbotsford | \n", "3 | \n", "1465000.0 | \n", "2.5 | \n", "2.0 | \n", "0.0 | \n", "134.0 | \n", "150.0 | \n", "1900.0 | \n", "4019.0 | \n", "
2 | \n", "Abbotsford | \n", "4 | \n", "1600000.0 | \n", "2.5 | \n", "1.0 | \n", "2.0 | \n", "120.0 | \n", "142.0 | \n", "2014.0 | \n", "4019.0 | \n", "
3 | \n", "Abbotsford | \n", "3 | \n", "1876000.0 | \n", "2.5 | \n", "2.0 | \n", "0.0 | \n", "245.0 | \n", "210.0 | \n", "1910.0 | \n", "4019.0 | \n", "
4 | \n", "Abbotsford | \n", "2 | \n", "1636000.0 | \n", "2.5 | \n", "1.0 | \n", "2.0 | \n", "256.0 | \n", "107.0 | \n", "1890.0 | \n", "4019.0 | \n", "