{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/miniconda3/lib/python3.12/site-packages/threadpoolctl.py:1214: RuntimeWarning: \n", "Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at\n", "the same time. Both libraries are known to be incompatible and this\n", "can cause random crashes or deadlocks on Linux when loaded in the\n", "same Python program.\n", "Using threadpoolctl may cause crashes or deadlocks. For more\n", "information and possible workarounds, please see\n", " https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md\n", "\n", " warnings.warn(msg, RuntimeWarning)\n" ] } ], "source": [ "import json\n", "import pickle\n", "\n", "# Load the saved list of numerical columns (the file holds JSON text).\n", "# Read it as UTF-8 explicitly so the result does not depend on the platform's default encoding.\n", "with open('list_num_cols.txt', 'r', encoding='utf-8') as file_1:\n", "    combined_columns = json.load(file_1)\n", "\n", "# Load the saved model.\n", "# NOTE(security): unpickling can execute arbitrary code, so only load model.pkl from a trusted source.\n", "with open('model.pkl', 'rb') as file_2:\n", "    lr = pickle.load(file_2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Kita akan coba buka yang kita tadi save untuk dipake untuk inference."
] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Original Dummy Data:\n", " Suburb Rooms Price Distance Bathroom Car Landsize \\\n", "0 Abbotsford 2 1035000.0 2.5 1.0 0.0 156.0 \n", "1 Abbotsford 3 1465000.0 2.5 2.0 0.0 134.0 \n", "2 Abbotsford 4 1600000.0 2.5 1.0 2.0 120.0 \n", "3 Abbotsford 3 1876000.0 2.5 2.0 0.0 245.0 \n", "4 Abbotsford 2 1636000.0 2.5 1.0 2.0 256.0 \n", "\n", " BuildingArea YearBuilt Propertycount \n", "0 79.0 1900.0 4019.0 \n", "1 150.0 1900.0 4019.0 \n", "2 142.0 2014.0 4019.0 \n", "3 210.0 1910.0 4019.0 \n", "4 107.0 1890.0 4019.0 \n" ] } ], "source": [ "import pandas as pd\n", "\n", "# Hand-built dummy listings (five rows) used as input for inference.\n", "df_data_dummy = pd.DataFrame({\n", "    \"Suburb\": [\"Abbotsford\", \"Abbotsford\", \"Abbotsford\", \"Abbotsford\", \"Abbotsford\"],\n", "    \"Rooms\": [2, 3, 4, 3, 2],\n", "    \"Price\": [1035000.0, 1465000.0, 1600000.0, 1876000.0, 1636000.0],\n", "    \"Distance\": [2.5, 2.5, 2.5, 2.5, 2.5],\n", "    \"Bathroom\": [1.0, 2.0, 1.0, 2.0, 1.0],\n", "    \"Car\": [0.0, 0.0, 2.0, 0.0, 2.0],\n", "    \"Landsize\": [156.0, 134.0, 120.0, 245.0, 256.0],\n", "    \"BuildingArea\": [79.0, 150.0, 142.0, 210.0, 107.0],\n", "    \"YearBuilt\": [1900.0, 1900.0, 2014.0, 1910.0, 1890.0],\n", "    \"Propertycount\": [4019.0, 4019.0, 4019.0, 4019.0, 4019.0],\n", "})\n", "\n", "# Take an explicit, independent copy: pd.DataFrame(df_data_dummy) would only re-wrap\n", "# the same underlying data, so edits to one frame could leak into the other.\n", "df_dummy_data = df_data_dummy.copy()\n", "print(\"Original Dummy Data:\")\n", "print(df_dummy_data)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Kita akan membuat dataset \"dummy\" baru dan masukan ke dataframe dinamakan \"df_dummy_data\". Kita mau uji nanti dengan linear regression." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SuburbRoomsPriceDistanceBathroomCarLandsizeBuildingAreaYearBuiltPropertycount
0Abbotsford21035000.02.51.00.0156.079.01900.04019.0
1Abbotsford31465000.02.52.00.0134.0150.01900.04019.0
2Abbotsford41600000.02.51.02.0120.0142.02014.04019.0
3Abbotsford31876000.02.52.00.0245.0210.01910.04019.0
4Abbotsford21636000.02.51.02.0256.0107.01890.04019.0
\n", "
" ], "text/plain": [ " Suburb Rooms Price Distance Bathroom Car Landsize \\\n", "0 Abbotsford 2 1035000.0 2.5 1.0 0.0 156.0 \n", "1 Abbotsford 3 1465000.0 2.5 2.0 0.0 134.0 \n", "2 Abbotsford 4 1600000.0 2.5 1.0 2.0 120.0 \n", "3 Abbotsford 3 1876000.0 2.5 2.0 0.0 245.0 \n", "4 Abbotsford 2 1636000.0 2.5 1.0 2.0 256.0 \n", "\n", " BuildingArea YearBuilt Propertycount \n", "0 79.0 1900.0 4019.0 \n", "1 150.0 1900.0 4019.0 \n", "2 142.0 2014.0 4019.0 \n", "3 210.0 1910.0 4019.0 \n", "4 107.0 1890.0 4019.0 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_dummy_data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "df_dummy_data_new = df_dummy_data[combined_columns]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Pilih kolom-kolom pada `combined_columns` dari data dummy, lalu simpan hasilnya ke variabel baru `df_dummy_data_new`." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "predictions = lr.predict(df_dummy_data_new)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Membuat prediksi dengan linear regression pada data dummy tersebut." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1101277.02454045, 1665725.95948649, 1297970.1974852 ,\n", " 1639455.71625785, 1297855.42299958])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predictions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Keluarlah prediksi harga rumah untuk beberapa bulan ke depan. Semua harganya di atas AU$1 juta." ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }