{ "cells": [
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
  "import pandas as pd\n",
  "import os" ] },
 { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Load data from file into a pandas df" ] },
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "File 'hr.csv' loaded successfully. \n", "Found 311 rows, 36 columns\n" ] } ], "source": [
  "DATADIR = \"data/\"\n",
  "FILENAME = None\n",
  "\n",
  "# Lower-cased file extension -> pandas reader used to parse that format.\n",
  "READERS = {\n",
  "    \"csv\": pd.read_csv,\n",
  "    \"json\": pd.read_json,\n",
  "    \"xlsx\": pd.read_excel,\n",
  "}\n",
  "\n",
  "# Prompt until a file loads into df; an empty answer abandons the prompt.\n",
  "while FILENAME is None:\n",
  "\n",
  "    # Strip stray whitespace so 'hr.csv ' is not rejected as extension 'csv '.\n",
  "    file_candidate = input(\"Enter file name:\").strip()\n",
  "    if file_candidate == \"\":\n",
  "        break\n",
  "\n",
  "    try:\n",
  "        # The trailing \"\\r\" lets the next message overwrite this progress line.\n",
  "        print(f\"Assesing file '{file_candidate}'...\".ljust(120), end=\"\\r\")\n",
  "        file_path = DATADIR + file_candidate\n",
  "        # Compare extensions case-insensitively so 'HR.CSV' loads like 'hr.csv'.\n",
  "        extension = file_candidate.split(\".\")[-1].lower()\n",
  "        reader = READERS.get(extension)\n",
  "        if reader is None:\n",
  "            # Padded like the generic error below: this message can be shorter\n",
  "            # than the progress line it overwrites.\n",
  "            print(f\"Error: Invalid extension '{extension}'\".ljust(120))\n",
  "            continue\n",
  "        df = reader(file_path)\n",
  "        print(f\"File '{file_candidate}' loaded successfully.\")\n",
  "        rows, columns = df.shape\n",
  "        print(f\"Found {rows} rows, {columns} columns\")\n",
  "        FILENAME = file_candidate\n",
  "    except FileNotFoundError:\n",
  "        print(f\"Error: '{file_candidate}' doesn't exist in {os.getcwd()}/{DATADIR}\")\n",
  "    except Exception as error:\n",
  "        # Any other failure while reading is reported and the prompt repeats.\n",
  "        print(f\"Error: Unable to read file '{file_candidate}' ({str(type(error))}: {error})\".ljust(120))\n",
  "\n",
  "# An empty answer leaves FILENAME unset and loads nothing.\n",
  "if FILENAME is None:\n",
  "    print(\"No file loaded. Re-run this cell and enter a file name before running the cells below.\")" ] },
 { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Clean data to remove duplicates and rows with missing values."
] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
  "DROP_MISSING = True\n",
  "REMOVE_DUPLICATES = True\n",
  "\n",
  "# DROP_MISSING removes every row that has at least one missing value;\n",
  "# otherwise only rows whose values are all missing are removed.\n",
  "if DROP_MISSING:\n",
  "    df = df.dropna(how=\"any\")\n",
  "else:\n",
  "    df = df.dropna(how=\"all\")\n",
  "\n",
  "# Optionally discard rows that exactly repeat an earlier row.\n",
  "if REMOVE_DUPLICATES:\n",
  "    df = df.drop_duplicates()" ] }
], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }