{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "os.chdir(\"../\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'c:\\\\MLOps\\\\CICD\\\\End-to-End-ML'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%pwd" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "from dataclasses import dataclass\n", "from pathlib import Path\n", "\n", "@dataclass(frozen=True)\n", "class DataTransformationConfig:\n", " root_dir: Path\n", " data_path:Path" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "from mlProject.constants import *\n", "from mlProject.utils.common import read_yaml, create_directories " ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "class ConfigurationManager:\n", " def __init__(\n", " self, \n", " config_filepath=CONFIG_FILE_PATH,\n", " params_filepath=PARAMS_FILE_PATH,\n", " schema_filepath=SCHEMA_FILE_PATH): \n", " \n", " self.config=read_yaml(config_filepath)\n", " # self.params=read_yaml(params_filepath)\n", " self.schema=read_yaml(schema_filepath)\n", " create_directories([self.config.artifacts_root])\n", "\n", " def get_transformation_config(self)->DataTransformationConfig:\n", " config=self.config.data_transformation\n", " create_directories([config.root_dir])\n", "\n", " # create an object by initialization\n", " data_transformation_config=DataTransformationConfig(\n", " root_dir=config.root_dir,\n", " data_path=config.data_path\n", " )\n", "\n", " return data_transformation_config \n", " \n" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "import os\n", "from mlProject import logger\n", "from sklearn.model_selection import train_test_split\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "class DataTransformation:\n", " def __init__(self, config: DataTransformationConfig):\n", " self.config=config\n", "\n", " def train_test_splitting(self):\n", " data=pd.read_csv(self.config.data_path)\n", " data.drop(['Unnamed: 0','RowNumber'], axis=1, inplace=True)\n", " data.set_index('CustomerId', inplace=True)\n", " train,test=train_test_split(data)\n", "\n", " train.to_csv( os.path.join(self.config.root_dir,\"train.csv\"), index=True)\n", " test.to_csv(os.path.join(self.config.root_dir, \"test.csv\"), index=True)\n", "\n", " logger.info(\"Splitted data into training and the test sets\")\n", " logger.info(train.shape)\n", " logger.info(test.shape)\n", "\n", " print(f\"Train Shape :{train.shape}\")\n", " print(f\"Test Shape :{test.shape}\")\n" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2024-03-10 17:28:06,122:INFO:common: yaml file: config\\config.yaml loaded successfully]\n", "[2024-03-10 17:28:06,129:INFO:common: yaml file: schema.yaml loaded successfully]\n", "[2024-03-10 17:28:06,129:INFO:common: created directory at: artifacts]\n", "[2024-03-10 17:28:06,137:INFO:common: created directory at: artifacts/data_transformation]\n", "[2024-03-10 17:28:06,226:INFO:3354807181: Splitted data into training and the test sets]\n", "[2024-03-10 17:28:06,226:INFO:3354807181: (7500, 12)]\n", "[2024-03-10 17:28:06,234:INFO:3354807181: (2500, 12)]\n", "Train Shape :(7500, 12)\n", "Test Shape :(2500, 12)\n" ] } ], "source": [ "config=ConfigurationManager()\n", "data_transformation_config=config.get_transformation_config()\n", "data_transformation=DataTransformation(config=data_transformation_config)\n", "data_transformation.train_test_splitting()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "conda_e2eml", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.18" } }, "nbformat": 4, "nbformat_minor": 2 }