NetworkSecurity / networksecurity /components /data_transformation.py
Inder-26
Data Transformation done
eae2854
import os,sys
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.pipeline import Pipeline
from networksecurity.constant.training_pipeline import TARGET_COLUMN, DATA_TRANSFORMATION_IMPUTER_PARAMS
from networksecurity.entity.artifact_entity import DataTransformationArtifact, DataValidationArtifact
from networksecurity.entity.config_entity import DataTransformationConfig
from networksecurity.exception.exception import NetworkSecurityException
from networksecurity.logging.logger import logging
from networksecurity.utils.main_utils.utils import save_numpy_array_data, save_object
class DataTransformation:
    """Impute the validated train/test CSVs and persist the results.

    The class reads the train and test files referenced by a
    ``DataValidationArtifact``, fits a KNN-imputer pipeline on the training
    features, applies it to both splits, and writes the transformed arrays
    and the fitted pipeline to the paths given by ``DataTransformationConfig``.
    """

    def __init__(self,
                 data_transformation_config: DataTransformationConfig,
                 data_validation_artifact: DataValidationArtifact):
        """Store the configuration and the upstream validation artifact.

        Args:
            data_transformation_config: Supplies the output paths for the
                transformed train/test arrays and the fitted preprocessor.
            data_validation_artifact: Supplies the paths of the validated
                train and test CSV files.

        Raises:
            NetworkSecurityException: If anything fails during setup.
        """
        try:
            logging.info(f"{'>>'*20} Data Transformation {'<<'*20}")
            self.data_transformation_config = data_transformation_config
            self.data_validation_artifact = data_validation_artifact
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    @staticmethod
    def read_data(file_path) -> pd.DataFrame:
        """Load a CSV file into a DataFrame.

        Args:
            file_path: Location of the CSV file, passed to ``pd.read_csv``.

        Returns:
            The parsed DataFrame.

        Raises:
            NetworkSecurityException: If the file cannot be read or parsed.
        """
        try:
            return pd.read_csv(file_path)
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    @staticmethod
    def _split_features_and_target(df: pd.DataFrame):
        """Split ``df`` into input features and the re-encoded target column.

        Target values of ``-1`` are replaced with ``0``; every other value is
        left as it is.

        Returns:
            A ``(features, target)`` pair: ``df`` without ``TARGET_COLUMN``,
            and the ``TARGET_COLUMN`` series after the replacement.
        """
        features = df.drop(columns=[TARGET_COLUMN])
        target = df[TARGET_COLUMN].replace(-1, 0)
        return features, target

    def get_data_transformer_object(cls) -> Pipeline:
        """Build the (unfitted) preprocessing pipeline.

        A ``KNNImputer`` is created from ``DATA_TRANSFORMATION_IMPUTER_PARAMS``
        (imported from the training_pipeline constants module) and wrapped as
        the single ``"imputer"`` step of a scikit-learn ``Pipeline``.

        Args:
            cls: Unused. The method is not decorated as a classmethod, so this
                receives the instance when called as
                ``self.get_data_transformer_object()``.

        Returns:
            Pipeline: Pipeline whose first and only step is the KNNImputer.

        Raises:
            NetworkSecurityException: If the imputer or pipeline cannot be built.
        """
        logging.info("Entered the get_data_transformer_object method of Data_Transformation class")
        try:
            imputer = KNNImputer(**DATA_TRANSFORMATION_IMPUTER_PARAMS)
            logging.info("Created KNNImputer object with the parameters specified in the training_pipeline.py file")
            preprocessor = Pipeline(steps=[("imputer", imputer)])
            return preprocessor
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    def initiate_data_transformation(self) -> DataTransformationArtifact:
        """Run the full transformation step and return its artifact.

        Steps:
            1. Read the validated train and test CSVs.
            2. Separate features from the target, mapping target ``-1`` to ``0``.
            3. Fit the imputer pipeline on the training features only, then
               transform both the training and the test features with it.
            4. Append the target as the last column of each transformed array.
            5. Save both arrays and the fitted pipeline to the configured paths.

        Returns:
            DataTransformationArtifact: Holds the paths of the saved
            preprocessor object and the transformed train/test arrays.

        Raises:
            NetworkSecurityException: If any step above fails.
        """
        logging.info("Entered initiate_data_transformation method of DataTransformation class")
        try:
            logging.info("Starting data transformation")
            train_df = DataTransformation.read_data(self.data_validation_artifact.valid_train_file_path)
            test_df = DataTransformation.read_data(self.data_validation_artifact.valid_test_file_path)

            input_feature_train_df, target_feature_train_df = self._split_features_and_target(train_df)
            input_feature_test_df, target_feature_test_df = self._split_features_and_target(test_df)

            # Fit on the training features only; the test split is transformed
            # with the already-fitted pipeline.
            preprocessor = self.get_data_transformer_object()
            preprocessor_object = preprocessor.fit(input_feature_train_df)
            transformed_input_train_feature = preprocessor_object.transform(input_feature_train_df)
            transformed_input_test_feature = preprocessor_object.transform(input_feature_test_df)

            # Column-stack so the target becomes the last column of each array.
            train_arr = np.c_[transformed_input_train_feature, np.array(target_feature_train_df)]
            test_arr = np.c_[transformed_input_test_feature, np.array(target_feature_test_df)]

            ## Save numpy array data and preprocessor object
            save_numpy_array_data(
                file_path=self.data_transformation_config.transformed_train_file_path,
                array=train_arr
            )
            save_numpy_array_data(
                file_path=self.data_transformation_config.transformed_test_file_path,
                array=test_arr
            )
            save_object(
                file_path=self.data_transformation_config.transformed_object_file_path,
                obj=preprocessor_object
            )

            data_transformation_artifact = DataTransformationArtifact(
                transformed_object_file_path=self.data_transformation_config.transformed_object_file_path,
                transformed_train_file_path=self.data_transformation_config.transformed_train_file_path,
                transformed_test_file_path=self.data_transformation_config.transformed_test_file_path,
            )
            # Log before returning; placed after the return this line would
            # never execute.
            logging.info("Data transformation completed")
            return data_transformation_artifact
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e