Spaces:
Runtime error
Runtime error
| import os,sys | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn.impute import KNNImputer | |
| from sklearn.pipeline import Pipeline | |
| from networksecurity.constant.training_pipeline import TARGET_COLUMN, DATA_TRANSFORMATION_IMPUTER_PARAMS | |
| from networksecurity.entity.artifact_entity import DataTransformationArtifact, DataValidationArtifact | |
| from networksecurity.entity.config_entity import DataTransformationConfig | |
| from networksecurity.exception.exception import NetworkSecurityException | |
| from networksecurity.logging.logger import logging | |
| from networksecurity.utils.main_utils.utils import save_numpy_array_data, save_object | |
class DataTransformation:
    """Impute missing feature values and persist the transformed datasets.

    Reads the validated train and test CSV files named by the data-validation
    artifact, fits a KNNImputer pipeline on the training features, applies it
    to both splits, and hands the resulting arrays and the fitted pipeline to
    the project's save helpers using the paths from the transformation config.
    """

    def __init__(
        self,
        data_transformation_config: DataTransformationConfig,
        data_validation_artifact: DataValidationArtifact,
    ):
        """Store the configuration and the upstream validation artifact.

        Args:
            data_transformation_config: Supplies the output paths
                (``transformed_train_file_path``, ``transformed_test_file_path``,
                ``transformed_object_file_path``).
            data_validation_artifact: Supplies the input paths
                (``valid_train_file_path``, ``valid_test_file_path``).

        Raises:
            NetworkSecurityException: If initialisation fails.
        """
        try:
            logging.info(f"{'>>'*20} Data Transformation {'<<'*20}")
            self.data_transformation_config = data_transformation_config
            self.data_validation_artifact = data_validation_artifact
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    @staticmethod
    def read_data(file_path) -> pd.DataFrame:
        """Load a CSV file into a DataFrame.

        Declared as a static method because it takes no instance/class
        argument; this keeps ``DataTransformation.read_data(path)`` working and
        additionally makes ``instance.read_data(path)`` valid.

        Args:
            file_path: Path of the CSV file to read.

        Returns:
            The parsed DataFrame.

        Raises:
            NetworkSecurityException: If the file cannot be read or parsed.
        """
        try:
            return pd.read_csv(file_path)
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    @staticmethod
    def _split_features_and_target(df: pd.DataFrame):
        """Split a DataFrame into input features and the target column.

        Args:
            df: DataFrame that contains ``TARGET_COLUMN``.

        Returns:
            A ``(features, target)`` pair: ``features`` is ``df`` without
            ``TARGET_COLUMN``; ``target`` is that column with every ``-1``
            replaced by ``0``.
        """
        features = df.drop(columns=[TARGET_COLUMN])
        target = df[TARGET_COLUMN].replace(-1, 0)
        return features, target

    # NOTE: the first parameter is named ``cls`` but the method carries no
    # ``@classmethod`` decorator and is invoked on an instance
    # (``self.get_data_transformer_object()``), so ``cls`` is bound to that
    # instance. The signature is kept as-is for existing callers.
    def get_data_transformer_object(cls) -> Pipeline:
        """Build the preprocessing pipeline.

        Creates a KNNImputer from ``DATA_TRANSFORMATION_IMPUTER_PARAMS``
        (imported from ``networksecurity.constant.training_pipeline``) and
        wraps it as the single ``"imputer"`` step of a Pipeline.

        Args:
            cls: The object the method is called on (unused).

        Returns:
            Pipeline: Unfitted pipeline whose only step is the KNNImputer.

        Raises:
            NetworkSecurityException: If the imputer or pipeline cannot be built.
        """
        logging.info("Entered the get_data_transformer_object method of Data_Transformation class")
        try:
            imputer = KNNImputer(**DATA_TRANSFORMATION_IMPUTER_PARAMS)
            logging.info("Created KNNImputer object with the parameters specified in the training_pipeline.py file")
            return Pipeline(steps=[("imputer", imputer)])
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    def initiate_data_transformation(self) -> DataTransformationArtifact:
        """Run the transformation step end to end.

        Loads the validated train/test CSVs, separates features from the
        target, fits the preprocessing pipeline on the training features only,
        transforms both splits, appends the target as the last column of each
        array, and passes the arrays and the fitted pipeline to the save
        helpers.

        Returns:
            DataTransformationArtifact: Holds the paths of the saved
            preprocessor object and the transformed train/test arrays.

        Raises:
            NetworkSecurityException: If any stage of the transformation fails.
        """
        logging.info("Entered initiate_data_transformation method of DataTransformation class")
        try:
            logging.info("Starting data transformation")
            config = self.data_transformation_config
            validation = self.data_validation_artifact

            train_df = DataTransformation.read_data(validation.valid_train_file_path)
            test_df = DataTransformation.read_data(validation.valid_test_file_path)

            input_feature_train_df, target_feature_train_df = (
                self._split_features_and_target(train_df)
            )
            input_feature_test_df, target_feature_test_df = (
                self._split_features_and_target(test_df)
            )

            # Fit on the training features only; the same fitted pipeline is
            # then applied to both splits.
            preprocessor = self.get_data_transformer_object()
            preprocessor_object = preprocessor.fit(input_feature_train_df)
            transformed_input_train_feature = preprocessor_object.transform(input_feature_train_df)
            transformed_input_test_feature = preprocessor_object.transform(input_feature_test_df)

            # Column-stack so the target becomes the last column of each array.
            train_arr = np.c_[transformed_input_train_feature, np.array(target_feature_train_df)]
            test_arr = np.c_[transformed_input_test_feature, np.array(target_feature_test_df)]

            # Save numpy array data and preprocessor object.
            save_numpy_array_data(
                file_path=config.transformed_train_file_path,
                array=train_arr,
            )
            save_numpy_array_data(
                file_path=config.transformed_test_file_path,
                array=test_arr,
            )
            save_object(
                file_path=config.transformed_object_file_path,
                obj=preprocessor_object,
            )

            data_transformation_artifact = DataTransformationArtifact(
                transformed_object_file_path=config.transformed_object_file_path,
                transformed_train_file_path=config.transformed_train_file_path,
                transformed_test_file_path=config.transformed_test_file_path,
            )
            # Log before returning; a statement placed after ``return`` is
            # unreachable and would never be emitted.
            logging.info("Data transformation completed")
            return data_transformation_artifact
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e