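# --- Web file-viewer page residue captured with the source (not part of the module) ---
# Meera2602's picture
# try1
# 0928d05
# raw
# history blame
# No virus
# 4.32 kB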
import os
import sys
from pathlib import Path
import pickle
from src.Predictive_Maintenance.logger import logging
from src.Predictive_Maintenance.exception import CustomException
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from src.Predictive_Maintenance.utils.utils import type_of_failure
# Default location of the raw input CSV used when this module runs as a script.
raw_data_path: str = os.path.join("artifacts", "raw.csv")

# Module-level preprocessing objects; they are fitted inside
# DataTransformation.initiate_data_transformation.
scaler = MinMaxScaler()
label_encoder = LabelEncoder()
# Ordinal encoding with the explicit category order L, M, H.
ordinal_encoder = OrdinalEncoder(
    categories=[['L', 'M', 'H']],
)
class DataTransformationConfig:
    """Output location used by the data-transformation stage."""

    # Path of the CSV written once preprocessing has finished.
    preprocessor_csv_file_path: str = str(Path("artifacts") / "preprocessed_csv.csv")
class DataTransformation:
    """Turn the raw CSV into an encoded, scaled and oversampled table.

    The stage calls ``type_of_failure`` for every row, drops the ``UDI``,
    ``Product ID``, ``TWF``, ``HDF``, ``PWF``, ``OSF`` and ``RNF`` columns,
    converts the two temperature columns from Kelvin to Celsius, encodes
    ``Type`` and ``type_of_failure``, min-max scales the numeric features,
    oversamples with SMOTE and writes the result to disk.
    """

    def __init__(self):
        # Holds the path of the preprocessed CSV written at the end of the stage.
        self.data_transformation_config = DataTransformationConfig()

    def initiate_data_transformation(self, raw_data_path):
        """Run the whole transformation stage.

        Args:
            raw_data_path: Path of the raw CSV file to read.

        Returns:
            pandas.DataFrame: The resampled feature columns with
            ``type_of_failure`` appended as the last column. The same frame
            is written to ``preprocessor_csv_file_path`` without its index.

        Raises:
            CustomException: Wraps any exception raised while the stage runs;
                the original exception is attached as ``__cause__``.

        Side effects:
            Fits the module-level ``ordinal_encoder``, ``label_encoder`` and
            ``scaler`` objects and pickles the fitted scaler to
            ``artifacts/scaler.pkl``.
        """
        try:
            df = pd.read_csv(raw_data_path)
            logging.info("read data complete")
            logging.info(f"df dataframe head: \n {df.head().to_string()}")

            # create type of failure column
            logging.info("create 'type of failure' column")
            # The return value of apply is discarded, so type_of_failure is
            # presumably expected to write the 'type_of_failure' column into
            # df in place -- TODO confirm against utils.type_of_failure.
            df.apply(lambda row: type_of_failure(row.name, df), axis=1)
            logging.info("create 'type of failure' column completed successfully")
            # NOTE(review): a fill of missing labels with 'no failure' used to
            # sit here but was disabled; confirm every row receives a label.

            # drop productid and uid, then the remaining unused columns
            df.drop(['UDI', 'Product ID'], axis=1, inplace=True)
            df.drop(['TWF', 'HDF', 'PWF', 'OSF', 'RNF'], axis=1, inplace=True)
            logging.info('removed productid and uid from the dataset')

            # convert kelvin to celsius
            logging.info('converting kelvin to celcius')
            df['Air temperature [c]'] = df['Air temperature [K]'] - 273.15
            df['Process temperature [c]'] = df['Process temperature [K]'] - 273.15
            df.drop(['Air temperature [K]', 'Process temperature [K]'], axis=1, inplace=True)
            logging.info('converted kelvin to celcius successfully')

            # categorical encoding
            df['Type'] = ordinal_encoder.fit_transform(df[['Type']])
            # label encoding
            df['type_of_failure'] = label_encoder.fit_transform(df['type_of_failure'])
            logging.info('Encoding completed successfully')

            # feature scaling
            scale_cols = [
                'Rotational speed [rpm]',
                'Torque [Nm]',
                'Tool wear [min]',
                'Air temperature [c]',
                'Process temperature [c]',
            ]
            scaled_values = scaler.fit_transform(df[scale_cols])

            # Create the target directory first so the write cannot fail just
            # because "artifacts" does not exist yet.
            scaler_path = Path("artifacts", "scaler.pkl")
            scaler_path.parent.mkdir(parents=True, exist_ok=True)
            with open(scaler_path, "wb") as f:
                pickle.dump(scaler, f)

            # Reuse df's index: concat(axis=1) aligns on index labels, so a
            # fresh RangeIndex would only line up by coincidence.
            scaled_frame = pd.DataFrame(scaled_values, columns=scale_cols, index=df.index)
            df_scaled = pd.concat([df.drop(columns=scale_cols), scaled_frame], axis=1)
            logging.info("data scaling stage completed successfully")

            # oversampling
            logging.info("Oversampling stage begin.............")
            # No random_state is passed, so the synthetic rows are not
            # reproducible from one run to the next.
            smote = SMOTE(sampling_strategy='auto')
            X = df_scaled.drop('type_of_failure', axis=1)
            y = df_scaled['type_of_failure']
            X_resampled, y_resampled = smote.fit_resample(X, y)
            df_sampled = pd.concat([X_resampled, y_resampled], axis=1)
            logging.info(f"df sampled head: \n {df_sampled.head().to_string()}")
            logging.info("Over sampling stage completed successfully")

            output_path = self.data_transformation_config.preprocessor_csv_file_path
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            df_sampled.to_csv(output_path, index=False)
            return df_sampled
        except Exception as e:
            # Log with traceback at error level, then re-raise in the
            # project's exception type while keeping the original cause.
            logging.exception("Error occured during DataTransformation stage")
            raise CustomException(e, sys) from e
if __name__ == "__main__":
    # Script entry point: run the transformation on the default raw CSV path.
    DataTransformation().initiate_data_transformation(raw_data_path)