Spaces:
Sleeping
Sleeping
singhjagpreet
committed on
Commit
•
bf0670d
1
Parent(s):
eb2eadc
data transformation
Browse files- logs/09_09_2023_20_29_47.log/09_09_2023_20_29_47.log +9 -0
- logs/09_09_2023_20_30_41.log/09_09_2023_20_30_41.log +10 -0
- logs/09_09_2023_20_31_40.log/09_09_2023_20_31_40.log +10 -0
- src/__pycache__/utils.cpython-310.pyc +0 -0
- src/components/__pycache__/__init__.cpython-310.pyc +0 -0
- src/components/__pycache__/data_transformation.cpython-310.pyc +0 -0
- src/components/data_ingestion.py +7 -1
- src/components/data_transformation.py +111 -0
- src/utils.py +19 -0
logs/09_09_2023_20_29_47.log/09_09_2023_20_29_47.log
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[ 2023-09-09 20:29:51,791 ] 25 root - INFO - Entered the data ingestion method or component
|
2 |
+
[ 2023-09-09 20:29:51,795 ] 28 root - INFO - read the dataset as dataframe
|
3 |
+
[ 2023-09-09 20:29:51,799 ] 37 root - INFO - Train test split initiated
|
4 |
+
[ 2023-09-09 20:29:51,804 ] 44 root - INFO - ingestion of data completed
|
5 |
+
[ 2023-09-09 20:29:51,806 ] 68 root - INFO - read train and test data completed
|
6 |
+
[ 2023-09-09 20:29:51,806 ] 70 root - INFO - obtaining preprocessing object
|
7 |
+
[ 2023-09-09 20:29:51,806 ] 44 root - INFO - numerical columns: ['writing_score', 'reading_score']
|
8 |
+
[ 2023-09-09 20:29:51,806 ] 51 root - INFO - categorical columns: ['gender', 'race_ethnicity', 'parental_level_of_education', 'lunch', 'test_preparation_course']
|
9 |
+
[ 2023-09-09 20:29:51,806 ] 81 root - INFO - applying preprocessing object on training and testing dataframe
|
logs/09_09_2023_20_30_41.log/09_09_2023_20_30_41.log
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[ 2023-09-09 20:30:42,388 ] 25 root - INFO - Entered the data ingestion method or component
|
2 |
+
[ 2023-09-09 20:30:42,391 ] 28 root - INFO - read the dataset as dataframe
|
3 |
+
[ 2023-09-09 20:30:42,394 ] 37 root - INFO - Train test split initiated
|
4 |
+
[ 2023-09-09 20:30:42,398 ] 44 root - INFO - ingestion of data completed
|
5 |
+
[ 2023-09-09 20:30:42,400 ] 68 root - INFO - read train and test data completed
|
6 |
+
[ 2023-09-09 20:30:42,400 ] 70 root - INFO - obtaining preprocessing object
|
7 |
+
[ 2023-09-09 20:30:42,400 ] 44 root - INFO - numerical columns: ['writing_score', 'reading_score']
|
8 |
+
[ 2023-09-09 20:30:42,400 ] 51 root - INFO - categorical columns: ['gender', 'race_ethnicity', 'parental_level_of_education', 'lunch', 'test_preparation_course']
|
9 |
+
[ 2023-09-09 20:30:42,400 ] 81 root - INFO - applying preprocessing object on training and testing dataframe
|
10 |
+
[ 2023-09-09 20:30:42,408 ] 100 root - INFO - saved preprocessing object.
|
logs/09_09_2023_20_31_40.log/09_09_2023_20_31_40.log
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[ 2023-09-09 20:31:41,630 ] 25 root - INFO - Entered the data ingestion method or component
|
2 |
+
[ 2023-09-09 20:31:41,633 ] 28 root - INFO - read the dataset as dataframe
|
3 |
+
[ 2023-09-09 20:31:41,635 ] 37 root - INFO - Train test split initiated
|
4 |
+
[ 2023-09-09 20:31:41,639 ] 44 root - INFO - ingestion of data completed
|
5 |
+
[ 2023-09-09 20:31:41,641 ] 68 root - INFO - read train and test data completed
|
6 |
+
[ 2023-09-09 20:31:41,641 ] 70 root - INFO - obtaining preprocessing object
|
7 |
+
[ 2023-09-09 20:31:41,641 ] 44 root - INFO - numerical columns: ['writing_score', 'reading_score']
|
8 |
+
[ 2023-09-09 20:31:41,641 ] 51 root - INFO - categorical columns: ['gender', 'race_ethnicity', 'parental_level_of_education', 'lunch', 'test_preparation_course']
|
9 |
+
[ 2023-09-09 20:31:41,641 ] 81 root - INFO - applying preprocessing object on training and testing dataframe
|
10 |
+
[ 2023-09-09 20:31:41,648 ] 100 root - INFO - saved preprocessing object.
|
src/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (651 Bytes). View file
|
|
src/components/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (158 Bytes). View file
|
|
src/components/__pycache__/data_transformation.cpython-310.pyc
ADDED
Binary file (3.17 kB). View file
|
|
src/components/data_ingestion.py
CHANGED
@@ -7,6 +7,9 @@ import pandas as pd
|
|
7 |
from sklearn.model_selection import train_test_split
|
8 |
from dataclasses import dataclass
|
9 |
|
|
|
|
|
|
|
10 |
@dataclass
|
11 |
class DataIngestionConfig:
|
12 |
train_data_path: str=os.path.join('artifacts','train.csv')
|
@@ -49,4 +52,7 @@ class DataIngestion:
|
|
49 |
|
50 |
if __name__ == '__main__':
|
51 |
obj=DataIngestion()
|
52 |
-
obj.intiate_data_ingestion()
|
|
|
|
|
|
|
|
7 |
from sklearn.model_selection import train_test_split
|
8 |
from dataclasses import dataclass
|
9 |
|
10 |
+
from src.components.data_transformation import DataTransformation,DataTransformationConfig
|
11 |
+
|
12 |
+
|
13 |
@dataclass
|
14 |
class DataIngestionConfig:
|
15 |
train_data_path: str=os.path.join('artifacts','train.csv')
|
|
|
52 |
|
53 |
if __name__ == '__main__':
|
54 |
obj=DataIngestion()
|
55 |
+
train_data_path, test_data_path = obj.intiate_data_ingestion()
|
56 |
+
|
57 |
+
data_transformation = DataTransformation()
|
58 |
+
data_transformation.initiate_data_tranformation(train_data_path,test_data_path)
|
src/components/data_transformation.py
CHANGED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import os
|
3 |
+
from dataclasses import dataclass
|
4 |
+
|
5 |
+
import numpy as np
|
6 |
+
import pandas as pd
|
7 |
+
from sklearn.compose import ColumnTransformer
|
8 |
+
from sklearn.impute import SimpleImputer
|
9 |
+
from sklearn.preprocessing import OneHotEncoder,StandardScaler
|
10 |
+
from sklearn.pipeline import Pipeline
|
11 |
+
|
12 |
+
|
13 |
+
from src.exception import CustomException
|
14 |
+
from src.logger import logging
|
15 |
+
from src.utils import save_object
|
16 |
+
|
17 |
+
@dataclass
class DataTransformationConfig:
    # Filesystem location where the fitted preprocessing object is persisted.
    # Annotated as `str` so @dataclass treats it as a real field (the sibling
    # DataIngestionConfig annotates its paths the same way); without the
    # annotation it would be a plain class attribute, not a dataclass field.
    preprocessor_ob_file_path: str = os.path.join('artifacts', 'preprocessor.pkl')
21 |
+
class DataTransformation:
    """Builds the feature-preprocessing pipeline, applies it to the
    train/test splits and persists the fitted preprocessor to disk.
    """

    def __init__(self, numerical_columns=None, categorical_columns=None,
                 target_column_name='math_score'):
        """Configure the transformation step.

        Args:
            numerical_columns: columns to median-impute and standard-scale.
                Defaults to the student-performance dataset schema.
            categorical_columns: columns to most-frequent-impute and one-hot
                encode. Defaults to the student-performance dataset schema.
            target_column_name: label column excluded from the features.
        """
        self.data_transformation_config = DataTransformationConfig()
        # Defaults preserve the previously hard-coded dataset schema, so
        # existing no-argument callers behave identically.
        self.numerical_columns = (numerical_columns if numerical_columns is not None
                                  else ['writing_score', 'reading_score'])
        self.categorical_columns = (categorical_columns if categorical_columns is not None
                                    else ['gender',
                                          'race_ethnicity',
                                          'parental_level_of_education',
                                          'lunch',
                                          'test_preparation_course'])
        self.target_column_name = target_column_name

    def get_data_transformer_object(self):
        """Return an unfitted ColumnTransformer for the configured columns.

        Numerical columns are median-imputed then standard-scaled;
        categorical columns are most-frequent-imputed then one-hot encoded.
        (The previous docstring claimed this method "performs data
        transformation" — it only builds the transformer; fitting happens
        in initiate_data_tranformation.)

        Raises:
            CustomException: wraps any error raised while building the object.
        """
        try:
            num_pipeline = Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='median')),
                # NOTE(review): step name 'scaller' is a typo, but step names
                # are part of the pipeline's public API (set_params, lookup),
                # so it is kept for backward compatibility.
                ('scaller', StandardScaler())
            ])
            logging.info(f"numerical columns: {self.numerical_columns}")

            cat_pipeline = Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='most_frequent')),
                # NOTE(review): drop='first' combined with
                # handle_unknown='ignore' raises ValueError on
                # scikit-learn < 1.0 — confirm the pinned sklearn version.
                ('ohe', OneHotEncoder(drop='first', handle_unknown='ignore'))
            ])
            logging.info(f"categorical columns: {self.categorical_columns}")

            preprocessor = ColumnTransformer([
                ('num_pipeline', num_pipeline, self.numerical_columns),
                ('cat_pipeline', cat_pipeline, self.categorical_columns)
            ])

            return preprocessor
        except Exception as e:
            raise CustomException(e, sys)

    def initiate_data_tranformation(self, train_path, test_path):
        """Read the train/test CSVs, fit the preprocessor on the training
        features, transform both splits and persist the fitted preprocessor.

        (Method name retains its original spelling because external callers,
        e.g. data_ingestion's __main__ block, invoke it by this name.)

        Args:
            train_path: path to the training-split CSV.
            test_path: path to the test-split CSV.

        Returns:
            Tuple of (train_arr, test_arr, preprocessor_path): transformed
            feature arrays with the target appended as the last column, plus
            the path of the saved preprocessor object.

        Raises:
            CustomException: wraps any error during transformation.
        """
        try:
            train_df = pd.read_csv(train_path)
            test_df = pd.read_csv(test_path)

            logging.info('read train and test data completed')

            logging.info('obtaining preprocessing object')

            preprocessing_obj = self.get_data_transformer_object()

            input_feature_train_df = train_df.drop(self.target_column_name, axis=1)
            target_feature_train_df = train_df[self.target_column_name]

            input_feature_test_df = test_df.drop(self.target_column_name, axis=1)
            target_feature_test_df = test_df[self.target_column_name]

            logging.info(f"applying preprocessing object on training and testing dataframe")

            # Fit only on the training split to avoid test-set leakage; the
            # test split is transformed with the already-fitted object.
            input_feature_train_arr = preprocessing_obj.fit_transform(input_feature_train_df)
            input_feature_test_arr = preprocessing_obj.transform(input_feature_test_df)

            # Re-attach the target as the final column of each array.
            train_arr = np.c_[
                input_feature_train_arr, np.array(target_feature_train_df)
            ]

            test_arr = np.c_[
                input_feature_test_arr, np.array(target_feature_test_df)
            ]

            save_object(
                file_path=self.data_transformation_config.preprocessor_ob_file_path,
                obj=preprocessing_obj
            )
            logging.info(f"saved preprocessing object.")

            return (
                train_arr,
                test_arr,
                self.data_transformation_config.preprocessor_ob_file_path
            )

        except Exception as e:
            raise CustomException(e, sys)
src/utils.py
CHANGED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
|
4 |
+
import numpy as np
|
5 |
+
import pandas as pd
|
6 |
+
import pickle
|
7 |
+
|
8 |
+
|
9 |
+
from src.exception import CustomException
|
10 |
+
|
11 |
+
def save_object(file_path, obj):
|
12 |
+
try:
|
13 |
+
dir_path = os.path.dirname(file_path)
|
14 |
+
os.makedirs(dir_path,exist_ok=True)
|
15 |
+
with open(file_path, 'wb') as file_obj:
|
16 |
+
pickle.dump(obj,file_obj)
|
17 |
+
|
18 |
+
except Exception as e:
|
19 |
+
raise CustomException(e,sys)
|