Spaces:
Sleeping
Sleeping
import os | |
import sys | |
from src.exception import CustomException | |
from src.logger import logging | |
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
from dataclasses import dataclass | |
from src.components.data_transformation import DataTransformation,DataTransformationConfig | |
from src.components.model_trainer import ModelTrainerConfig,ModelTrainer | |
@dataclass
class DataIngestionConfig:
    """Output file locations used by the data ingestion step.

    Declared as a dataclass so each path can be overridden per instance via
    keyword arguments; calling it with no arguments yields the defaults below.
    """

    # Where the training split is written.
    train_data_path: str = os.path.join('artifacts', 'train.csv')
    # Where the test split is written.
    test_data_path: str = os.path.join('artifacts', 'test.csv')
    # Where the unsplit copy of the source data is written.
    raw_data_path: str = os.path.join('artifacts', 'data.csv')
class DataIngestion:
    """Load the source dataset and write raw, train and test CSV files.

    Output locations are taken from the ``DataIngestionConfig`` instance
    stored on ``self.ingestion_config``.
    """

    def __init__(self):
        self.ingestion_config = DataIngestionConfig()

    def intiate_data_ingestion(self, source_path: str = 'notebook/data/stud.csv'):
        """Read the source CSV, save a raw copy and an 80/20 train/test split.

        The method name keeps its existing spelling because the script entry
        point in this file calls it under that name.

        Args:
            source_path: CSV file to ingest. Defaults to the path that was
                previously hard-coded, so existing no-argument calls behave
                as before.

        Returns:
            A ``(train_data_path, test_data_path)`` tuple taken from the
            ingestion config.

        Raises:
            CustomException: wraps any exception raised while reading,
                splitting or writing the data.
        """
        logging.info("Entered the data ingestion method or component")
        try:
            df = pd.read_csv(source_path)
            logging.info('read the dataset as dataframe')

            config = self.ingestion_config

            # Make sure the parent directory of every output file exists,
            # not only the one holding the train split.
            for target in (
                config.raw_data_path,
                config.train_data_path,
                config.test_data_path,
            ):
                parent_dir = os.path.dirname(target)
                if parent_dir:  # a bare filename has no directory to create
                    os.makedirs(parent_dir, exist_ok=True)

            # Save the unsplit data.
            df.to_csv(config.raw_data_path, index=False, header=True)

            logging.info('Train test split initiated')
            # A fixed random_state keeps the split reproducible across runs.
            train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)
            train_set.to_csv(config.train_data_path, index=False, header=True)
            test_set.to_csv(config.test_data_path, index=False, header=True)
            logging.info("ingestion of data completed")

            return (
                config.train_data_path,
                config.test_data_path,
            )
        except Exception as e:
            # Chain the original error so the root cause stays in the traceback.
            raise CustomException(e, sys) from e
if __name__ == '__main__':
    # Script entry point: run ingestion, then transformation, then training.
    ingestion = DataIngestion()
    train_csv, test_csv = ingestion.intiate_data_ingestion()

    transformer = DataTransformation()
    # The transformation step returns three values; the third is not used here.
    train_arr, test_arr, _ = transformer.initiate_data_tranformation(train_csv, test_csv)

    trainer = ModelTrainer()
    outcome = trainer.initiate_model_trainer(train_array=train_arr, test_array=test_arr)
    print(outcome)