Spaces:
Sleeping
Sleeping
# Import necessary modules | |
import os | |
import sys | |
import pandas as pd | |
from src.exception import CustomException | |
from src.logger import logging | |
from sklearn.model_selection import train_test_split | |
from dataclasses import dataclass | |
from src.components.data_transformation import DataTransformation, DataTransformationConfig | |
from src.components.model_trainer import ModelTrainerConfig, ModelTrainer | |
# Define a configuration class for data ingestion settings
@dataclass
class DataIngestionConfig:
    """File locations used by the data ingestion step.

    Declared as a dataclass so the defaults below can be overridden per
    instance, e.g. ``DataIngestionConfig(raw_data_path="tmp/raw.csv")``.
    Calling ``DataIngestionConfig()`` with no arguments yields the same
    values the plain class attributes provided before.
    """

    # Where the training split CSV is written.
    train_data_path: str = os.path.join('artifacts', "train.csv")
    # Where the testing split CSV is written.
    test_data_path: str = os.path.join('artifacts', "test.csv")
    # Where the unsplit copy of the source dataset is written.
    raw_data_path: str = os.path.join('artifacts', "data.csv")
# Define the main class responsible for data ingestion
class DataIngestion:
    """Load the source CSV, save a raw copy, and write a train/test split.

    Output locations are read from ``self.ingestion_config``, a
    ``DataIngestionConfig`` instance created in ``__init__``.
    """

    def __init__(self):
        self.ingestion_config = DataIngestionConfig()

    # Function to initiate data ingestion
    def initiate_data_ingestion(self, source_path='EDA/data/data_modified.csv'):
        """Run the ingestion step and return the paths of the split files.

        Args:
            source_path: CSV file to read. Defaults to the path that was
                previously hard-coded, so existing callers are unaffected.

        Returns:
            A ``(train_data_path, test_data_path)`` tuple taken from the
            ingestion config.

        Raises:
            CustomException: Wraps any exception raised while reading,
                splitting, or writing the data.
        """
        logging.info("Entered the Data Ingestion Method and Components")
        config = self.ingestion_config
        try:
            # Read the dataset into a dataframe
            df = pd.read_csv(source_path)
            logging.info('Read the Dataset as Dataframe')

            # Create the directory of every output file, not just the train
            # file, so the writes below succeed even when the three paths are
            # configured into different directories. A path with no directory
            # component has dirname '' and os.makedirs('') raises, so skip it.
            output_paths = (
                config.raw_data_path,
                config.train_data_path,
                config.test_data_path,
            )
            for output_path in output_paths:
                output_dir = os.path.dirname(output_path)
                if output_dir:
                    os.makedirs(output_dir, exist_ok=True)

            # Save the raw data to a CSV file
            df.to_csv(config.raw_data_path, index=False, header=True)

            logging.info("Train Test Split Initiated")
            # 80/20 split; the fixed random_state keeps the partition
            # reproducible across runs.
            train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)
            train_set.to_csv(config.train_data_path, index=False, header=True)
            test_set.to_csv(config.test_data_path, index=False, header=True)
            logging.info("Ingestion of the Data is Completed")

            # Return the paths of the training and testing data
            return (
                config.train_data_path,
                config.test_data_path,
            )
        except Exception as e:
            # Re-raise as the project's exception type, chaining the original
            # error explicitly so it is recorded as the cause.
            raise CustomException(e, sys) from e
# Main execution block to run data ingestion
if __name__ == "__main__":
    # Step 1: run ingestion; it returns the train and test CSV paths.
    ingestion = DataIngestion()
    train_path, test_path = ingestion.initiate_data_ingestion()

    # Step 2: hand both paths to the transformation component, which returns
    # three values; the third (preprocessor file path) is not used below.
    transformer = DataTransformation()
    transformation_output = transformer.initiate_data_transformation(train_path, test_path)
    train_arr, test_arr, preprocessor_file_path = transformation_output

    # Step 3: train on the transformed arrays and print the trainer's result.
    trainer = ModelTrainer()
    training_result = trainer.initiate_model_trainer(train_arr, test_arr)
    print(training_result)