|
import os |
|
import sys |
|
import shutil |
|
from src.logger import logging |
|
from src.exception import PlantException |
|
from src.entity.config_entity import DataValidationConfig |
|
from src.entity.artifact_entity import DataIngestionArtifact |
|
from src.entity.artifact_entity import DataValidationArtifact |
|
|
|
class DataValidation: |
|
def __init__(self,data_ingestion_artifact: DataIngestionArtifact, |
|
data_validation_config: DataValidationConfig): |
|
|
|
try: |
|
self.data_ingestion_artifact = data_ingestion_artifact |
|
self.data_validation_config = data_validation_config |
|
|
|
except Exception as e: |
|
raise PlantException(e, sys) |
|
|
|
|
|
def validate_all_files_exist(self) -> bool: |
|
try: |
|
|
|
validation_status = None |
|
all_files = os.listdir(self.data_ingestion_artifact.dataset_path) |
|
logging.info(f"File format we got: {all_files}") |
|
|
|
for file in all_files: |
|
if file not in self.data_validation_config.required_file_list: |
|
validation_status = False |
|
os.makedirs(self.data_validation_config.data_validation_dir, exist_ok=True) |
|
|
|
with open(self.data_validation_config.valid_status_file_dir, "w") as f: |
|
f.write(f"Validation status: {validation_status}") |
|
|
|
else: |
|
validation_status = True |
|
os.makedirs(self.data_validation_config.data_validation_dir, exist_ok=True) |
|
with open(self.data_validation_config.valid_status_file_dir, "w") as f: |
|
f.write(f"Validation status: {validation_status}") |
|
|
|
return validation_status |
|
|
|
except Exception as e: |
|
raise PlantException(e, sys) |
|
|
|
def initiate_data_validation(self) -> DataValidationArtifact: |
|
logging.info("Entered initiate_data_validation method of DataValidation class") |
|
try: |
|
status = self.validate_all_files_exist() |
|
data_validation_artifact = DataValidationArtifact(validation_status=status) |
|
|
|
logging.info("Exited initiate_data_validation method of DataValidation class") |
|
logging.info(f"Data validation artifact: {data_validation_artifact}") |
|
|
|
|
|
|
|
|
|
return data_validation_artifact |
|
|
|
except Exception as e: |
|
raise PlantException(e, sys) |