# NOTE: extraction residue from the hosting page header
# ("Spaces: Sleeping Sleeping"); not part of the test module.
import os
import sys
import unittest

import numpy as np
import pandas as pd

# Extend the import search path before importing the project-local
# ``Modules`` package.
# NOTE(review): ".." is resolved against the current working directory, not
# against this file's location — presumably the tests are launched from the
# test folder; confirm.
sys.path.append("..")

from Modules.data_transformer import DataTransformer  # noqa: E402
class TestDataTransformer(unittest.TestCase):
    """Unit tests for ``DataTransformer`` on a small frame containing nulls.

    Every test starts from a fresh seven-column sample frame: numeric columns
    with missing values ('A', 'B', 'E', 'F', 'G') and two string columns
    ('C', 'D'). ``setUp`` writes that frame to ``data.csv`` in the current
    working directory and ``tearDown`` removes the file again.
    """

    # File written by setUp and read back by the CSV comparison helper.
    CSV_PATH = "data.csv"

    def setUp(self):
        """Build the sample frame, write it to ``data.csv`` and wrap a copy."""
        data = {
            'A': [1, 2, 3, None, 5],
            'B': [4, 5, None, 7, 8],
            'C': ['X', 'Y', 'Z', 'X', 'Y'],
            'D': ['M', 'N', 'O', 'N', 'P'],
            'E': [10.1, 20.2, None, 40.4, 50.5],
            'F': [10.1, 20.2, None, 40.4, None],
            'G': [None, 20.2, None, 40.4, 50.5],
        }
        self.sample_data = pd.DataFrame(data)
        self.sample_data.to_csv(self.CSV_PATH, index=False)
        # The transformer receives a copy, so self.sample_data is a separate
        # object from the frame the transformer works on.
        self.transformer = DataTransformer(self.sample_data.copy())

    def tearDown(self):
        """Remove the ``data.csv`` file created by ``setUp``."""
        try:
            os.remove(self.CSV_PATH)
        except FileNotFoundError:
            # The file is already gone; there is nothing left to clean up and
            # a missing file should not turn a finished test into an error.
            pass

    def _assert_csv_matches_data(self):
        """Assert that ``data.csv`` on disk equals the transformer's frame.

        NOTE(review): the file written by ``setUp`` holds the untransformed
        values, so this can only pass if something rewrites ``data.csv``
        after the transformation — presumably ``DataTransformer`` itself;
        confirm against its implementation.
        """
        on_disk = pd.read_csv(self.CSV_PATH)
        self.assertTrue(on_disk.equals(self.transformer.data))

    def test_handle_null_remove(self):
        """Column 'G' holds no nulls after ``handle_null_remove(['G'])``."""
        self.transformer.handle_null_remove(['G'])
        # ``None in series`` tests the index labels, not the values, so the
        # previous ``assertNotIn(None, ...)`` could never fail. Check the
        # values explicitly, as the impute test does.
        self.assertFalse(self.transformer.data['G'].isnull().any())
        # NOTE(review): unlike the other tests, the data.csv comparison was
        # commented out here in the original and is therefore not asserted.

    def test_remove_columns_func(self):
        """Column 'D' is absent after ``remove_columns_func(['D'])``."""
        self.transformer.remove_columns_func(['D'])
        self.assertNotIn('D', self.transformer.data.columns)

    def test_handle_null_impute(self):
        """Imputing with 'mean', 'mode' and '0' leaves no nulls behind."""
        # The strategies are applied one after another to the same
        # transformer, in the same order as the original test.
        cases = (('A', 'mean'), ('F', 'mode'), ('G', '0'))
        for column, strategy in cases:
            # subTest reports each strategy separately instead of stopping at
            # the first failing one.
            with self.subTest(column=column, strategy=strategy):
                self.transformer.handle_null_impute(column, strategy)
                self.assertFalse(self.transformer.data[column].isnull().any())
                self._assert_csv_matches_data()

    def test_categorical_to_numerical_func(self):
        """Converting 'C' yields at least one column prefixed with 'C_'."""
        self.transformer.categorical_to_numerical_func(['C'])
        self.assertTrue(
            any(col.startswith('C_') for col in self.transformer.data.columns)
        )
        self._assert_csv_matches_data()
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()