Insights/Testing/test_data_transformer.py
Atharva Thakur
Modules import error
022392e
raw
history blame
2.56 kB
import sys
import os
import numpy as np
sys.path.append("..")
import unittest
import pandas as pd
from Modules.data_transformer import DataTransformer
class TestDataTransformer(unittest.TestCase):
    """Unit tests for ``DataTransformer``.

    Every test starts from the same small DataFrame containing numeric
    columns with missing values (A, B, E, F, G) and categorical columns
    (C, D). The frame is also written to ``data.csv`` in the current
    working directory, and several tests compare that file against
    ``transformer.data`` after a transformation.

    NOTE(review): those comparisons presumably rely on ``DataTransformer``
    writing its result back to ``data.csv`` -- confirm against the module.
    """

    # File shared between setUp, the CSV assertions and tearDown.
    CSV_PATH = "data.csv"

    def setUp(self):
        """Create the sample data, write it to disk and build the transformer."""
        data = {
            'A': [1, 2, 3, None, 5],
            'B': [4, 5, None, 7, 8],
            'C': ['X', 'Y', 'Z', 'X', 'Y'],
            'D': ['M', 'N', 'O', 'N', 'P'],
            'E': [10.1, 20.2, None, 40.4, 50.5],
            'F': [10.1, 20.2, None, 40.4, None],
            'G': [None, 20.2, None, 40.4, 50.5],
        }
        self.sample_data = pd.DataFrame(data)
        self.sample_data.to_csv(self.CSV_PATH, index=False)
        # Hand the transformer a copy so self.sample_data stays pristine.
        self.transformer = DataTransformer(self.sample_data.copy())

    def tearDown(self):
        """Remove the CSV file created by setUp, if it is still present."""
        # Guard the removal so a missing file does not raise here and
        # hide the failure of the test that just ran.
        if os.path.exists(self.CSV_PATH):
            os.remove(self.CSV_PATH)

    def _assert_csv_matches_data(self):
        """Assert that ``data.csv`` on disk equals ``transformer.data``."""
        on_disk = pd.read_csv(self.CSV_PATH)
        self.assertTrue(on_disk.equals(self.transformer.data))

    def test_handle_null_remove(self):
        """Removing rows with nulls in 'G' leaves no missing values in 'G'."""
        self.transformer.handle_null_remove(['G'])
        # ``None in series`` would test the index labels, not the values
        # (and missing values are stored as NaN), so check for nulls
        # explicitly.
        self.assertFalse(self.transformer.data['G'].isnull().any())
        # NOTE(review): the CSV round-trip comparison used by the other
        # tests was disabled for this case in the original; it is not
        # asserted here.

    def test_remove_columns_func(self):
        """Removing column 'D' drops it from the transformer's data."""
        self.transformer.remove_columns_func(['D'])
        self.assertNotIn('D', self.transformer.data.columns)

    def test_handle_null_impute(self):
        """Imputing with mean, mode and '0' fills every null in the column."""
        # The scenarios run in order against the same transformer, so
        # each one builds on the data left by the previous one.
        scenarios = (('A', 'mean'), ('F', 'mode'), ('G', '0'))
        for column, strategy in scenarios:
            with self.subTest(column=column, strategy=strategy):
                self.transformer.handle_null_impute(column, strategy)
                self.assertFalse(self.transformer.data[column].isnull().any())
                self._assert_csv_matches_data()

    def test_categorical_to_numerical_func(self):
        """Encoding 'C' produces at least one column prefixed with 'C_'."""
        self.transformer.categorical_to_numerical_func(['C'])
        self.assertTrue(
            any(col.startswith('C_') for col in self.transformer.data.columns)
        )
        self._assert_csv_matches_data()
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()