Spaces:
Sleeping
Sleeping
Merge pull request #18 from A-R-Thakur/testing
Browse files
- .github/workflows/Build-Test.yml +1 -9
- Modules/data_transformer.py +37 -21
- test_app.py → Testing/test_app.py +3 -1
- Testing/test_data_transformer.py +69 -0
- app.py +1 -0
.github/workflows/Build-Test.yml
CHANGED
@@ -12,15 +12,6 @@ permissions:
|
|
12 |
contents: read
|
13 |
|
14 |
jobs:
|
15 |
-
codacy-analysis-cli:
|
16 |
-
name: Codacy Analysis CLI
|
17 |
-
runs-on: ubuntu-latest
|
18 |
-
steps:
|
19 |
-
- name: Checkout code
|
20 |
-
uses: actions/checkout@main
|
21 |
-
|
22 |
-
- name: Run Codacy Analysis CLI
|
23 |
-
uses: codacy/codacy-analysis-cli-action@master
|
24 |
|
25 |
Test:
|
26 |
|
@@ -49,4 +40,5 @@ jobs:
|
|
49 |
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
50 |
- name: Test with pytest
|
51 |
run: |
|
|
|
52 |
pytest -vv
|
|
|
12 |
contents: read
|
13 |
|
14 |
jobs:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
Test:
|
17 |
|
|
|
40 |
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
41 |
- name: Test with pytest
|
42 |
run: |
|
43 |
+
cd Testing
|
44 |
pytest -vv
|
Modules/data_transformer.py
CHANGED
@@ -5,7 +5,7 @@ import numpy as np
|
|
5 |
class DataTransformer:
|
6 |
def __init__(self, data):
|
7 |
self.data = data
|
8 |
-
|
9 |
|
10 |
def perform_column_operation(self):
|
11 |
column_operation = st.sidebar.text_input('Column operation (e.g., age * 2)')
|
@@ -21,26 +21,17 @@ class DataTransformer:
|
|
21 |
st.subheader("Remove Null Values")
|
22 |
col = st.multiselect('Choose columns to remove nulls', self.data.columns)
|
23 |
if st.button('Remove Null'):
|
24 |
-
self.
|
25 |
st.success("Null values removed")
|
26 |
-
self.data.to_csv("data.csv", index=False)
|
27 |
st.subheader("Impute Null Values")
|
28 |
col = st.multiselect('Choose columns to impute nulls', self.data.select_dtypes(include=[np.number]).columns)
|
29 |
option = st.selectbox('Impute nulls with', ('-Select-','mean', 'mode', '0'))
|
30 |
if st.button('Impute Null'):
|
31 |
try:
|
32 |
-
|
33 |
-
self.data[col] = self.data[col].fillna(self.data[col].mean())
|
34 |
-
elif option == "mode":
|
35 |
-
self.data[col] = self.data[col].fillna(self.data[col].mode().iloc[0])
|
36 |
-
elif option == "0":
|
37 |
-
self.data[col] = self.data[col].fillna(0)
|
38 |
-
elif option == "-Select-":
|
39 |
-
raise ValueError("Select a value")
|
40 |
st.success("Null values filled")
|
41 |
except ValueError as e:
|
42 |
st.error(str(e))
|
43 |
-
self.data.to_csv("data.csv", index=False)
|
44 |
with right:
|
45 |
st.write("Null Stats")
|
46 |
null_counts = self.data.isnull().sum()
|
@@ -65,12 +56,8 @@ class DataTransformer:
|
|
65 |
st.subheader("Convert Categorical to Numerical")
|
66 |
columns_to_encode = st.multiselect('Choose columns to convert', self.data.select_dtypes(include=object).columns)
|
67 |
if st.button('Convert'):
|
68 |
-
|
69 |
-
one_hot_encoded = pd.get_dummies(self.data[col], prefix=col).astype(int)
|
70 |
-
self.data = pd.concat([self.data, one_hot_encoded], axis=1)
|
71 |
-
self.data.drop(col, axis=1, inplace=True)
|
72 |
st.success("Converted categoricals variables")
|
73 |
-
self.data.to_csv("data.csv", index=False)
|
74 |
st.write(self.data.head())
|
75 |
return self.data
|
76 |
|
@@ -78,17 +65,46 @@ class DataTransformer:
|
|
78 |
st.subheader("Remove Columns")
|
79 |
col = st.multiselect('Choose columns to remove', self.data.columns)
|
80 |
if st.button('Remove Columns'):
|
81 |
-
self.
|
82 |
st.success("Columns removed")
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
self.data.to_csv("data.csv", index=False)
|
84 |
return self.data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
#transformed data is not retained
|
88 |
#null values handling
|
89 |
#2 options - to remove or to impute that is the question
|
90 |
#categorical to numerical
|
91 |
|
92 |
# PROBLEMS TO BE ADDRESSED
|
93 |
-
#give option to analyse the transformed dataset or save it.
|
94 |
-
|
|
|
5 |
class DataTransformer:
|
6 |
def __init__(self, data):
|
7 |
self.data = data
|
8 |
+
|
9 |
|
10 |
def perform_column_operation(self):
|
11 |
column_operation = st.sidebar.text_input('Column operation (e.g., age * 2)')
|
|
|
21 |
st.subheader("Remove Null Values")
|
22 |
col = st.multiselect('Choose columns to remove nulls', self.data.columns)
|
23 |
if st.button('Remove Null'):
|
24 |
+
self.handle_null_remove(col)
|
25 |
st.success("Null values removed")
|
|
|
26 |
st.subheader("Impute Null Values")
|
27 |
col = st.multiselect('Choose columns to impute nulls', self.data.select_dtypes(include=[np.number]).columns)
|
28 |
option = st.selectbox('Impute nulls with', ('-Select-','mean', 'mode', '0'))
|
29 |
if st.button('Impute Null'):
|
30 |
try:
|
31 |
+
self.handle_null_impute(col,option)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
st.success("Null values filled")
|
33 |
except ValueError as e:
|
34 |
st.error(str(e))
|
|
|
35 |
with right:
|
36 |
st.write("Null Stats")
|
37 |
null_counts = self.data.isnull().sum()
|
|
|
56 |
st.subheader("Convert Categorical to Numerical")
|
57 |
columns_to_encode = st.multiselect('Choose columns to convert', self.data.select_dtypes(include=object).columns)
|
58 |
if st.button('Convert'):
|
59 |
+
self.categorical_to_numerical_func(columns_to_encode)
|
|
|
|
|
|
|
60 |
st.success("Converted categoricals variables")
|
|
|
61 |
st.write(self.data.head())
|
62 |
return self.data
|
63 |
|
|
|
65 |
st.subheader("Remove Columns")
|
66 |
col = st.multiselect('Choose columns to remove', self.data.columns)
|
67 |
if st.button('Remove Columns'):
|
68 |
+
self.remove_columns_func(col)
|
69 |
st.success("Columns removed")
|
70 |
+
return self.data
|
71 |
+
|
72 |
+
|
73 |
+
#---CORE FUNCTIONALITY---
|
74 |
+
def remove_columns_func(self,col):
|
75 |
+
self.data.drop(columns=col, inplace=True)
|
76 |
self.data.to_csv("data.csv", index=False)
|
77 |
return self.data
|
78 |
+
|
79 |
+
def handle_null_remove(self,col):
|
80 |
+
self.data.dropna(subset=col, inplace=True)
|
81 |
+
print(self.data)
|
82 |
+
self.data.to_csv("data.csv", index=False)
|
83 |
+
|
84 |
+
def handle_null_impute(self,col,option):
|
85 |
+
if option == "mean":
|
86 |
+
self.data[col] = self.data[col].fillna(self.data[col].mean())
|
87 |
+
elif option == "mode":
|
88 |
+
self.data[col] = self.data[col].fillna(self.data[col].mode().iloc[0])
|
89 |
+
elif option == "0":
|
90 |
+
self.data[col] = self.data[col].fillna(0)
|
91 |
+
elif option == "-Select-":
|
92 |
+
raise ValueError("Select an option")
|
93 |
+
self.data.to_csv("data.csv", index=False)
|
94 |
+
|
95 |
|
96 |
+
def categorical_to_numerical_func(self,columns_to_encode):
|
97 |
+
for col in columns_to_encode:
|
98 |
+
one_hot_encoded = pd.get_dummies(self.data[col], prefix=col).astype(int)
|
99 |
+
self.data = pd.concat([self.data, one_hot_encoded], axis=1)
|
100 |
+
self.data.drop(col, axis=1, inplace=True)
|
101 |
+
self.data.to_csv("data.csv", index=False)
|
102 |
+
|
103 |
+
# PROBLEMS RESOLVED
|
104 |
#transformed data is not retained
|
105 |
#null values handling
|
106 |
#2 options - to remove or to impute that is the question
|
107 |
#categorical to numerical
|
108 |
|
109 |
# PROBLEMS TO BE ADDRESSED
|
110 |
+
#give option to analyse the transformed dataset or save it.
|
|
test_app.py → Testing/test_app.py
RENAMED
@@ -1,8 +1,10 @@
|
|
1 |
from streamlit.testing.v1 import AppTest
|
|
|
|
|
2 |
|
3 |
def test_smoke():
|
4 |
"""Basic smoke test"""
|
5 |
-
at = AppTest.from_file("app.py", default_timeout=10).run()
|
6 |
# Supported elements are primarily exposed as properties on the script
|
7 |
# results object, which returns a sequence of that element.
|
8 |
assert not at.exception
|
|
|
1 |
from streamlit.testing.v1 import AppTest
|
2 |
+
# import sys
|
3 |
+
# sys.path.append("..")
|
4 |
|
5 |
def test_smoke():
|
6 |
"""Basic smoke test"""
|
7 |
+
at = AppTest.from_file("../app.py", default_timeout=10).run()
|
8 |
# Supported elements are primarily exposed as properties on the script
|
9 |
# results object, which returns a sequence of that element.
|
10 |
assert not at.exception
|
Testing/test_data_transformer.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import os
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
|
6 |
+
sys.path.append("..")
|
7 |
+
|
8 |
+
import unittest
|
9 |
+
import pandas as pd
|
10 |
+
from Modules.data_transformer import DataTransformer
|
11 |
+
|
12 |
+
|
13 |
+
class TestDataTransformer(unittest.TestCase):
|
14 |
+
|
15 |
+
def setUp(self):
|
16 |
+
# Initialize DataTransformer object with sample data
|
17 |
+
data = {
|
18 |
+
'A': [1, 2, 3, None, 5],
|
19 |
+
'B': [4, 5, None, 7, 8],
|
20 |
+
'C': ['X', 'Y', 'Z', 'X', 'Y'],
|
21 |
+
'D': ['M', 'N', 'O', 'N', 'P'],
|
22 |
+
'E': [10.1, 20.2, None, 40.4, 50.5],
|
23 |
+
'F': [10.1, 20.2, None, 40.4, None],
|
24 |
+
'G': [None, 20.2, None, 40.4, 50.5]
|
25 |
+
}
|
26 |
+
self.sample_data = pd.DataFrame(data)
|
27 |
+
self.sample_data.to_csv("data.csv", index=False)
|
28 |
+
self.transformer = DataTransformer(self.sample_data.copy())
|
29 |
+
|
30 |
+
def test_handle_null_remove(self):
|
31 |
+
# Test removing rows with null values
|
32 |
+
self.transformer.handle_null_remove(['G'])
|
33 |
+
self.assertNotIn(None, self.transformer.data['G'])
|
34 |
+
# self.assertTrue(pd.read_csv("data.csv").equals(self.transformer.data))
|
35 |
+
|
36 |
+
def test_remove_columns_func(self):
|
37 |
+
# Test removing columns
|
38 |
+
self.transformer.remove_columns_func(['D'])
|
39 |
+
self.assertNotIn('D', self.transformer.data.columns)
|
40 |
+
|
41 |
+
def test_handle_null_impute(self):
|
42 |
+
# Test imputing null values with mean
|
43 |
+
self.transformer.handle_null_impute('A', 'mean')
|
44 |
+
self.assertFalse(self.transformer.data['A'].isnull().any())
|
45 |
+
self.assertTrue(pd.read_csv("data.csv").equals(self.transformer.data))
|
46 |
+
|
47 |
+
# Test imputing null values with mode
|
48 |
+
self.transformer.handle_null_impute('F', 'mode')
|
49 |
+
self.assertFalse(self.transformer.data['F'].isnull().any())
|
50 |
+
self.assertTrue(pd.read_csv("data.csv").equals(self.transformer.data))
|
51 |
+
|
52 |
+
# Test imputing null values with 0
|
53 |
+
self.transformer.handle_null_impute('G', '0')
|
54 |
+
self.assertFalse(self.transformer.data['G'].isnull().any())
|
55 |
+
self.assertTrue(pd.read_csv("data.csv").equals(self.transformer.data))
|
56 |
+
|
57 |
+
def test_categorical_to_numerical_func(self):
|
58 |
+
# Test converting categorical columns to numerical
|
59 |
+
self.transformer.categorical_to_numerical_func(['C'])
|
60 |
+
self.assertTrue(any(col.startswith('C_') for col in self.transformer.data.columns))
|
61 |
+
self.assertTrue(pd.read_csv("data.csv").equals(self.transformer.data))
|
62 |
+
|
63 |
+
def tearDown(self):
|
64 |
+
# Clean up temporary files generated during tests
|
65 |
+
import os
|
66 |
+
os.remove("data.csv")
|
67 |
+
|
68 |
+
if __name__ == '__main__':
|
69 |
+
unittest.main()
|
app.py
CHANGED
@@ -52,6 +52,7 @@ def main():
|
|
52 |
|
53 |
# --- DATA CLEANING ---
|
54 |
if selected == "Data Cleaning":
|
|
|
55 |
data_transformer = DataTransformer(data)
|
56 |
|
57 |
# modified_data = data_transformer.perform_column_operation()
|
|
|
52 |
|
53 |
# --- DATA CLEANING ---
|
54 |
if selected == "Data Cleaning":
|
55 |
+
st.header("Data Cleaning")
|
56 |
data_transformer = DataTransformer(data)
|
57 |
|
58 |
# modified_data = data_transformer.perform_column_operation()
|