# Page-capture residue, not program code: "Spaces: Sleeping" (appears to be a hosting status banner).
import numpy as np
import pandas as pd
import streamlit as st
class DataTransformer:
    """Streamlit controls for cleaning a pandas DataFrame.

    Every public method draws its own widgets, applies the requested change
    to ``self.data`` when the user triggers it, and returns ``self.data``.
    """

    def __init__(self, data):
        """Keep a reference to ``data`` and render the section header."""
        self.data = data
        st.header("Data Cleaning")

    @staticmethod
    def _compute_column_operation(data, column_operation):
        """Evaluate a ``"<column> <operation>"`` request against ``data``.

        Args:
            data: DataFrame holding the column to transform.
            column_operation: Text such as ``"age * 2"``; the first
                whitespace-separated token is the column name and the rest
                is appended to each cell value as a Python expression.

        Returns:
            A ``(column, values)`` tuple where ``values`` is the new Series.
            ``data`` itself is not modified.

        Raises:
            ValueError: If the text has no operation part or names a column
                that does not exist. Errors raised while evaluating the
                expression (``SyntaxError``, ``NameError``, ...) propagate.
        """
        # maxsplit=1 keeps "* 2" together, so the documented "age * 2" form
        # works; a plain split() produced three tokens and failed to unpack.
        parts = column_operation.split(maxsplit=1)
        if len(parts) != 2:
            raise ValueError(
                "expected '<column> <operation>', e.g. 'age * 2'"
            )
        column, operation = parts
        if column not in data.columns:
            raise ValueError(f"unknown column: {column}")

        def evaluate(value):
            # Missing cells stay missing; their text form ("nan", "None",
            # "<NA>") is not something the expression can compute with.
            if pd.api.types.is_scalar(value) and pd.isna(value):
                return value
            # SECURITY: eval() executes arbitrary Python typed into the
            # sidebar, so anyone who can reach the app can run code on the
            # server. Replace with a whitelisted expression parser before
            # exposing this publicly.
            # The parentheses keep negative values intact: "-3** 2" would
            # otherwise evaluate as -(3 ** 2).
            return eval(f"({value!s}){operation}")

        return column, data[column].apply(evaluate)

    @staticmethod
    def _impute_columns(data, columns, option):
        """Fill nulls in ``columns`` of ``data`` in place.

        Args:
            data: DataFrame to modify.
            columns: List of column labels to fill.
            option: ``"mean"``, ``"mode"`` or ``"0"``.

        Returns:
            True when a fill was applied, False when there was nothing to
            fill with (unknown option, or no mode because every selected
            value is null).
        """
        selected = data[columns]
        if option == "mean":
            fill_values = selected.mean()
        elif option == "mode":
            # mode() returns a DataFrame (one row per tied mode); the first
            # row is used. It is empty when all selected values are null.
            modes = selected.mode()
            if modes.empty:
                return False
            fill_values = modes.iloc[0]
        elif option == "0":
            fill_values = 0
        else:
            return False
        data[columns] = selected.fillna(fill_values)
        return True

    def perform_column_operation(self):
        """Apply a sidebar-entered expression to one column.

        Returns:
            ``self.data``; the column is replaced only when the expression
            evaluated successfully for every row.
        """
        column_operation = st.sidebar.text_input('Column operation (e.g., age * 2)')
        if column_operation:
            try:
                column, values = self._compute_column_operation(
                    self.data, column_operation
                )
            except (ValueError, SyntaxError, NameError, TypeError,
                    ZeroDivisionError) as exc:
                # Report bad input in the UI instead of crashing the script.
                st.error(f"Could not apply column operation: {exc}")
            else:
                self.data[column] = values
                st.write(self.data)
        return self.data

    def remove_null(self):
        """Drop rows that have nulls in the user-selected columns.

        Returns:
            ``self.data`` after the optional in-place drop.
        """
        # The selector must be created on every run, outside the button
        # branch: st.button is True only on the run triggered by the click,
        # so a widget created inside that branch never holds a selection.
        col = st.multiselect('Choose columns to remove nulls', self.data.columns)
        if st.button('Remove Null'):
            if col:
                self.data.dropna(subset=col, inplace=True)
                st.toast("Null values removed")
            else:
                st.warning("Select at least one column first")
        return self.data

    def impute_null(self):
        """Fill nulls in the selected numeric columns with mean, mode or 0.

        Returns:
            ``self.data`` after the optional in-place fill.
        """
        numeric_columns = self.data.select_dtypes(include=[np.number]).columns
        col = st.multiselect('Choose columns to impute nulls', numeric_columns)
        option = st.selectbox('Impute nulls with', ('mean', 'mode', '0'))
        if st.button('Impute Null'):
            if not col:
                st.warning("Select at least one column first")
            elif self._impute_columns(self.data, col, option):
                st.success("Null values filled")
                # NOTE(review): the original indentation was lost; this
                # assumes the CSV is written only after a fill - confirm.
                self.data.to_csv("data.csv", index=False)
            else:
                st.warning("No value available to impute with")
        return self.data

    def remove_columns(self):
        """Drop the user-selected columns.

        Returns:
            ``self.data`` after the optional in-place drop.
        """
        # Selector lives outside the button branch for the same reason as
        # in remove_null: the click run must already see the selection.
        col = st.multiselect('Choose columns to remove', self.data.columns)
        if st.button('Remove Columns'):
            if col:
                self.data.drop(columns=col, inplace=True)
                st.toast("Columns removed")
            else:
                st.warning("Select at least one column first")
        return self.data
# PROBLEMS RESOLVED
# - Transformed data was not retained.
# - Null-value handling.
# - Two options for nulls: remove them or impute them.
# PROBLEMS TO BE ADDRESSED
# - Convert categorical columns to numerical.
# - Give the option to analyse the transformed dataset or save it.