Spaces:
Sleeping
Sleeping
Atharva Thakur
committed on
Commit
•
d14ee80
1
Parent(s):
91d7466
Data retention problem solved
Browse files- .gitignore +2 -0
- Experiment.py +57 -0
- app.py +18 -9
- data_loader.py +24 -17
- data_transformer.py +8 -6
- test.py +0 -42
.gitignore
CHANGED
@@ -9,6 +9,8 @@ __pycache__/
|
|
9 |
|
10 |
# data set
|
11 |
data.csv
|
|
|
|
|
12 |
#Env variables
|
13 |
.env
|
14 |
# Distribution / packaging
|
|
|
9 |
|
10 |
# data set
|
11 |
data.csv
|
12 |
+
original_data.csv
|
13 |
+
|
14 |
#Env variables
|
15 |
.env
|
16 |
# Distribution / packaging
|
Experiment.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# Function to upload dataset
|
6 |
+
def upload_dataset():
    """Render a CSV uploader and return the uploaded file as a DataFrame.

    Returns:
        The parsed DataFrame, or None while no file has been uploaded.
    """
    csv_file = st.file_uploader("Upload CSV file", type=["csv"])
    if csv_file is None:
        # Nothing uploaded yet; the caller checks for None.
        return None
    return pd.read_csv(csv_file)
|
11 |
+
|
12 |
+
# Function to impute null values
|
13 |
+
def impute_null(df):
    """Render the null-imputation controls and fill nulls in the chosen columns.

    The user picks numeric columns and a fill strategy ('mean', 'mode' or '0');
    the fill is applied only on the rerun in which 'Impute Null' is clicked.

    Args:
        df: DataFrame to impute; the selected columns are reassigned in place.

    Returns:
        The same DataFrame object, imputed if the button was clicked.
    """
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    col = st.multiselect('Choose columns to impute nulls', numeric_columns)
    option = st.selectbox('Impute nulls with', ('mean', 'mode', '0'))

    if not st.button('Impute Null'):
        return df

    if not col:
        # Do not report success when there was nothing to fill.
        st.warning("Select at least one column to impute.")
        return df

    if option == "mean":
        df[col] = df[col].fillna(df[col].mean())
    elif option == "mode":
        # mode() returns a DataFrame (one row per tied mode); use the first row.
        # It has no rows when every selected column is entirely NaN, in which
        # case there is nothing to fill with and .iloc[0] would raise IndexError.
        modes = df[col].mode()
        if not modes.empty:
            df[col] = df[col].fillna(modes.iloc[0])
    elif option == "0":
        df[col] = df[col].fillna(0)

    st.success("Null values filled")
    return df
|
26 |
+
|
27 |
+
# Function to display transformed data
|
28 |
+
def display_data(df):
    """Show the given DataFrame on the page via ``st.write``."""
    st.write(df)
|
30 |
+
|
31 |
+
def main():
    """Run the experimental data-transformation page.

    Flow: upload a CSV, optionally open the null-imputation form, and
    optionally display the (possibly imputed) DataFrame.
    """
    st.title("Data Transformation App")

    # Step 1: Upload Dataset
    st.sidebar.title("Upload Dataset")
    df = upload_dataset()

    if df is not None:
        # Step 2: Perform Data Transformation
        st.sidebar.title("Data Transformation")
        # st.button is True only on the rerun caused by its own click. Widgets
        # rendered under it disappear on the next rerun, so the 'Impute Null'
        # button inside impute_null could never fire. Latch the click in
        # session state to keep the form on screen.
        if st.sidebar.button("Impute Null Values"):
            st.session_state["impute_form_open"] = True
        if st.session_state.get("impute_form_open", False):
            # impute_null reports its own success once a fill actually happens,
            # so no success message is emitted here.
            df = impute_null(df)

        # Step 3: Display Transformed Data
        st.sidebar.title("Transformed Data")
        if st.sidebar.checkbox("Show Transformed Data"):
            display_data(df)

        # Step 4: Store Transformed Data
        # You can store the transformed data in a variable or a data structure here
        # NOTE(review): df is rebuilt from the upload on every rerun, so the
        # imputed result only lives for the rerun of the click; persist it
        # (e.g. in st.session_state) if it must survive further interaction.

        # Step 5: Use Transformed Data
        # You can utilize the transformed data for further analysis, visualization, etc.


if __name__ == "__main__":
    main()
|
app.py
CHANGED
@@ -8,15 +8,18 @@ from data_QA import DataQA
|
|
8 |
import os
|
9 |
from streamlit_option_menu import option_menu
|
10 |
|
|
|
|
|
|
|
11 |
def main():
|
12 |
-
if os.path.exists("data.csv"):
|
13 |
-
os.remove("data.csv")
|
14 |
-
with open("data.csv", 'w'):
|
15 |
-
pass
|
16 |
st.title('Insights 📶')
|
17 |
-
|
18 |
data_loader = DataLoader()
|
19 |
-
|
|
|
|
|
|
|
|
|
20 |
|
21 |
if os.path.getsize("data.csv") != 0:
|
22 |
with st.sidebar:
|
@@ -31,6 +34,7 @@ def main():
|
|
31 |
|
32 |
# --- EDA ---
|
33 |
if selected == "Exploratory Data Analysis":
|
|
|
34 |
data_analyzer = DataAnalyzer(data)
|
35 |
data_analyzer.show_eda()
|
36 |
data_analyzer.show_null_value_statistics()
|
@@ -43,13 +47,18 @@ def main():
|
|
43 |
# --- DATA CLEANING ---
|
44 |
if selected == "Data Cleaning":
|
45 |
data_transformer = DataTransformer(data)
|
46 |
-
|
47 |
modified_data = data_transformer.perform_column_operation()
|
48 |
-
data_analyzer.show_null_value_statistics()
|
49 |
modified_data = data_transformer.remove_null()
|
50 |
modified_data = data_transformer.impute_null()
|
51 |
-
|
|
|
|
|
|
|
|
|
52 |
|
|
|
|
|
53 |
# data_filter = DataFilter(modified_data)
|
54 |
# data = data_filter.filter_rows()
|
55 |
|
|
|
8 |
import os
|
9 |
from streamlit_option_menu import option_menu
|
10 |
|
11 |
+
|
12 |
+
import pandas as pd
|
13 |
+
|
14 |
def main():
|
|
|
|
|
|
|
|
|
15 |
st.title('Insights 📶')
|
16 |
+
|
17 |
data_loader = DataLoader()
|
18 |
+
load = data_loader.load_data()
|
19 |
+
if load:
|
20 |
+
data = pd.read_csv('data.csv')
|
21 |
+
|
22 |
+
|
23 |
|
24 |
if os.path.getsize("data.csv") != 0:
|
25 |
with st.sidebar:
|
|
|
34 |
|
35 |
# --- EDA ---
|
36 |
if selected == "Exploratory Data Analysis":
|
37 |
+
data = pd.read_csv("data.csv")
|
38 |
data_analyzer = DataAnalyzer(data)
|
39 |
data_analyzer.show_eda()
|
40 |
data_analyzer.show_null_value_statistics()
|
|
|
47 |
# --- DATA CLEANING ---
|
48 |
if selected == "Data Cleaning":
|
49 |
data_transformer = DataTransformer(data)
|
50 |
+
|
51 |
modified_data = data_transformer.perform_column_operation()
|
|
|
52 |
modified_data = data_transformer.remove_null()
|
53 |
modified_data = data_transformer.impute_null()
|
54 |
+
data = modified_data
|
55 |
+
data_analyzer = DataAnalyzer(data)
|
56 |
+
data_analyzer.show_null_value_statistics()
|
57 |
+
new_data_analyzer = DataAnalyzer(modified_data)
|
58 |
+
data_analyzer.show_null_value_statistics()
|
59 |
|
60 |
+
# modified_data = data_transformer.remove_columns()
|
61 |
+
|
62 |
# data_filter = DataFilter(modified_data)
|
63 |
# data = data_filter.filter_rows()
|
64 |
|
data_loader.py
CHANGED
@@ -3,21 +3,28 @@ import pandas as pd
|
|
3 |
|
4 |
class DataLoader:
|
5 |
def __init__(self):
|
6 |
-
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
if
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
class DataLoader:
    """Streamlit controls that load a CSV into the app's working files.

    A successful load writes the data to ``./original_data.csv`` and
    ``./data.csv``.
    """

    def __init__(self):
        pass

    # NOTE(review): the instance parameter is named `_`, presumably so that
    # st.cache_data does not try to hash it -- confirm against the Streamlit
    # version in use. The cache is kept as in the original: a cached call does
    # not rewrite data.csv on later reruns.
    @st.cache_data(experimental_allow_widgets=True)
    def load_data(_):
        """Ask for a CSV (upload or URL) and store it on disk.

        Returns:
            True when a CSV was read and both ``./original_data.csv`` and
            ``./data.csv`` were written; False when nothing has been provided
            yet or the URL could not be read. Always a plain bool, so callers
            can test the result with ``if`` (an empty DataFrame would raise
            ValueError there).
        """
        data_source = st.selectbox('Select data source', ['Upload a CSV file', 'Input a URL'])

        if data_source == 'Upload a CSV file':
            uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
            if uploaded_file is None:
                return False
            data = pd.read_csv(uploaded_file)
        elif data_source == 'Input a URL':
            url = st.text_input('Enter the URL of a CSV file')
            if not url:
                return False
            try:
                data = pd.read_csv(url)
            except Exception:
                # UI boundary: network, HTTP and parse failures all end up
                # here; report the failure instead of claiming success.
                st.error('Could not load data from the provided URL. Please make sure the URL is correct and points to a CSV file.')
                return False
        else:
            return False

        # Both sources persist the same way so the rest of the app can read
        # data.csv regardless of how the data arrived.
        data.to_csv('./original_data.csv', index=False)
        data.to_csv('./data.csv', index=False)
        return True
|
data_transformer.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
|
|
3 |
|
4 |
class DataTransformer:
|
5 |
def __init__(self, data):
|
@@ -22,16 +23,17 @@ class DataTransformer:
|
|
22 |
return self.data
|
23 |
|
24 |
def impute_null(self):
|
|
|
|
|
25 |
if st.button('Impute Null'):
|
26 |
-
col = st.multiselect('Choose columns to impute nulls', self.data.select_dtypes(include=[np.number]).columns)
|
27 |
-
option = st.selectbox('Impute nulls with', ('mean', 'mode', '0'))
|
28 |
if option == "mean":
|
29 |
-
self.data.fillna(
|
30 |
elif option == "mode":
|
31 |
-
self.data.fillna(
|
32 |
elif option == "0":
|
33 |
-
self.data.fillna(
|
34 |
-
st.
|
|
|
35 |
return self.data
|
36 |
|
37 |
def remove_columns(self):
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
|
5 |
class DataTransformer:
|
6 |
def __init__(self, data):
|
|
|
23 |
return self.data
|
24 |
|
25 |
def impute_null(self):
    """Render the null-imputation controls and fill nulls in ``self.data``.

    The user picks numeric columns and a fill strategy ('mean', 'mode' or
    '0'). When 'Impute Null' is clicked with at least one column selected,
    the fill is applied and the result is written to ``data.csv``.

    Returns:
        ``self.data``, imputed if the button was clicked.
    """
    numeric_columns = self.data.select_dtypes(include=[np.number]).columns
    col = st.multiselect('Choose columns to impute nulls', numeric_columns)
    option = st.selectbox('Impute nulls with', ('mean', 'mode', '0'))

    if not st.button('Impute Null'):
        return self.data

    if not col:
        # Nothing selected: do not report success or rewrite data.csv.
        st.warning("Select at least one column to impute.")
        return self.data

    if option == "mean":
        self.data[col] = self.data[col].fillna(self.data[col].mean())
    elif option == "mode":
        # mode() returns a DataFrame (one row per tied mode); use the first row.
        # It has no rows when every selected column is entirely NaN, in which
        # case .iloc[0] would raise IndexError.
        modes = self.data[col].mode()
        if not modes.empty:
            self.data[col] = self.data[col].fillna(modes.iloc[0])
    elif option == "0":
        self.data[col] = self.data[col].fillna(0)

    st.success("Null values filled")
    # Persist so the imputed data survives the next Streamlit rerun.
    self.data.to_csv("data.csv", index=False)
    return self.data
|
38 |
|
39 |
def remove_columns(self):
|
test.py
DELETED
@@ -1,42 +0,0 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
from data_loader import DataLoader
|
3 |
-
from data_analyzer import DataAnalyzer
|
4 |
-
from data_filter import DataFilter
|
5 |
-
from data_transformer import DataTransformer
|
6 |
-
from data_visualizer import DataVisualizer
|
7 |
-
from data_QA import DataQA
|
8 |
-
import os
|
9 |
-
|
10 |
-
def main():
|
11 |
-
if os.path.exists("data.csv"):
|
12 |
-
os.remove("data.csv")
|
13 |
-
with open("data.csv", 'w'):
|
14 |
-
pass
|
15 |
-
st.title('Insights 📶')
|
16 |
-
|
17 |
-
data_loader = DataLoader()
|
18 |
-
data = data_loader.load_data()
|
19 |
-
|
20 |
-
if os.path.getsize("data.csv") != 0:
|
21 |
-
data_analyzer = DataAnalyzer(data)
|
22 |
-
data_analyzer.show_summary_statistics()
|
23 |
-
data_analyzer.show_data_types()
|
24 |
-
data_analyzer.show_null_value_statistics()
|
25 |
-
|
26 |
-
data_filter = DataFilter(data)
|
27 |
-
data = data_filter.filter_rows()
|
28 |
-
|
29 |
-
data_transformer = DataTransformer(data)
|
30 |
-
data = data_transformer.perform_column_operation()
|
31 |
-
data = data_transformer.remove_null()
|
32 |
-
data = data_transformer.impute_null()
|
33 |
-
data = data_transformer.remove_columns()
|
34 |
-
|
35 |
-
data_visualizer = DataVisualizer(data)
|
36 |
-
data_visualizer.visualize_data()
|
37 |
-
|
38 |
-
data_QA = DataQA(data)
|
39 |
-
data_QA.ask_csv()
|
40 |
-
|
41 |
-
if __name__ == "__main__":
|
42 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|