MatthiasPi committed on
Commit a3171a2
1 Parent(s): 09bf6bf

commit the whole project

.gitignore ADDED
@@ -0,0 +1 @@
+ **/__pycache__/
README.md ADDED
@@ -0,0 +1,21 @@
+ # AIViz
+ Software Engineering Project ESILV DIA 1
+
+ Web application allowing the user to perform Machine Learning over their own datasets. Results are then displayed through dynamic visualizations, and are downloadable.
+
+ To run the app locally:
+
+ - Make sure to have Python 3.10+
+ - Install the app dependencies:
+
+ ```
+ pip install -r requirements.txt
+ ```
+
+ - Run the app with Streamlit:
+
+ ```
+ streamlit run app.py
+ ```
+
+ AIViz is also accessible <a href="https://clementcornet-aiviz-app-n0g5vp.streamlit.app/">online</a>.
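The app's Machine Learning modules expect CSV files where every column except the last holds features and the last column is the target (they split frames with `iloc[:,:-1]` / `iloc[:,-1]`). A minimal sketch of how matching training and testing files could be produced, using scikit-learn's iris dataset as a stand-in for the user's own data:

```
# Sketch: build train.csv / test.csv in the layout AIViz's modules expect
# (features first, target as the last column). Iris is only a stand-in.
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

df = load_iris(as_frame=True).frame            # last column is the target
train, test = train_test_split(df, test_size=0.25, random_state=0)

train.to_csv("train.csv", index=False)
test.to_csv("test.csv", index=False)           # target is ignored at prediction time
```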
algos/classification/logistic.py ADDED
@@ -0,0 +1,26 @@
+ import streamlit as st
+ from sklearn.linear_model import LogisticRegression
+
+ def process(data):
+     # if either training or testing dataset is still missing
+     if data[0] is None or data[1] is None:
+         st.info('Please Upload Data')
+         return None
+
+     if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+         st.info('Please Upload Numerical Data.')
+         return None
+
+     # the last column of the training set is the target
+     x_train = data[0].iloc[:, :-1]
+     y_train = data[0].iloc[:, -1]
+     x_test = data[1].iloc[:, :x_train.shape[1]]
+
+     if len(x_train.columns) != len(x_test.columns):
+         st.info('Training and testing datasets have different numbers of columns, cannot perform classification.')
+         return None
+
+     clf = LogisticRegression(random_state=0).fit(x_train, y_train)
+     pred = clf.predict(x_test)
+     x_test[data[0].columns[-1]] = pred
+     return x_test
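Each classification and regression module in this commit follows the same contract: `process` receives a `(train_df, test_df)` tuple, fits on all but the last column of the training frame, and returns the test frame with predictions appended under the training target's name. Stripped of the Streamlit calls, the flow reduces to this minimal sketch (the toy column names `a`, `b`, `y` are illustrative, not from the project):

```
# Minimal sketch of the shared process() contract, without Streamlit.
import pandas as pd
from sklearn.linear_model import LogisticRegression

train = pd.DataFrame({'a': [0, 1, 2, 3], 'b': [1, 3, 5, 7], 'y': [0, 0, 1, 1]})
test = pd.DataFrame({'a': [1, 2], 'b': [2, 6]})

# last training column is the target, everything before it is a feature
x_train, y_train = train.iloc[:, :-1], train.iloc[:, -1]
clf = LogisticRegression(random_state=0).fit(x_train, y_train)

result = test.copy()
result[train.columns[-1]] = clf.predict(test)  # predictions land in a 'y' column
print(result)
```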
algos/classification/nnclassifier.py ADDED
@@ -0,0 +1,23 @@
+ import streamlit as st
+ from sklearn.neural_network import MLPClassifier
+
+ def process(data):
+     # if either training or testing dataset is still missing
+     if data[0] is None or data[1] is None:
+         st.info('Please Upload Data')
+         return None
+     if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+         st.info('Please Upload Numerical Data.')
+         return None
+     x_train = data[0].iloc[:, :-1]
+     y_train = data[0].iloc[:, -1]
+     x_test = data[1].iloc[:, :x_train.shape[1]]
+
+     if len(x_train.columns) != len(x_test.columns):
+         st.info('Training and testing datasets have different numbers of columns, cannot perform classification.')
+         return None
+
+     clf = MLPClassifier(random_state=1, max_iter=300).fit(x_train, y_train)
+     pred = clf.predict(x_test)
+     x_test[data[0].columns[-1]] = pred
+     return x_test
algos/classification/svmclassifier.py ADDED
@@ -0,0 +1,27 @@
+ import streamlit as st
+ from sklearn.pipeline import make_pipeline
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.svm import SVC
+
+ def process(data):
+     # if either training or testing dataset is still missing
+     if data[0] is None or data[1] is None:
+         st.info('Please Upload Data')
+         return None
+     if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+         st.info('Please Upload Numerical Data.')
+         return None
+     x_train = data[0].iloc[:, :-1]
+     y_train = data[0].iloc[:, -1]
+     x_test = data[1].iloc[:, :x_train.shape[1]]
+
+     if len(x_train.columns) != len(x_test.columns):
+         st.info('Training and testing datasets have different numbers of columns, cannot perform classification.')
+         return None
+
+     # scale features, then fit an SVM classifier
+     clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
+     clf.fit(x_train, y_train)
+     pred = clf.predict(x_test)
+     x_test[data[0].columns[-1]] = pred
+     return x_test
algos/clustering/dbscan.py ADDED
@@ -0,0 +1,25 @@
+ import streamlit as st
+ from sklearn.cluster import DBSCAN
+ from sklearn.preprocessing import StandardScaler
+
+
+ def process(data):
+
+     if 'object' in list(data[0].dtypes):
+         st.info('This Algorithm can only process numerical data')
+         return None
+
+     # standard scale every column before clustering
+     scaler = StandardScaler()
+     df = data[0].copy()
+
+     for c in data[0].columns:
+         df[c] = scaler.fit_transform(data[0][[c]])
+
+     max_distance = st.slider("""Maximum distance between two samples for one to be considered
+                                 as in the neighborhood of the other:""", 0.01, 5.0)
+     dbscan = DBSCAN(eps=max_distance)
+     res = dbscan.fit_predict(df)
+     df = data[0]
+     df['cluster'] = res  # DBSCAN labels noise points as -1
+     return df
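Unlike K-Means, DBSCAN derives the number of clusters from the `eps` radius chosen on the slider, and assigns points that fall in no dense region the noise label `-1`, so the resulting `cluster` column can contain `-1`. A small sketch of that behavior on toy coordinates (not project data):

```
# Sketch: DBSCAN marks outliers with the label -1.
import numpy as np
from sklearn.cluster import DBSCAN

pts = np.array([[0, 0], [0.1, 0], [0, 0.1],   # dense group
                [5, 5], [5.1, 5],             # second group
                [20, 20]])                    # isolated point -> noise
print(DBSCAN(eps=0.5, min_samples=2).fit_predict(pts))
# [ 0  0  0  1  1 -1]
```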
algos/clustering/kmeans.py ADDED
@@ -0,0 +1,23 @@
+ from sklearn.preprocessing import StandardScaler
+ import streamlit as st
+ from sklearn.cluster import KMeans
+
+
+ def process(data):
+
+     if 'object' in list(data[0].dtypes):
+         st.info('This Algorithm can only process numerical data')
+         return None
+
+     # standard scale every column before clustering
+     scaler = StandardScaler()
+     df = data[0].copy()
+
+     for c in data[0].columns:
+         df[c] = scaler.fit_transform(data[0][[c]])
+     k = st.slider('Number of Clusters :', 2, 9)
+     kmeans = KMeans(n_clusters=k)
+     res = kmeans.fit_predict(df)
+     df = data[0]
+     df['cluster'] = res
+     return df
algos/clustering/kproto.py ADDED
@@ -0,0 +1,43 @@
+ from sklearn.preprocessing import StandardScaler
+ from kmodes.kprototypes import KPrototypes
+ from kmodes.kprototypes import euclidean_dissim
+ import streamlit as st
+ import algos.clustering.kmeans
+
+ def process(data):
+     """Process K-Prototypes clustering over mixed data."""
+     df = data[0]
+     # purely numerical data: fall back to K-Means
+     if 'object' not in list(df.dtypes):
+         return algos.clustering.kmeans.process(data)
+
+     k = st.slider('Number of Clusters :', 2, 9)
+
+     numerical_columns = df.select_dtypes('number').columns
+     categorical_columns = df.select_dtypes('object').columns
+     categorical_indexes = []
+
+     # Scaling
+     scaler = StandardScaler()
+     for c in categorical_columns:
+         categorical_indexes.append(df.columns.get_loc(c))
+     if len(numerical_columns) == 0 or len(categorical_columns) == 0:
+         return
+     # create a copy of our data to be scaled
+     df_scale = df.copy()
+     # standard scale numerical features
+     for c in numerical_columns:
+         df_scale[c] = scaler.fit_transform(df[[c]])
+
+     # Process Data
+     kproto = KPrototypes(n_clusters=k,
+                          num_dissim=euclidean_dissim,
+                          random_state=0)
+
+     kproto.fit_predict(df_scale, categorical=categorical_indexes)
+
+     # add clusters to dataframe
+     df = data[0]
+     df["cluster"] = kproto.labels_
+
+     return df
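K-Prototypes (from the `kmodes` package) combines K-Means-style means for numerical columns with K-Modes-style modes for categorical ones, which is why the module passes the categorical column positions explicitly. A hedged sketch of the same call on a toy mixed frame (the `income`/`city` columns are illustrative only):

```
# Sketch: KPrototypes on mixed data; column index 1 is categorical.
import pandas as pd
from kmodes.kprototypes import KPrototypes

df = pd.DataFrame({'income': [10.0, 12.0, 95.0, 99.0],
                   'city':   ['Paris', 'Paris', 'Lyon', 'Lyon']})
kproto = KPrototypes(n_clusters=2, random_state=0)
labels = kproto.fit_predict(df, categorical=[1])  # position of 'city'
print(labels)  # two clusters separating the two income/city groups
```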
algos/others/others_page.py ADDED
@@ -0,0 +1,4 @@
+ import streamlit as st
+
+ def render():
+     st.title("Other Algorithms")
algos/regression/elasticnet.py ADDED
@@ -0,0 +1,33 @@
+ from sklearn.linear_model import ElasticNet
+ import streamlit as st
+
+ def process(data):
+     # if either training or testing dataset is still missing
+     if data[0] is None or data[1] is None:
+         st.info('Please Upload Data')
+         return None
+     if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+         st.info('Please Upload Numerical Data.')
+         return None
+     x_train = data[0].iloc[:, :-1]
+     y_train = data[0].iloc[:, -1]
+     x_test = data[1].iloc[:, :x_train.shape[1]]
+
+     if len(x_train.columns) != len(x_test.columns):
+         st.info('Training and testing datasets have different numbers of columns, cannot perform regression.')
+         return None
+
+     clf = ElasticNet().fit(x_train, y_train)
+     pred = clf.predict(x_test)
+
+     # display the fitted equation
+     cols = x_train.columns
+     st.latex(f" {data[0].columns[-1]} = ")
+     coeffs = ['{:.4f}'.format(float(c)) for c in clf.coef_]
+     eq = ' + '.join([coef + ' × ' + str(col) for coef, col in zip(coeffs, cols)])
+     st.markdown(f" $$ {clf.intercept_} + {eq} $$")
+
+     st.latex(f" R² = {clf.score(x_train, y_train)} ")
+
+     x_test[data[0].columns[-1]] = pred
+     return x_test
algos/regression/linR.py ADDED
@@ -0,0 +1,33 @@
+ from sklearn.linear_model import LinearRegression
+ import streamlit as st
+
+ def process(data):
+     # if either training or testing dataset is still missing
+     if data[0] is None or data[1] is None:
+         st.info('Please Upload Data')
+         return None
+     if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+         st.info('Please Upload Numerical Data.')
+         return None
+     x_train = data[0].iloc[:, :-1]
+     y_train = data[0].iloc[:, -1]
+     x_test = data[1].iloc[:, :x_train.shape[1]]
+
+     if len(x_train.columns) != len(x_test.columns):
+         st.info('Training and testing datasets have different numbers of columns, cannot perform regression.')
+         return None
+
+     reg = LinearRegression().fit(x_train, y_train)
+
+     # display the fitted equation
+     cols = x_train.columns
+     st.latex(f" {data[0].columns[-1]} = ")
+     coeffs = ['{:.4f}'.format(float(c)) for c in reg.coef_]
+     eq = ' + '.join([coef + ' × ' + str(col) for coef, col in zip(coeffs, cols)])
+     st.markdown(f" $$ {reg.intercept_} + {eq} $$")
+
+     st.latex(f" R² = {reg.score(x_train, y_train)} ")
+
+     pred = reg.predict(x_test)
+     x_test[data[0].columns[-1]] = pred
+     return x_test
algos/regression/ridge.py ADDED
@@ -0,0 +1,31 @@
+ from sklearn.linear_model import Ridge
+ import streamlit as st
+
+ def process(data):
+     # if either training or testing dataset is still missing
+     if data[0] is None or data[1] is None:
+         st.info('Please Upload Data')
+         return None
+     if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
+         st.info('Please Upload Numerical Data.')
+         return None
+     x_train = data[0].iloc[:, :-1]
+     y_train = data[0].iloc[:, -1]
+     x_test = data[1].iloc[:, :x_train.shape[1]]
+
+     if len(x_train.columns) != len(x_test.columns):
+         st.info('Training and testing datasets have different numbers of columns, cannot perform regression.')
+         return None
+
+     clf = Ridge(alpha=1.0).fit(x_train, y_train)
+     pred = clf.predict(x_test)
+
+     # display the fitted equation
+     cols = x_train.columns
+     st.latex(f" {data[0].columns[-1]} = ")
+     coeffs = ['{:.4f}'.format(float(c)) for c in clf.coef_]
+     eq = ' + '.join([coef + ' × ' + str(col) for coef, col in zip(coeffs, cols)])
+     st.markdown(f" $$ {clf.intercept_} + {eq} $$")
+     st.latex(f" R² = {clf.score(x_train, y_train)} ")
+     x_test[data[0].columns[-1]] = pred
+     return x_test
analysis/exploration.py ADDED
@@ -0,0 +1,17 @@
+ import streamlit as st
+ from utilities.template_helpers import upload_data
+ from pandas_profiling import ProfileReport
+ from streamlit_pandas_profiling import st_profile_report
+
+ def render():
+     st.title("DATA EXPLORATION")
+     col1, col2 = st.columns([2, 5])
+     df = None
+     with col1.container():
+         df = upload_data()
+         if df is None:
+             return
+         st.dataframe(df.describe())
+     with col2.container():
+         pr = ProfileReport(df)
+         st_profile_report(pr)
analysis/preprocessing.py ADDED
@@ -0,0 +1,92 @@
+ import streamlit as st
+ from utilities.template_helpers import upload_data
+ import pandas as pd
+ from sklearn.preprocessing import StandardScaler
+
+
+ def render():
+     st.title("PREPROCESSING")
+     # dropna
+     # fillna
+     # select columns
+     # scaling
+
+     col1, col2, col3 = st.columns([1, 1, 1])
+
+     df = None
+     with col1.container():
+         df = upload_data()
+         if df is None:
+             return
+         if df.shape == (0, 0):
+             return
+         info = pd.DataFrame()
+         info['dtypes'] = pd.DataFrame(df.dtypes)
+         info['null'] = df.isna().sum()
+
+         tab1, tab2 = st.tabs(['Dataframe', 'Info'])
+         with tab1:
+             st.dataframe(df, use_container_width=True, height=300)
+         with tab2:
+             st.dataframe(info, use_container_width=True, height=300)
+
+     with col2.container():
+         ### DROP NA ###
+         st.write('\n\n')
+         st.markdown('#### Drop Null Values')
+         st.write('Drop any row containing null values')
+         drop_null = st.checkbox('Drop')
+         if drop_null:
+             df.dropna(inplace=True)
+
+         ### FILL NA ###
+         st.write("\n\n")
+         st.markdown('#### Fill Null Values')
+         st.write("""Replace null values with the mean of the column for numerical variables,
+                     and the mode for categorical variables""")
+         fill_null = st.checkbox('Fill')
+         if fill_null:
+             for col in df.columns:
+                 if df[col].dtype == 'object':
+                     val = df[col].mode()[0]
+                 else:
+                     val = df[col].mean()
+                 df[col] = df[col].fillna(val)
+
+         ### SCALING ###
+         st.write('\n\n')
+         st.markdown("#### Scaling")
+         st.write("Standardize numerical features by removing the mean and scaling to unit variance.")
+         scale = st.checkbox('Scale')
+         if scale:
+             numerical_columns = df.select_dtypes('number').columns
+             # standard scale numerical features on a copy of the data
+             scaler = StandardScaler()
+             df_scale = df.copy()
+             for c in numerical_columns:
+                 df_scale[c] = scaler.fit_transform(df[[c]])
+             df = df_scale
+
+     with col3.container():
+         ### SELECT COLUMNS ###
+         st.write("\n\n")
+         st.markdown("#### Choose columns")
+         cols = st.multiselect('Select columns to use', options=list(df.columns), default=list(df.columns))
+         df = df[cols]
+
+         ### ENCODING ###
+         st.write("\n\n")
+         st.markdown("#### Encode Categorical values")
+         enc = st.checkbox('Encode')
+         if enc:
+             # replace each distinct categorical value with an integer code
+             df.loc[:, df.dtypes == 'object'] = df.loc[:, df.dtypes == 'object'].apply(
+                 lambda x: x.replace(x.unique(), list(range(1, 1 + len(x.unique())))))
+
+         st.write('\n\n')
+         st.markdown("#### Download Preprocessed data")
+         st.download_button("Download Results",
+                            df.to_csv(index=False),
+                            "preprocessed.csv",
+                            "text/csv",
+                            key="download-csv")
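The one-line encoder above replaces each distinct value of every `object` column with an integer code starting at 1. Its effect is easier to see on a single toy column; a sketch (the color values are illustrative only):

```
# Sketch: the effect of the "Encode" step on one categorical column.
import pandas as pd

s = pd.Series(['red', 'green', 'red', 'blue'])
encoded = s.replace(s.unique(), list(range(1, 1 + len(s.unique()))))
print(encoded.tolist())  # [1, 2, 1, 3] -- codes follow order of first appearance
```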
app.py ADDED
@@ -0,0 +1,49 @@
+ import streamlit as st
+ from utilities.standard_template import Page, get_info
+ from utilities.land import land_page
+ import analysis.preprocessing
+ import analysis.exploration
+ import warnings
+
+ import algos.others.others_page
+
+ warnings.filterwarnings("ignore")
+
+ # PAGE CONFIGURATION, CHANGE NAME AND ICON
+
+ st.set_page_config(layout="wide", page_title='AIViz', page_icon='carott.png')
+ hide_streamlit_style = """
+             <style>
+             #MainMenu {visibility: hidden;}
+             footer {visibility: hidden;}
+             </style>
+             """
+ #st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+
+ with st.sidebar:
+     #st.image('carott.png')
+     choice = st.selectbox('Choose Algorithm Category', [
+         " --- Choose --- ",
+         "Clustering",
+         "Classification",
+         "Regression",
+         "Data Exploration",
+         "Data Preprocessing",
+         #"Others"
+     ])
+     get_info(choice)
+
+ if choice in ['Clustering', 'Classification', 'Regression']:
+     Page(choice).render()
+
+ elif choice == 'Data Preprocessing':
+     analysis.preprocessing.render()
+
+ elif choice == 'Data Exploration':
+     analysis.exploration.render()
+
+ elif choice == 'Others':
+     algos.others.others_page.render()
+
+ else:
+     land_page()
carott.png ADDED
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ numpy
+ pandas
+ kmodes
+ scikit-learn
+ streamlit
+ extra_streamlit_components
+ plotly
+ prince
+ pandas-profiling
+ streamlit-pandas-profiling
utilities/components.py ADDED
@@ -0,0 +1,114 @@
+ import streamlit as st
+ from utilities.template_helpers import upload_data
+ import extra_streamlit_components as stx
+
+ import prince
+ import plotly.express as px
+ import pandas as pd
+
+ import algos.clustering.kmeans
+ import algos.clustering.dbscan
+ import algos.clustering.kproto
+
+ import algos.classification.nnclassifier
+ import algos.classification.logistic
+ import algos.classification.svmclassifier
+
+ import algos.regression.linR
+ import algos.regression.ridge
+ import algos.regression.elasticnet
+
+
+ def get_data(category, algo_name=None):
+     if category in ['Classification', 'Regression']:
+         train = upload_data('Training Data')
+         test = upload_data('Testing Data')
+         return train, test
+     else:
+         df = upload_data()
+         if df is not None:
+             return (df,)
+
+
+ def choose_algo(category):
+     if category == 'Clustering':
+         algo = stx.tab_bar(data=[
+             stx.TabBarItemData(id='K-Means', title='K-Means', description='Partitional Clustering Algorithm'),
+             stx.TabBarItemData(id='DBSCAN', title='DBSCAN', description='Density Based Clustering Algorithm'),
+             stx.TabBarItemData(id='K-Prototype', title='K-Prototype', description='Partitional over Mixed Data')]
+         )
+         if algo == 'K-Means':
+             return algos.clustering.kmeans.process
+         if algo == 'DBSCAN':
+             return algos.clustering.dbscan.process
+         if algo == 'K-Prototype':
+             return algos.clustering.kproto.process
+     elif category == 'Classification':
+         algo = stx.tab_bar(data=[
+             stx.TabBarItemData(id='NN', title='Neural Network', description='Multi-Layer Perceptron classifier'),
+             stx.TabBarItemData(id='SVM', title='Support Vector Classifier',
+                                description='Classification using Support Vector Machines'),
+             stx.TabBarItemData(id='logR', title='Logistic Regression', description='Logistic Regression Classifier')]
+         )
+         if algo == 'NN':
+             return algos.classification.nnclassifier.process
+         if algo == 'SVM':
+             return algos.classification.svmclassifier.process
+         if algo == 'logR':
+             return algos.classification.logistic.process
+     elif category == 'Regression':
+         algo = stx.tab_bar(data=[
+             stx.TabBarItemData(id='linR', title='Linear Regression', description='Linear Regression'),
+             stx.TabBarItemData(id='ridge', title='Ridge',
+                                description='Ridge Regression'),
+             stx.TabBarItemData(id='elastic', title='Elastic Net Regression', description='Elastic Net Regression')]
+         )
+         if algo == 'linR':
+             return algos.regression.linR.process
+         if algo == 'ridge':
+             return algos.regression.ridge.process
+         if algo == 'elastic':
+             return algos.regression.elasticnet.process
+
+
+ def get_plot(df, title):
+
+     if title == 'Regression':
+         return None  # Do not plot regression, display its coefficients
+
+     # Better title for the graph
+     viz_thing = 'Clusters'
+     if title == 'Classification':
+         viz_thing = 'Classes'
+
+     # name of column to represent as color on the graph (target class)
+     if df is None:
+         return None
+     if len(df) == 0:
+         return None
+     target_class = df.columns[-1]
+
+     if df.shape == (0, 0):
+         return None
+
+     # reduce to 3 components: FAMD for mixed data, PCA for numerical data
+     if 'object' in list(df.dtypes):
+         reduce_algo = 'FAMD'
+         pca = prince.FAMD(n_components=3)
+     else:
+         reduce_algo = 'Principal Component Analysis'
+         pca = prince.PCA(n_components=3)
+     reduced = pca.fit(df.iloc[:, :-1]).row_coordinates(df.iloc[:, :-1])
+     reduced.columns = ['X', 'Y', 'Z']
+     reduced[target_class] = df[target_class].astype(str)
+     # each axis' share of explained inertia
+     labs = {
+         "X": f"Component 0 - ({round(100*pca.explained_inertia_[0],2)}% inertia)",
+         "Y": f"Component 1 - ({round(100*pca.explained_inertia_[1],2)}% inertia)",
+         "Z": f"Component 2 - ({round(100*pca.explained_inertia_[2],2)}% inertia)",
+     }
+     tot_inertia = f"{round(100*pca.explained_inertia_.sum(),2)}"
+     st.write(f'{reduce_algo} Visualization of {viz_thing} ({tot_inertia}%) :')
+     fig = px.scatter_3d(reduced, x='X', y='Y', z='Z', color=target_class, labels=labs)
+     fig.update_layout(margin=dict(l=0, r=0, b=0, t=0), showlegend=False, height=300)
+     return fig
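`get_plot` reduces the result frame to three components before plotting: PCA when all features are numerical, FAMD when categorical columns are present, with each axis labeled by its share of explained inertia. A standalone sketch of the numerical path, assuming a `prince` release that still exposes `explained_inertia_` (that attribute was dropped in later versions of the library):

```
# Sketch: project numerical data to 3 components with prince's PCA,
# as get_plot does before handing the coordinates to plotly.
# Assumes an older prince release exposing explained_inertia_.
import pandas as pd
import prince
from sklearn.datasets import load_iris

df = load_iris(as_frame=True).frame.iloc[:, :-1]  # features only
pca = prince.PCA(n_components=3).fit(df)
coords = pca.row_coordinates(df)
coords.columns = ['X', 'Y', 'Z']
print(coords.head())
print(pca.explained_inertia_)  # share of inertia per component
```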
utilities/land.py ADDED
@@ -0,0 +1,35 @@
+ import streamlit as st
+
+
+
+ def land_page():
+     _, center, _ = st.columns([2, 3, 2])
+     center.markdown("<h1 style='text-align: center;'>AIViz</h1>", unsafe_allow_html=True)
+     center.write("""Machine Learning. For everyone. Now. AIViz is a platform built to let everyone perform Machine
+                     Learning easily on their own data.""")
+
+     center.image('carott.png')
+
+     center.markdown("<h3 style='text-align: center;'>Use your own data</h3>", unsafe_allow_html=True)
+
+     center.write("You can use your own data with AIViz. All it takes is the click of a button.")
+
+     center.markdown("<h3 style='text-align: center;'>Understand your Data</h3>", unsafe_allow_html=True)
+
+     center.write("""AIViz provides a Data Exploration tool that lets you explore all your variables. You can
+                     easily visualize and understand the univariate and bivariate behavior of your data.""")
+
+     center.markdown("<h3 style='text-align: center;'>Preprocessing</h3>", unsafe_allow_html=True)
+
+     center.write("""You can prepare your data for Machine Learning in just a few clicks. You can decide how
+                     to handle missing values, choose which columns to use, scale your data...""")
+
+     center.markdown("<h3 style='text-align: center;'>Machine Learning</h3>", unsafe_allow_html=True)
+
+     st.latex("""The \ smartest \ carott \ of \ the \ World \\newline \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \
+             \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \
+             \ \ \ \ \ \ \ \ \ \ \ \ \ - \ us.""")
+
+     center.write("""The core of AIViz is Machine Learning. Now that you have uploaded and preprocessed
+                     your data, you can apply Artificial Intelligence algorithms to it. We provide several
+                     different algorithms, for Clustering, Classification or Regression.""")
utilities/standard_template.py ADDED
@@ -0,0 +1,57 @@
+ import streamlit as st
+ from utilities.components import get_data, choose_algo, get_plot
+ import pandas as pd
+
+
+ def get_info(category):
+     infos = {
+         " --- Choose --- ": 'We provide several different types of algorithms, such as Clustering or Classification',
+         "Clustering": 'Unsupervised, creates clusters of similar individuals',
+         "Classification": """Supervised, assigns individuals to a class using
+                              training data. The last column will be used as the target class.""",
+         "Regression": "Supervised, predicts a numerical value for a column, using training data",
+         "Data Exploration": "Univariate and bivariate data analysis",
+         "Data Preprocessing": "Prepare data for Machine Learning",
+         "Others": 'Other algorithms, such as linear regression'
+     }
+     st.info(infos[category])
+
+
+ class Page:
+     def __init__(self, title) -> None:
+         self.title = title
+         self.data = None
+         self.algo = None
+         self.plot = None
+         self.results = None
+
+     def render(self):
+         st.title(self.title.upper())
+         col1, col2 = st.columns([2, 5])
+
+         ##### CHOOSE DATA #####
+         with col1.container():
+             data = get_data(self.title)
+             if isinstance(data, tuple):
+                 if self.title == 'Clustering' and data[0] is not None:
+                     st.dataframe(data[0], use_container_width=True, height=280)
+                 self.data = data
+
+         with col2.container():
+             ##### CHOOSE ALGORITHM #####
+             self.algo = choose_algo(self.title)
+             if self.algo is not None and self.data is not None:
+                 self.results = pd.DataFrame(self.algo(self.data))
+                 self.plot = get_plot(self.results, self.title)
+
+             ##### PLOT RESULTS #####
+             if self.plot is not None:
+                 st.plotly_chart(self.plot)
+
+         ##### DOWNLOAD RESULTS #####
+         if self.results is not None:
+             col1.download_button("Download Results",
+                                  self.results.to_csv(index=False),
+                                  "results.csv",
+                                  "text/csv",
+                                  key="download-csv")
utilities/template_helpers.py ADDED
@@ -0,0 +1,9 @@
+ import pandas as pd
+ import streamlit as st
+
+
+ def upload_data(descr='Upload Data'):
+     up = st.file_uploader(descr)
+     if up:
+         df = pd.read_csv(up).dropna()
+         return df