CosmickVisions committed on
Commit
a36e47b
·
verified ·
1 Parent(s): 2e68124

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -149
app.py CHANGED
@@ -850,7 +850,12 @@ elif app_mode == "Predictions":
850
  # Input data for prediction
851
  st.subheader("Enter Data for Prediction")
852
  input_data = {}
853
- for col in df.drop(columns=[st.session_state.model.steps[-1][0]]).columns:
 
 
 
 
 
854
  if pd.api.types.is_numeric_dtype(df[col]):
855
  input_data[col] = st.number_input(f"Enter {col}", value=df[col].mean())
856
  else:
@@ -858,10 +863,13 @@ elif app_mode == "Predictions":
858
 
859
  # Prediction Button
860
  if st.button("Make Prediction"):
861
- input_df = pd.DataFrame([input_data])
862
- prediction = st.session_state.model.predict(input_df)[0]
863
- st.subheader("Prediction Result")
864
- st.write(f"The predicted value is: {prediction}")
 
 
 
865
  else:
866
  st.write("Please train a model first in the 'Model Training' section.")
867
 
@@ -923,7 +931,50 @@ elif app_mode == "Visualization Lab":
923
  y_col_funnel = st.selectbox("Select Y Column for Funnel Chart", df.columns)
924
  fig = px.funnel(df, x=x_col_funnel, y=y_col_funnel)
925
  st.plotly_chart(fig, use_container_width=True)
926
- elif app_mode == "Neural Network Studio":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927
  st.title("🧠 Neural Network Studio")
928
 
929
  if st.session_state.cleaned_data is not None:
@@ -967,148 +1018,118 @@ elif app_mode == "Visualization Lab":
967
  # Model Training Button
968
  if st.button("Train Neural Network Model"):
969
  with st.spinner("Training neural network model..."):
970
- # Split data
971
- X = df[feature_columns]
972
- y = df[target_column]
973
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
974
-
975
- # Preprocessing
976
- numeric_transformer = Pipeline(steps=[
977
- ('imputer', SimpleImputer(strategy='median')),
978
- ('scaler', StandardScaler())
979
- ])
980
- categorical_transformer = Pipeline(steps=[
981
- ('imputer', SimpleImputer(strategy='most_frequent')),
982
- ('onehot', OneHotEncoder(handle_unknown='ignore'))
983
- ])
984
-
985
- numeric_features = X_train.select_dtypes(include=['int64', 'float64']).columns
986
- categorical_features = X_train.select_dtypes(include=['object']).columns
987
-
988
- preprocessor = ColumnTransformer(
989
- transformers=[
990
- ('num', numeric_transformer, numeric_features),
991
- ('cat', categorical_transformer, categorical_features)
992
  ])
993
 
994
- X_train_processed = preprocessor.fit_transform(X_train)
995
- X_test_processed = preprocessor.transform(X_test)
996
-
997
- # Neural Network Model Selection and Training
998
- if model_type == "Simple Neural Network":
999
- import tensorflow as tf
1000
- from tensorflow import keras
1001
- from tensorflow.keras import layers
1002
-
1003
- model = keras.Sequential()
1004
- model.add(layers.Input(shape=(X_train_processed.shape[1],)))
1005
- for _ in range(hidden_layers):
1006
- model.add(layers.Dense(neurons_per_layer, activation='relu'))
1007
- model.add(layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)), activation='linear' if problem_type == "Regression" else 'softmax'))
1008
-
1009
- model.compile(optimizer='adam',
1010
- loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
1011
- metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
1012
-
1013
- model.fit(X_train_processed, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, verbose=0)
1014
-
1015
- y_pred = model.predict(X_test_processed)
1016
- if problem_type == "Classification":
1017
- y_pred = np.argmax(y_pred, axis=1)
1018
-
1019
- elif model_type == "Convolutional Neural Network (CNN)":
1020
- import tensorflow as tf
1021
- from tensorflow import keras
1022
- from tensorflow.keras import layers
1023
-
1024
- # Reshape data for CNN (example for 1D CNN)
1025
- X_train_cnn = np.expand_dims(X_train_processed, axis=2)
1026
- X_test_cnn = np.expand_dims(X_test_processed, axis=2)
1027
-
1028
- model = keras.Sequential()
1029
- model.add(layers.Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)))
1030
- model.add(layers.MaxPooling1D(pool_size=2))
1031
- model.add(layers.Flatten())
1032
- model.add(layers.Dense(50, activation='relu'))
1033
- model.add(layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)), activation='linear' if problem_type == "Regression" else 'softmax'))
1034
-
1035
- model.compile(optimizer='adam',
1036
- loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
1037
- metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
1038
-
1039
- model.fit(X_train_cnn, y_train, epochs=epochs_cnn, batch_size=batch_size_cnn, validation_split=0.2, verbose=0)
1040
-
1041
- y_pred = model.predict(X_test_cnn)
1042
- if problem_type == "Classification":
1043
- y_pred = np.argmax(y_pred, axis=1)
1044
-
1045
- elif model_type == "Recurrent Neural Network (RNN)":
1046
- import tensorflow as tf
1047
- from tensorflow import keras
1048
- from tensorflow.keras import layers
1049
-
1050
- # Reshape data for RNN (example for simple RNN)
1051
- X_train_rnn = np.reshape(X_train_processed, (X_train_processed.shape[0], sequence_length, X_train_processed.shape[1] // sequence_length))
1052
- X_test_rnn = np.reshape(X_test_processed, (X_test_processed.shape[0], sequence_length, X_test_processed.shape[1] // sequence_length))
1053
-
1054
- model = keras.Sequential()
1055
- model.add(layers.SimpleRNN(50, activation='relu', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])))
1056
- model.add(layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)), activation='linear' if problem_type == "Regression" else 'softmax'))
1057
-
1058
- model.compile(optimizer='adam',
1059
- loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
1060
- metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
1061
-
1062
- model.fit(X_train_rnn, y_train, epochs=epochs_rnn, batch_size=batch_size_rnn, validation_split=0.2, verbose=0)
1063
-
1064
- y_pred = model.predict(X_test_rnn)
1065
- if problem_type == "Classification":
1066
- y_pred = np.argmax(y_pred, axis=1)
1067
-
1068
- # Evaluation
1069
- if problem_type == "Regression":
1070
- mse = mean_squared_error(y_test, y_pred)
1071
- rmse = np.sqrt(mse)
1072
- mae = mean_absolute_error(y_test, y_pred)
1073
- r2 = r2_score(y_test, y_pred)
1074
- st.write(f"Mean Squared Error: {mse:.4f}")
1075
- st.write(f"Root Mean Squared Error: {rmse:.4f}")
1076
- st.write(f"Mean Absolute Error: {mae:.4f}")
1077
- st.write(f"R-squared: {r2:.4f}")
1078
- else:
1079
- accuracy = accuracy_score(y_test, y_pred)
1080
- precision = precision_score(y_test, y_pred, average='weighted')
1081
- recall = recall_score(y_test, y_pred, average='weighted')
1082
- f1 = f1_score(y_test, y_pred, average='weighted')
1083
- st.write(f"Accuracy: {accuracy:.4f}")
1084
- st.write(f"Precision: {precision:.4f}")
1085
- st.write(f"Recall: {recall:.4f}")
1086
- st.write(f"F1 Score: {f1:.4f}")
1087
- # Clustering Analysis (Example: K-Means)
1088
- if app_mode == "Visualization Lab" and st.session_state.cleaned_data is not None:
1089
- st.subheader("Clustering Analysis")
1090
- df = st.session_state.cleaned_data.copy()
1091
-
1092
- # Select columns for clustering
1093
- cluster_cols = st.multiselect("Select Columns for Clustering", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:2])
1094
-
1095
- if cluster_cols:
1096
- # Number of clusters
1097
- n_clusters = st.slider("Number of Clusters", 2, 10, 3, help="Number of clusters to form.")
1098
-
1099
- # Apply K-Means clustering
1100
- kmeans = KMeans(n_clusters=n_clusters, random_state=42)
1101
- clusters = kmeans.fit_predict(df[cluster_cols])
1102
-
1103
- # Add cluster labels to the DataFrame
1104
- df['Cluster'] = clusters
1105
-
1106
- # Visualize clusters
1107
- if len(cluster_cols) == 2:
1108
- fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
1109
- st.plotly_chart(fig, use_container_width=True)
1110
- elif len(cluster_cols) == 3:
1111
- fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
1112
- st.plotly_chart(fig, use_container_width=True)
1113
- else:
1114
- st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
 
850
  # Input data for prediction
851
  st.subheader("Enter Data for Prediction")
852
  input_data = {}
853
+ model_columns = st.session_state.model.steps[0][1].transformers_[0][2] + st.session_state.model.steps[0][1].transformers_[1][2]
854
+ if not set(model_columns).issubset(set(df.drop(columns=[st.session_state.model.steps[-1][0]]).columns)):
855
+ st.error("The model was trained on a dataframe that contains different columns than the currently uploaded dataframe. Please upload the correct dataframe.")
856
+ st.stop()
857
+
858
+ for col in model_columns:
859
  if pd.api.types.is_numeric_dtype(df[col]):
860
  input_data[col] = st.number_input(f"Enter {col}", value=df[col].mean())
861
  else:
 
863
 
864
  # Prediction Button
865
  if st.button("Make Prediction"):
866
+ try:
867
+ input_df = pd.DataFrame([input_data])
868
+ prediction = st.session_state.model.predict(input_df)[0]
869
+ st.subheader("Prediction Result")
870
+ st.write(f"The predicted value is: {prediction}")
871
+ except Exception as e:
872
+ st.error(f"An error occurred during prediction: {e}")
873
  else:
874
  st.write("Please train a model first in the 'Model Training' section.")
875
 
 
931
  y_col_funnel = st.selectbox("Select Y Column for Funnel Chart", df.columns)
932
  fig = px.funnel(df, x=x_col_funnel, y=y_col_funnel)
933
  st.plotly_chart(fig, use_container_width=True)
934
+
935
+ elif app_mode == "Visualization Lab" and st.session_state.cleaned_data is not None:
936
+ st.subheader("Clustering Analysis")
937
+ df = st.session_state.cleaned_data.copy()
938
+
939
+ # Select columns for clustering
940
+ numerical_cols = df.select_dtypes(include=np.number).columns.tolist()
941
+
942
+ if not numerical_cols:
943
+ st.warning("No numerical columns found for clustering.")
944
+ else:
945
+ cluster_cols = st.multiselect("Select Columns for Clustering", numerical_cols, default=numerical_cols[:2] if len(numerical_cols) >= 2 else numerical_cols)
946
+
947
+ if cluster_cols:
948
+ try:
949
+ # Scale the data
950
+ scaler = StandardScaler()
951
+ scaled_data = scaler.fit_transform(df[cluster_cols])
952
+
953
+ # Number of clusters
954
+ n_clusters = st.slider("Number of Clusters", 2, 10, 3, help="Number of clusters to form.")
955
+
956
+ # Apply K-Means clustering
957
+ kmeans = KMeans(n_clusters=n_clusters, random_state=42)
958
+ clusters = kmeans.fit_predict(scaled_data)
959
+
960
+ # Add cluster labels to the DataFrame
961
+ df['Cluster'] = clusters
962
+
963
+ # Visualize clusters
964
+ if len(cluster_cols) == 2:
965
+ fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
966
+ st.plotly_chart(fig, use_container_width=True)
967
+ elif len(cluster_cols) == 3:
968
+ fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
969
+ st.plotly_chart(fig, use_container_width=True)
970
+ else:
971
+ st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
972
+
973
+ st.success("Clustering applied successfully!")
974
+ except Exception as e:
975
+ st.error(f"An error occurred during clustering: {e}")
976
+
977
+ elif app_mode == "Neural Network Studio":
978
  st.title("🧠 Neural Network Studio")
979
 
980
  if st.session_state.cleaned_data is not None:
 
1018
  # Model Training Button
1019
  if st.button("Train Neural Network Model"):
1020
  with st.spinner("Training neural network model..."):
1021
+ try:
1022
+ # Split data
1023
+ X = df[feature_columns]
1024
+ y = df[target_column]
1025
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
1026
+
1027
+ # Preprocessing
1028
+ numeric_transformer = Pipeline(steps=[
1029
+ ('imputer', SimpleImputer(strategy='median')),
1030
+ ('scaler', StandardScaler())
1031
+ ])
1032
+ categorical_transformer = Pipeline(steps=[
1033
+ ('imputer', SimpleImputer(strategy='most_frequent')),
1034
+ ('onehot', OneHotEncoder(handle_unknown='ignore'))
 
 
 
 
 
 
 
 
1035
  ])
1036
 
1037
+ numeric_features = X_train.select_dtypes(include=['int64', 'float64']).columns
1038
+ categorical_features = X_train.select_dtypes(include=['object']).columns
1039
+
1040
+ preprocessor = ColumnTransformer(
1041
+ transformers=[
1042
+ ('num', numeric_transformer, numeric_features),
1043
+ ('cat', categorical_transformer, categorical_features)
1044
+ ])
1045
+
1046
+ X_train_processed = preprocessor.fit_transform(X_train)
1047
+ X_test_processed = preprocessor.transform(X_test)
1048
+
1049
+ # Neural Network Model Selection and Training
1050
+ if model_type == "Simple Neural Network":
1051
+ model = keras.Sequential()
1052
+ model.add(layers.Input(shape=(X_train_processed.shape[1],)))
1053
+ for _ in range(hidden_layers):
1054
+ model.add(layers.Dense(neurons_per_layer, activation='relu'))
1055
+ model.add(layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)), activation='linear' if problem_type == "Regression" else 'softmax'))
1056
+
1057
+ model.compile(optimizer='adam',
1058
+ loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
1059
+ metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
1060
+
1061
+ model.fit(X_train_processed, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, verbose=0)
1062
+
1063
+ y_pred = model.predict(X_test_processed)
1064
+ if problem_type == "Classification":
1065
+ y_pred = np.argmax(y_pred, axis=1)
1066
+
1067
+ elif model_type == "Convolutional Neural Network (CNN)":
1068
+ X_train_cnn = np.expand_dims(X_train_processed, axis=2)
1069
+ X_test_cnn = np.expand_dims(X_test_processed, axis=2)
1070
+
1071
+ model = keras.Sequential()
1072
+ model.add(layers.Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)))
1073
+ model.add(layers.MaxPooling1D(pool_size=2))
1074
+ model.add(layers.Flatten())
1075
+ model.add(layers.Dense(50, activation='relu'))
1076
+ model.add(layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)), activation='linear' if problem_type == "Regression" else 'softmax'))
1077
+
1078
+ model.compile(optimizer='adam',
1079
+ loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
1080
+ metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
1081
+
1082
+ model.fit(X_train_cnn, y_train, epochs=epochs_cnn, batch_size=batch_size_cnn, validation_split=0.2, verbose=0)
1083
+
1084
+ y_pred = model.predict(X_test_cnn)
1085
+ if problem_type == "Classification":
1086
+ y_pred = np.argmax(y_pred, axis=1)
1087
+
1088
+ elif model_type == "Recurrent Neural Network (RNN)":
1089
+ try:
1090
+ X_train_rnn = np.reshape(X_train_processed, (X_train_processed.shape[0], sequence_length, X_train_processed.shape[1] // sequence_length))
1091
+ X_test_rnn = np.reshape(X_test_processed, (X_test_processed.shape[0], sequence_length, X_test_processed.shape[1] // sequence_length))
1092
+
1093
+ model = keras.Sequential()
1094
+ model.add(layers.SimpleRNN(50, activation='relu', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])))
1095
+ model.add(layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)), activation='linear' if problem_type == "Regression" else 'softmax'))
1096
+
1097
+ model.compile(optimizer='adam',
1098
+ loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
1099
+ metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
1100
+
1101
+ model.fit(X_train_rnn, y_train, epochs=epochs_rnn, batch_size=batch_size_rnn, validation_split=0.2, verbose=0)
1102
+
1103
+ y_pred = model.predict(X_test_rnn)
1104
+ if problem_type == "Classification":
1105
+ y_pred = np.argmax(y_pred, axis=1)
1106
+ except Exception as e:
1107
+ st.error(f"Error during RNN training: {e}")
1108
+ st.stop() # Stop execution if RNN fails
1109
+
1110
+ # Evaluation
1111
+ if problem_type == "Regression":
1112
+ mse = mean_squared_error(y_test, y_pred)
1113
+ rmse = np.sqrt(mse)
1114
+ mae = mean_absolute_error(y_test, y_pred)
1115
+ r2 = r2_score(y_test, y_pred)
1116
+ st.write(f"Mean Squared Error: {mse:.4f}")
1117
+ st.write(f"Root Mean Squared Error: {rmse:.4f}")
1118
+ st.write(f"Mean Absolute Error: {mae:.4f}")
1119
+ st.write(f"R-squared: {r2:.4f}")
1120
+ else:
1121
+ accuracy = accuracy_score(y_test, y_pred)
1122
+ precision = precision_score(y_test, y_pred, average='weighted', zero_division=0) #Added zero_division
1123
+ recall = recall_score(y_test, y_pred, average='weighted', zero_division=0) #Added zero_division
1124
+ f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0) #Added zero_division
1125
+ st.write(f"Accuracy: {accuracy:.4f}")
1126
+ st.write(f"Precision: {precision:.4f}")
1127
+ st.write(f"Recall: {recall:.4f}")
1128
+ st.write(f"F1 Score: {f1:.4f}")
1129
+ st.write("Classification Report:")
1130
+ st.text(classification_report(y_test, y_pred)) #added classification report
1131
+
1132
+ st.success("Model trained successfully!")
1133
+
1134
+ except Exception as e:
1135
+ st.error(f"An error occurred during training: {e}")