Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -850,7 +850,12 @@ elif app_mode == "Predictions":
|
|
850 |
# Input data for prediction
|
851 |
st.subheader("Enter Data for Prediction")
|
852 |
input_data = {}
|
853 |
-
|
|
|
|
|
|
|
|
|
|
|
854 |
if pd.api.types.is_numeric_dtype(df[col]):
|
855 |
input_data[col] = st.number_input(f"Enter {col}", value=df[col].mean())
|
856 |
else:
|
@@ -858,10 +863,13 @@ elif app_mode == "Predictions":
|
|
858 |
|
859 |
# Prediction Button
|
860 |
if st.button("Make Prediction"):
|
861 |
-
|
862 |
-
|
863 |
-
|
864 |
-
|
|
|
|
|
|
|
865 |
else:
|
866 |
st.write("Please train a model first in the 'Model Training' section.")
|
867 |
|
@@ -923,7 +931,50 @@ elif app_mode == "Visualization Lab":
|
|
923 |
y_col_funnel = st.selectbox("Select Y Column for Funnel Chart", df.columns)
|
924 |
fig = px.funnel(df, x=x_col_funnel, y=y_col_funnel)
|
925 |
st.plotly_chart(fig, use_container_width=True)
|
926 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
927 |
st.title("🧠 Neural Network Studio")
|
928 |
|
929 |
if st.session_state.cleaned_data is not None:
|
@@ -967,148 +1018,118 @@ elif app_mode == "Visualization Lab":
|
|
967 |
# Model Training Button
|
968 |
if st.button("Train Neural Network Model"):
|
969 |
with st.spinner("Training neural network model..."):
|
970 |
-
|
971 |
-
|
972 |
-
|
973 |
-
|
974 |
-
|
975 |
-
|
976 |
-
|
977 |
-
|
978 |
-
|
979 |
-
|
980 |
-
|
981 |
-
|
982 |
-
|
983 |
-
|
984 |
-
|
985 |
-
numeric_features = X_train.select_dtypes(include=['int64', 'float64']).columns
|
986 |
-
categorical_features = X_train.select_dtypes(include=['object']).columns
|
987 |
-
|
988 |
-
preprocessor = ColumnTransformer(
|
989 |
-
transformers=[
|
990 |
-
('num', numeric_transformer, numeric_features),
|
991 |
-
('cat', categorical_transformer, categorical_features)
|
992 |
])
|
993 |
|
994 |
-
|
995 |
-
|
996 |
-
|
997 |
-
|
998 |
-
|
999 |
-
|
1000 |
-
|
1001 |
-
|
1002 |
-
|
1003 |
-
|
1004 |
-
|
1005 |
-
|
1006 |
-
|
1007 |
-
|
1008 |
-
|
1009 |
-
|
1010 |
-
|
1011 |
-
|
1012 |
-
|
1013 |
-
|
1014 |
-
|
1015 |
-
|
1016 |
-
|
1017 |
-
|
1018 |
-
|
1019 |
-
|
1020 |
-
|
1021 |
-
|
1022 |
-
|
1023 |
-
|
1024 |
-
|
1025 |
-
|
1026 |
-
|
1027 |
-
|
1028 |
-
|
1029 |
-
|
1030 |
-
|
1031 |
-
|
1032 |
-
|
1033 |
-
|
1034 |
-
|
1035 |
-
|
1036 |
-
|
1037 |
-
|
1038 |
-
|
1039 |
-
|
1040 |
-
|
1041 |
-
|
1042 |
-
|
1043 |
-
|
1044 |
-
|
1045 |
-
|
1046 |
-
|
1047 |
-
|
1048 |
-
|
1049 |
-
|
1050 |
-
|
1051 |
-
|
1052 |
-
|
1053 |
-
|
1054 |
-
|
1055 |
-
|
1056 |
-
|
1057 |
-
|
1058 |
-
|
1059 |
-
|
1060 |
-
|
1061 |
-
|
1062 |
-
|
1063 |
-
|
1064 |
-
|
1065 |
-
|
1066 |
-
|
1067 |
-
|
1068 |
-
|
1069 |
-
|
1070 |
-
|
1071 |
-
|
1072 |
-
|
1073 |
-
|
1074 |
-
|
1075 |
-
|
1076 |
-
|
1077 |
-
|
1078 |
-
|
1079 |
-
|
1080 |
-
|
1081 |
-
|
1082 |
-
|
1083 |
-
|
1084 |
-
|
1085 |
-
|
1086 |
-
|
1087 |
-
|
1088 |
-
|
1089 |
-
|
1090 |
-
|
1091 |
-
|
1092 |
-
|
1093 |
-
cluster_cols = st.multiselect("Select Columns for Clustering", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:2])
|
1094 |
-
|
1095 |
-
if cluster_cols:
|
1096 |
-
# Number of clusters
|
1097 |
-
n_clusters = st.slider("Number of Clusters", 2, 10, 3, help="Number of clusters to form.")
|
1098 |
-
|
1099 |
-
# Apply K-Means clustering
|
1100 |
-
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
|
1101 |
-
clusters = kmeans.fit_predict(df[cluster_cols])
|
1102 |
-
|
1103 |
-
# Add cluster labels to the DataFrame
|
1104 |
-
df['Cluster'] = clusters
|
1105 |
-
|
1106 |
-
# Visualize clusters
|
1107 |
-
if len(cluster_cols) == 2:
|
1108 |
-
fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
|
1109 |
-
st.plotly_chart(fig, use_container_width=True)
|
1110 |
-
elif len(cluster_cols) == 3:
|
1111 |
-
fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
|
1112 |
-
st.plotly_chart(fig, use_container_width=True)
|
1113 |
-
else:
|
1114 |
-
st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
|
|
|
850 |
# Input data for prediction
|
851 |
st.subheader("Enter Data for Prediction")
|
852 |
input_data = {}
|
853 |
+
model_columns = st.session_state.model.steps[0][1].transformers_[0][2] + st.session_state.model.steps[0][1].transformers_[1][2]
|
854 |
+
if not set(model_columns).issubset(set(df.drop(columns=[st.session_state.model.steps[-1][0]]).columns)):
|
855 |
+
st.error("The model was trained on a dataframe that contains different columns than the currently uploaded dataframe. Please upload the correct dataframe.")
|
856 |
+
st.stop()
|
857 |
+
|
858 |
+
for col in model_columns:
|
859 |
if pd.api.types.is_numeric_dtype(df[col]):
|
860 |
input_data[col] = st.number_input(f"Enter {col}", value=df[col].mean())
|
861 |
else:
|
|
|
863 |
|
864 |
# Prediction Button
|
865 |
if st.button("Make Prediction"):
|
866 |
+
try:
|
867 |
+
input_df = pd.DataFrame([input_data])
|
868 |
+
prediction = st.session_state.model.predict(input_df)[0]
|
869 |
+
st.subheader("Prediction Result")
|
870 |
+
st.write(f"The predicted value is: {prediction}")
|
871 |
+
except Exception as e:
|
872 |
+
st.error(f"An error occurred during prediction: {e}")
|
873 |
else:
|
874 |
st.write("Please train a model first in the 'Model Training' section.")
|
875 |
|
|
|
931 |
y_col_funnel = st.selectbox("Select Y Column for Funnel Chart", df.columns)
|
932 |
fig = px.funnel(df, x=x_col_funnel, y=y_col_funnel)
|
933 |
st.plotly_chart(fig, use_container_width=True)
|
934 |
+
|
935 |
+
elif app_mode == "Visualization Lab" and st.session_state.cleaned_data is not None:
|
936 |
+
st.subheader("Clustering Analysis")
|
937 |
+
df = st.session_state.cleaned_data.copy()
|
938 |
+
|
939 |
+
# Select columns for clustering
|
940 |
+
numerical_cols = df.select_dtypes(include=np.number).columns.tolist()
|
941 |
+
|
942 |
+
if not numerical_cols:
|
943 |
+
st.warning("No numerical columns found for clustering.")
|
944 |
+
else:
|
945 |
+
cluster_cols = st.multiselect("Select Columns for Clustering", numerical_cols, default=numerical_cols[:2] if len(numerical_cols) >= 2 else numerical_cols)
|
946 |
+
|
947 |
+
if cluster_cols:
|
948 |
+
try:
|
949 |
+
# Scale the data
|
950 |
+
scaler = StandardScaler()
|
951 |
+
scaled_data = scaler.fit_transform(df[cluster_cols])
|
952 |
+
|
953 |
+
# Number of clusters
|
954 |
+
n_clusters = st.slider("Number of Clusters", 2, 10, 3, help="Number of clusters to form.")
|
955 |
+
|
956 |
+
# Apply K-Means clustering
|
957 |
+
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
|
958 |
+
clusters = kmeans.fit_predict(scaled_data)
|
959 |
+
|
960 |
+
# Add cluster labels to the DataFrame
|
961 |
+
df['Cluster'] = clusters
|
962 |
+
|
963 |
+
# Visualize clusters
|
964 |
+
if len(cluster_cols) == 2:
|
965 |
+
fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
|
966 |
+
st.plotly_chart(fig, use_container_width=True)
|
967 |
+
elif len(cluster_cols) == 3:
|
968 |
+
fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
|
969 |
+
st.plotly_chart(fig, use_container_width=True)
|
970 |
+
else:
|
971 |
+
st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
|
972 |
+
|
973 |
+
st.success("Clustering applied successfully!")
|
974 |
+
except Exception as e:
|
975 |
+
st.error(f"An error occurred during clustering: {e}")
|
976 |
+
|
977 |
+
elif app_mode == "Neural Network Studio":
|
978 |
st.title("🧠 Neural Network Studio")
|
979 |
|
980 |
if st.session_state.cleaned_data is not None:
|
|
|
1018 |
# Model Training Button
|
1019 |
if st.button("Train Neural Network Model"):
|
1020 |
with st.spinner("Training neural network model..."):
|
1021 |
+
try:
|
1022 |
+
# Split data
|
1023 |
+
X = df[feature_columns]
|
1024 |
+
y = df[target_column]
|
1025 |
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
|
1026 |
+
|
1027 |
+
# Preprocessing
|
1028 |
+
numeric_transformer = Pipeline(steps=[
|
1029 |
+
('imputer', SimpleImputer(strategy='median')),
|
1030 |
+
('scaler', StandardScaler())
|
1031 |
+
])
|
1032 |
+
categorical_transformer = Pipeline(steps=[
|
1033 |
+
('imputer', SimpleImputer(strategy='most_frequent')),
|
1034 |
+
('onehot', OneHotEncoder(handle_unknown='ignore'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1035 |
])
|
1036 |
|
1037 |
+
numeric_features = X_train.select_dtypes(include=['int64', 'float64']).columns
|
1038 |
+
categorical_features = X_train.select_dtypes(include=['object']).columns
|
1039 |
+
|
1040 |
+
preprocessor = ColumnTransformer(
|
1041 |
+
transformers=[
|
1042 |
+
('num', numeric_transformer, numeric_features),
|
1043 |
+
('cat', categorical_transformer, categorical_features)
|
1044 |
+
])
|
1045 |
+
|
1046 |
+
X_train_processed = preprocessor.fit_transform(X_train)
|
1047 |
+
X_test_processed = preprocessor.transform(X_test)
|
1048 |
+
|
1049 |
+
# Neural Network Model Selection and Training
|
1050 |
+
if model_type == "Simple Neural Network":
|
1051 |
+
model = keras.Sequential()
|
1052 |
+
model.add(layers.Input(shape=(X_train_processed.shape[1],)))
|
1053 |
+
for _ in range(hidden_layers):
|
1054 |
+
model.add(layers.Dense(neurons_per_layer, activation='relu'))
|
1055 |
+
model.add(layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)), activation='linear' if problem_type == "Regression" else 'softmax'))
|
1056 |
+
|
1057 |
+
model.compile(optimizer='adam',
|
1058 |
+
loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
|
1059 |
+
metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
|
1060 |
+
|
1061 |
+
model.fit(X_train_processed, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, verbose=0)
|
1062 |
+
|
1063 |
+
y_pred = model.predict(X_test_processed)
|
1064 |
+
if problem_type == "Classification":
|
1065 |
+
y_pred = np.argmax(y_pred, axis=1)
|
1066 |
+
|
1067 |
+
elif model_type == "Convolutional Neural Network (CNN)":
|
1068 |
+
X_train_cnn = np.expand_dims(X_train_processed, axis=2)
|
1069 |
+
X_test_cnn = np.expand_dims(X_test_processed, axis=2)
|
1070 |
+
|
1071 |
+
model = keras.Sequential()
|
1072 |
+
model.add(layers.Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)))
|
1073 |
+
model.add(layers.MaxPooling1D(pool_size=2))
|
1074 |
+
model.add(layers.Flatten())
|
1075 |
+
model.add(layers.Dense(50, activation='relu'))
|
1076 |
+
model.add(layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)), activation='linear' if problem_type == "Regression" else 'softmax'))
|
1077 |
+
|
1078 |
+
model.compile(optimizer='adam',
|
1079 |
+
loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
|
1080 |
+
metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
|
1081 |
+
|
1082 |
+
model.fit(X_train_cnn, y_train, epochs=epochs_cnn, batch_size=batch_size_cnn, validation_split=0.2, verbose=0)
|
1083 |
+
|
1084 |
+
y_pred = model.predict(X_test_cnn)
|
1085 |
+
if problem_type == "Classification":
|
1086 |
+
y_pred = np.argmax(y_pred, axis=1)
|
1087 |
+
|
1088 |
+
elif model_type == "Recurrent Neural Network (RNN)":
|
1089 |
+
try:
|
1090 |
+
X_train_rnn = np.reshape(X_train_processed, (X_train_processed.shape[0], sequence_length, X_train_processed.shape[1] // sequence_length))
|
1091 |
+
X_test_rnn = np.reshape(X_test_processed, (X_test_processed.shape[0], sequence_length, X_test_processed.shape[1] // sequence_length))
|
1092 |
+
|
1093 |
+
model = keras.Sequential()
|
1094 |
+
model.add(layers.SimpleRNN(50, activation='relu', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])))
|
1095 |
+
model.add(layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)), activation='linear' if problem_type == "Regression" else 'softmax'))
|
1096 |
+
|
1097 |
+
model.compile(optimizer='adam',
|
1098 |
+
loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
|
1099 |
+
metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
|
1100 |
+
|
1101 |
+
model.fit(X_train_rnn, y_train, epochs=epochs_rnn, batch_size=batch_size_rnn, validation_split=0.2, verbose=0)
|
1102 |
+
|
1103 |
+
y_pred = model.predict(X_test_rnn)
|
1104 |
+
if problem_type == "Classification":
|
1105 |
+
y_pred = np.argmax(y_pred, axis=1)
|
1106 |
+
except Exception as e:
|
1107 |
+
st.error(f"Error during RNN training: {e}")
|
1108 |
+
st.stop() # Stop execution if RNN fails
|
1109 |
+
|
1110 |
+
# Evaluation
|
1111 |
+
if problem_type == "Regression":
|
1112 |
+
mse = mean_squared_error(y_test, y_pred)
|
1113 |
+
rmse = np.sqrt(mse)
|
1114 |
+
mae = mean_absolute_error(y_test, y_pred)
|
1115 |
+
r2 = r2_score(y_test, y_pred)
|
1116 |
+
st.write(f"Mean Squared Error: {mse:.4f}")
|
1117 |
+
st.write(f"Root Mean Squared Error: {rmse:.4f}")
|
1118 |
+
st.write(f"Mean Absolute Error: {mae:.4f}")
|
1119 |
+
st.write(f"R-squared: {r2:.4f}")
|
1120 |
+
else:
|
1121 |
+
accuracy = accuracy_score(y_test, y_pred)
|
1122 |
+
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0) #Added zero_division
|
1123 |
+
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0) #Added zero_division
|
1124 |
+
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0) #Added zero_division
|
1125 |
+
st.write(f"Accuracy: {accuracy:.4f}")
|
1126 |
+
st.write(f"Precision: {precision:.4f}")
|
1127 |
+
st.write(f"Recall: {recall:.4f}")
|
1128 |
+
st.write(f"F1 Score: {f1:.4f}")
|
1129 |
+
st.write("Classification Report:")
|
1130 |
+
st.text(classification_report(y_test, y_pred)) #added classification report
|
1131 |
+
|
1132 |
+
st.success("Model trained successfully!")
|
1133 |
+
|
1134 |
+
except Exception as e:
|
1135 |
+
st.error(f"An error occurred during training: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|