alibayram committed
Commit ee582b8
1 Parent(s): c3f7a0c

add car evaluation analysis dashboard with data overview, exploratory analysis, model training, and comparison features

Files changed (1)
app.py +293 -2
app.py CHANGED
@@ -1,4 +1,295 @@
  import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import OneHotEncoder
+ from sklearn.svm import SVC
+ from sklearn.ensemble import RandomForestClassifier
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+ from ucimlrepo import fetch_ucirepo

- x = st.slider('Select a value')
- st.write(x, 'squared is', x * x)
+ # Page configuration
+ st.set_page_config(
+     page_title="Car Evaluation Analysis",
+     page_icon="🚗",
+     layout="wide"
+ )
+
+ # Title and introduction
+ st.title("🚗 Car Evaluation Analysis Dashboard")
+ st.markdown("""
+ This dashboard analyzes car evaluation data using different machine learning models.
+ The dataset includes various car attributes and their evaluation classifications.
+ """)
+
+
+ # Load and prepare data
+ @st.cache_data
+ def load_data():
+     car_evaluation = fetch_ucirepo(id=19)
+     X, y = car_evaluation.data.features, car_evaluation.data.targets
+     df = pd.concat([X, y], axis=1)
+     return df, X, y
+
+
+ df, X, y = load_data()
+
+ # Sidebar
+ st.sidebar.header("Navigation")
+ page = st.sidebar.radio("Go to", ["Data Overview", "Exploratory Analysis", "Model Training", "Model Comparison"])
+
+ # Data Overview Page
+ if page == "Data Overview":
+     st.header("Dataset Overview")
+
+     # Display metrics in cards
+     col1, col2, col3, col4 = st.columns(4)
+
+     with col1:
+         st.metric(
+             label="Total Records",
+             value=f"{len(df):,}"
+         )
+
+     with col2:
+         st.metric(
+             label="Features",
+             value=len(df.columns) - 1
+         )
+
+     with col3:
+         st.metric(
+             label="Target Classes",
+             value=len(df['class'].unique())
+         )
+
+     with col4:
+         st.metric(
+             label="Missing Values",
+             value=df.isnull().sum().sum()
+         )
+
+     st.write("")
+
+     # Sample Data
+     st.subheader("Sample Data")
+     st.dataframe(
+         df.head(),
+         use_container_width=True,
+         height=230
+     )
+
+     # Target Class Distribution
+     st.subheader("Target Class Distribution")
+
+     col1, col2 = st.columns([2, 1])
+
+     with col1:
+         fig, ax = plt.subplots(figsize=(10, 6))
+         sns.countplot(data=df, x='class', palette='viridis')
+         plt.title('Distribution of Car Evaluations')
+         st.pyplot(fig)
+
+     with col2:
+         st.write("")
+         st.write("")
+         class_distribution = df['class'].value_counts()
+         for class_name, count in class_distribution.items():
+             st.metric(
+                 label=class_name,
+                 value=count
+             )
+
+ # Exploratory Analysis Page
+ elif page == "Exploratory Analysis":
+     st.header("Exploratory Data Analysis")
+
+     # Feature Distribution
+     st.subheader("Feature Distributions")
+     feature_to_plot = st.selectbox("Select Feature", df.columns[:-1])
+
+     fig, ax = plt.subplots(figsize=(10, 6))
+     sns.countplot(data=df, x=feature_to_plot, palette='coolwarm')
+     plt.title(f'Distribution of {feature_to_plot}')
+     plt.xticks(rotation=45)
+     st.pyplot(fig)
+
+     # Feature vs Target
+     st.subheader("Feature vs Target Class")
+     fig, ax = plt.subplots(figsize=(12, 6))
+     sns.countplot(data=df, x=feature_to_plot, hue='class', palette='Set2')
+     plt.title(f'{feature_to_plot} Distribution by Class')
+     plt.xticks(rotation=45)
+     st.pyplot(fig)
+
+     # Correlation Heatmap
+     st.subheader("Correlation Heatmap")
+     encoded_df = pd.get_dummies(df, drop_first=True)
+     fig, ax = plt.subplots(figsize=(12, 8))
+     sns.heatmap(encoded_df.corr(), annot=True, fmt='.2f', cmap='coolwarm')
+     plt.title('Correlation Heatmap of Encoded Features')
+     st.pyplot(fig)
+
+ # Model Training Page
+ elif page == "Model Training":
+     st.header("Model Training and Evaluation")
+
+     # Data preprocessing
+     encoder = OneHotEncoder(sparse_output=False)
+     X_encoded = encoder.fit_transform(X)
+     y_encoded = y.values.ravel()
+
+     # Train-test split
+     test_size = st.slider("Select Test Size", 0.1, 0.4, 0.2, 0.05)
+     X_train, X_test, y_train, y_test = train_test_split(
+         X_encoded, y_encoded, test_size=test_size, random_state=42
+     )
+
+     # Model selection
+     model_choice = st.selectbox(
+         "Select Model",
+         ["Support Vector Machine", "Random Forest", "Logistic Regression"]
+     )
+
+     if st.button("Train Model"):
+         with st.spinner("Training model..."):
+             if model_choice == "Support Vector Machine":
+                 model = SVC(kernel='linear', random_state=42)
+             elif model_choice == "Random Forest":
+                 model = RandomForestClassifier(n_estimators=100, random_state=42)
+             else:
+                 model = LogisticRegression(max_iter=500, random_state=42)
+
+             model.fit(X_train, y_train)
+             y_pred = model.predict(X_test)
+
+             # Display results
+             col1, col2 = st.columns(2)
+
+             with col1:
+                 st.subheader("Model Performance")
+                 accuracy = accuracy_score(y_test, y_pred)
+                 st.metric(label="Accuracy", value=f"{accuracy:.4f}")
+                 st.text("Classification Report:")
+                 st.text(classification_report(y_test, y_pred))
+
+             with col2:
+                 st.subheader("Confusion Matrix")
+                 fig, ax = plt.subplots(figsize=(8, 6))
+                 sns.heatmap(
+                     confusion_matrix(y_test, y_pred),
+                     annot=True,
+                     fmt='d',
+                     cmap='Blues',
+                     xticklabels=np.unique(y_test),
+                     yticklabels=np.unique(y_test)
+                 )
+                 plt.title(f'{model_choice} Confusion Matrix')
+                 plt.xlabel('Predicted')
+                 plt.ylabel('Actual')
+                 st.pyplot(fig)
+
+             # Feature importance for Random Forest
+             if model_choice == "Random Forest":
+                 st.subheader("Feature Importance")
+                 feature_importance = pd.DataFrame({
+                     'feature': encoder.get_feature_names_out(),
+                     'importance': model.feature_importances_
+                 })
+                 feature_importance = feature_importance.sort_values(
+                     'importance', ascending=False
+                 ).head(10)
+
+                 fig, ax = plt.subplots(figsize=(10, 6))
+                 sns.barplot(
+                     data=feature_importance,
+                     x='importance',
+                     y='feature'
+                 )
+                 plt.title('Top 10 Most Important Features')
+                 st.pyplot(fig)
+
+ # Model Comparison Page
+ else:
+     st.header("Model Comparison")
+
+     if st.button("Compare All Models"):
+         with st.spinner("Training all models..."):
+             # Data preprocessing
+             encoder = OneHotEncoder(sparse_output=False)
+             X_encoded = encoder.fit_transform(X)
+             y_encoded = y.values.ravel()
+
+             # Train-test split
+             X_train, X_test, y_train, y_test = train_test_split(
+                 X_encoded, y_encoded, test_size=0.2, random_state=42
+             )
+
+             # Train all models
+             models = {
+                 "SVM": SVC(kernel='linear', random_state=42),
+                 "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
+                 "Logistic Regression": LogisticRegression(max_iter=500, random_state=42)
+             }
+
+             results = {}
+             for name, model in models.items():
+                 model.fit(X_train, y_train)
+                 y_pred = model.predict(X_test)
+                 results[name] = {
+                     'accuracy': accuracy_score(y_test, y_pred),
+                     'predictions': y_pred
+                 }
+
+             # Display comparison results
+             st.subheader("Accuracy Comparison")
+             accuracy_df = pd.DataFrame({
+                 'Model': list(results.keys()),
+                 'Accuracy': [results[model]['accuracy'] for model in results.keys()]
+             })
+
+             col1, col2 = st.columns(2)
+
+             with col1:
+                 st.dataframe(accuracy_df)
+
+             with col2:
+                 fig, ax = plt.subplots(figsize=(10, 6))
+                 sns.barplot(
+                     data=accuracy_df,
+                     x='Model',
+                     y='Accuracy',
+                     palette='viridis'
+                 )
+                 plt.title('Model Accuracy Comparison')
+                 plt.ylim(0, 1)
+                 st.pyplot(fig)
+
+             # Detailed model comparison
+             st.subheader("Detailed Model Performance")
+             for name in results.keys():
+                 st.write(f"\n{name}:")
+                 st.text(classification_report(y_test, results[name]['predictions']))
+
+                 fig, ax = plt.subplots(figsize=(8, 6))
+                 sns.heatmap(
+                     confusion_matrix(y_test, results[name]['predictions']),
+                     annot=True,
+                     fmt='d',
+                     cmap='Blues',
+                     xticklabels=np.unique(y_test),
+                     yticklabels=np.unique(y_test)
+                 )
+                 plt.title(f'{name} Confusion Matrix')
+                 plt.xlabel('Predicted')
+                 plt.ylabel('Actual')
+                 st.pyplot(fig)
+
+ # Footer
+ st.markdown("""
+ ---
+ Created with ❤️ using Streamlit
+ """)