Alamgirapi committed on
Commit 4dcb991 · verified · 1 Parent(s): e6c2921

Update app.py

Files changed (1)
app.py +294 -213
app.py CHANGED
@@ -7,6 +7,7 @@ import pickle
 import io
 import traceback
 import sys
+import base64
 from datetime import datetime
 
 # Import ML libraries with error handling
@@ -29,7 +30,7 @@ except ImportError as e:
     st.info("Please ensure NoCodeTextClassifier package is installed")
 
 # Set page config
-st.set_page_config(page_title="Debug Text Classification", page_icon="🔍", layout="wide")
+st.set_page_config(page_title="Fixed Text Classification", page_icon="🔧", layout="wide")
 
 # Debug section
 st.sidebar.header("🔍 Debug Information")
@@ -41,118 +42,107 @@ def debug_log(message, level="INFO"):
     timestamp = datetime.now().strftime("%H:%M:%S")
     st.sidebar.write(f"**{timestamp} [{level}]:** {message}")
 
-def detailed_error_info(e):
-    """Get detailed error information"""
-    error_type = type(e).__name__
-    error_message = str(e)
-    error_traceback = traceback.format_exc()
-
-    return {
-        'type': error_type,
-        'message': error_message,
-        'traceback': error_traceback
-    }
-
-def inspect_uploaded_file(uploaded_file):
-    """Inspect uploaded file properties"""
-    debug_log("🔍 Inspecting uploaded file...")
-
-    try:
-        file_info = {
-            'name': uploaded_file.name,
-            'type': uploaded_file.type,
-            'size': uploaded_file.size,
-            'file_id': getattr(uploaded_file, 'file_id', 'Not available')
-        }
-
-        debug_log(f"File name: {file_info['name']}")
-        debug_log(f"File type: {file_info['type']}")
-        debug_log(f"File size: {file_info['size']} bytes")
-        debug_log(f"File ID: {file_info['file_id']}")
-
-        # Try to read first few bytes
-        uploaded_file.seek(0)
-        first_bytes = uploaded_file.read(100)
-        debug_log(f"First 100 bytes type: {type(first_bytes)}")
-        debug_log(f"First 100 bytes preview: {first_bytes[:50]}...")
-
-        # Reset file pointer
-        uploaded_file.seek(0)
-
-        return file_info
-
-    except Exception as e:
-        error_info = detailed_error_info(e)
-        debug_log(f"❌ Error inspecting file: {error_info['type']}: {error_info['message']}", "ERROR")
-        st.sidebar.error(f"File inspection error: {error_info['message']}")
-        return None
-
-def safe_read_csv_debug(uploaded_file, encoding_options=['utf-8', 'latin1', 'iso-8859-1', 'cp1252']):
-    """Safely read CSV with extensive debugging"""
-    debug_log("🔄 Starting CSV read process...")
-
-    # Inspect file first
-    file_info = inspect_uploaded_file(uploaded_file)
-    if file_info is None:
-        return None
-
-    # Try different reading methods
-    methods = [
-        ("Direct pandas read", lambda f: pd.read_csv(f)),
-        ("BytesIO method", lambda f: pd.read_csv(io.BytesIO(f.read()))),
-        ("StringIO method", lambda f: pd.read_csv(io.StringIO(f.read().decode('utf-8')))),
-    ]
-
-    for method_name, method_func in methods:
-        debug_log(f"🔄 Trying method: {method_name}")
-
-        for encoding in encoding_options:
-            try:
-                debug_log(f"  - Attempting encoding: {encoding}")
-                uploaded_file.seek(0)
-
-                if method_name == "Direct pandas read":
-                    df = pd.read_csv(uploaded_file, encoding=encoding)
-                elif method_name == "BytesIO method":
-                    uploaded_file.seek(0)
-                    content = uploaded_file.read()
-                    df = pd.read_csv(io.BytesIO(content), encoding=encoding)
-                elif method_name == "StringIO method":
-                    uploaded_file.seek(0)
-                    content = uploaded_file.read()
-                    if isinstance(content, bytes):
-                        content = content.decode(encoding)
-                    df = pd.read_csv(io.StringIO(content))
-
-                debug_log(f"✅ Success with {method_name} + {encoding}")
-                debug_log(f"DataFrame shape: {df.shape}")
-                debug_log(f"Columns: {list(df.columns)}")
-
-                st.success(f"File loaded successfully using {method_name} with {encoding} encoding")
-                return df
-
-            except UnicodeDecodeError as e:
-                debug_log(f"  - Unicode error with {encoding}: {str(e)}", "WARNING")
-                continue
-            except Exception as e:
-                error_info = detailed_error_info(e)
-                debug_log(f"  - Error with {method_name} + {encoding}: {error_info['type']}: {error_info['message']}", "ERROR")
-
-                # Show detailed error for 403 or permission errors
-                if "403" in str(e) or "permission" in str(e).lower():
-                    st.error("🚨 PERMISSION ERROR DETECTED!")
-                    st.error(f"Method: {method_name}, Encoding: {encoding}")
-                    st.error(f"Error type: {error_info['type']}")
-                    st.error(f"Error message: {error_info['message']}")
-                    st.code(error_info['traceback'])
-
-                continue
-
-    debug_log("❌ All reading methods failed", "ERROR")
-    st.error("All CSV reading methods failed. Check debug log for details.")
-    return None
+# Alternative file upload methods
+def alternative_file_upload():
+    """Alternative file upload methods to bypass 403 error"""
+    st.subheader("🔧 Alternative File Upload Methods")
+
+    # Method 1: Text area paste
+    st.markdown("### Method 1: Copy-Paste CSV Content")
+    st.info("Copy your CSV content and paste it in the text area below")
+
+    csv_content = st.text_area(
+        "Paste your CSV content here:",
+        height=200,
+        placeholder="name,age,city\nJohn,25,New York\nJane,30,London"
+    )
+
+    if csv_content and st.button("Load from Text Area", type="primary"):
+        try:
+            df = pd.read_csv(io.StringIO(csv_content))
+            st.success("✅ CSV loaded from text area!")
+            return df, "text_area"
+        except Exception as e:
+            st.error(f"Error parsing CSV: {e}")
+            return None, None
+
+    # Method 2: Base64 upload (for advanced users)
+    st.markdown("### Method 2: Base64 Upload")
+    with st.expander("For Advanced Users - Base64 Upload"):
+        st.info("Convert your CSV to base64 and paste here")
+        st.code("""
+# Python code to convert CSV to base64:
+import base64
+with open('your_file.csv', 'rb') as f:
+    encoded = base64.b64encode(f.read()).decode()
+print(encoded)
+        """)
+
+        base64_content = st.text_area("Paste base64 encoded CSV:", height=100)
+
+        if base64_content and st.button("Load from Base64"):
+            try:
+                decoded = base64.b64decode(base64_content)
+                df = pd.read_csv(io.BytesIO(decoded))
+                st.success("✅ CSV loaded from base64!")
+                return df, "base64"
+            except Exception as e:
+                st.error(f"Error decoding base64: {e}")
+                return None, None
+
+    # Method 3: Sample data
+    st.markdown("### Method 3: Use Sample Data")
+    if st.button("Load Sample Text Classification Data"):
+        # Create sample data
+        sample_data = {
+            'text': [
+                'I love this product, it works great!',
+                'This is terrible, waste of money',
+                'Good quality and fast delivery',
+                'Not satisfied with the purchase',
+                'Excellent service and support',
+                'Poor quality, arrived damaged',
+                'Amazing product, highly recommend',
+                'Disappointed with the results'
+            ],
+            'label': ['positive', 'negative', 'positive', 'negative',
+                      'positive', 'negative', 'positive', 'negative']
+        }
+        df = pd.DataFrame(sample_data)
+        st.success("✅ Sample data loaded!")
+        return df, "sample"
+
+    return None, None
+
+def safe_file_uploader_with_fallback():
+    """Try normal upload first, then fallback methods"""
+    st.markdown("### 📁 Upload Your CSV File")
+
+    # Try standard uploader first
+    uploaded_file = st.file_uploader(
+        "Choose a CSV file",
+        type=['csv'],
+        help="If upload fails with 403 error, use alternative methods below"
+    )
+
+    if uploaded_file is not None:
+        try:
+            debug_log("📁 File uploaded successfully via standard method")
+            df = pd.read_csv(uploaded_file)
+            st.success("✅ File uploaded successfully!")
+            return df, "standard"
+        except Exception as e:
+            st.error(f"Error reading uploaded file: {e}")
+            debug_log(f"❌ Standard upload failed: {e}", "ERROR")
+
+    # If standard upload fails or no file uploaded, show alternatives
+    st.markdown("---")
+    st.markdown("### 🔄 Alternative Upload Methods")
+    st.warning("If you're getting a 403 error, try one of these alternative methods:")
+
+    return alternative_file_upload()
 
-# Utility functions with debugging
+# Utility functions (same as before but with debug)
 def save_artifacts(obj, folder_name, file_name):
     """Save artifacts with debugging"""
     debug_log(f"💾 Saving {file_name} to {folder_name}")
@@ -167,9 +157,8 @@ def save_artifacts(obj, folder_name, file_name):
         return True
 
     except Exception as e:
-        error_info = detailed_error_info(e)
-        debug_log(f"❌ Error saving {file_name}: {error_info['message']}", "ERROR")
-        st.error(f"Save error: {error_info['message']}")
+        debug_log(f"❌ Error saving {file_name}: {str(e)}", "ERROR")
+        st.error(f"Save error: {str(e)}")
         return False
 
 def load_artifacts(folder_name, file_name):
@@ -189,9 +178,7 @@ def load_artifacts(folder_name, file_name):
         return obj
 
     except Exception as e:
-        error_info = detailed_error_info(e)
-        debug_log(f"❌ Error loading {file_name}: {error_info['message']}", "ERROR")
-        st.error(f"Load error: {error_info['message']}")
+        debug_log(f"❌ Error loading {file_name}: {str(e)}", "ERROR")
        return None
 
 def load_model(model_name):
@@ -221,11 +208,9 @@ def predict_text(model_name, text, vectorizer_type="tfidf"):
         debug_log("🧹 Cleaning text...")
         text_cleaner = TextCleaner()
         clean_text = text_cleaner.clean_text(text)
-        debug_log(f"Cleaned text preview: {clean_text[:50]}...")
 
         debug_log("🔢 Vectorizing text...")
         text_vector = vectorizer.transform([clean_text])
-        debug_log(f"Vector shape: {text_vector.shape}")
 
         debug_log("🎯 Making prediction...")
         prediction = model.predict(text_vector)
@@ -234,7 +219,6 @@ def predict_text(model_name, text, vectorizer_type="tfidf"):
         if hasattr(model, 'predict_proba'):
             try:
                 prediction_proba = model.predict_proba(text_vector)[0]
-                debug_log(f"Prediction probabilities: {prediction_proba}")
             except:
                 debug_log("No prediction probabilities available", "WARNING")
 
@@ -244,142 +228,239 @@ def predict_text(model_name, text, vectorizer_type="tfidf"):
         return predicted_label, prediction_proba
 
     except Exception as e:
-        error_info = detailed_error_info(e)
-        debug_log(f"❌ Prediction error: {error_info['message']}", "ERROR")
-        st.error(f"Prediction error: {error_info['message']}")
-        if debug_mode:
-            st.code(error_info['traceback'])
+        debug_log(f"❌ Prediction error: {str(e)}", "ERROR")
+        st.error(f"Prediction error: {str(e)}")
        return None, None
 
 # Main App
-st.title('🔍 Debug Text Classification App')
-st.write('Debug version to identify and fix issues')
+st.title('🔧 Fixed Text Classification App')
+st.write('Workaround version to bypass 403 upload errors')
 
-# Environment info
+# Show environment info in sidebar if debug mode
 if debug_mode:
     st.sidebar.subheader("🖥️ Environment Info")
     st.sidebar.write(f"Python version: {sys.version}")
     st.sidebar.write(f"Streamlit version: {st.__version__}")
-    st.sidebar.write(f"Pandas version: {pd.__version__}")
-    st.sidebar.write(f"Current working directory: {os.getcwd()}")
-
-    # Check directory permissions
-    try:
-        test_dir = "test_permissions"
-        os.makedirs(test_dir, exist_ok=True)
-        test_file = os.path.join(test_dir, "test.txt")
-        with open(test_file, 'w') as f:
-            f.write("test")
-        os.remove(test_file)
-        os.rmdir(test_dir)
-        st.sidebar.success("✅ File system permissions OK")
-    except Exception as e:
-        st.sidebar.error(f"❌ File system permission issue: {e}")
+    st.sidebar.write(f"Current directory: {os.getcwd()}")
 
-# Sidebar navigation
-section = st.sidebar.radio("Choose Section", ["File Upload Debug", "Data Analysis", "Train Model", "Predictions"])
+# Navigation
+section = st.sidebar.radio("Choose Section", [
+    "Upload Data", "Data Analysis", "Train Model", "Predictions"
+])
 
-# Session state initialization
-if 'vectorizer_type' not in st.session_state:
-    st.session_state.vectorizer_type = "tfidf"
+# Session state
 if 'train_df' not in st.session_state:
     st.session_state.train_df = None
+if 'upload_method' not in st.session_state:
+    st.session_state.upload_method = None
+if 'vectorizer_type' not in st.session_state:
+    st.session_state.vectorizer_type = "tfidf"
 
-# File Upload Debug Section
-if section == "File Upload Debug":
-    st.subheader("🔍 File Upload Debugging")
-
-    st.info("This section helps debug file upload issues. Upload your file and see detailed error information.")
-
-    train_data = st.file_uploader("Upload training data (DEBUG MODE)", type=["csv"], key="debug_upload")
-
-    if train_data is not None:
-        st.write("### File Upload Detected!")
-
-        # Show raw file info
-        st.write("**Raw File Information:**")
-        st.json({
-            "name": train_data.name,
-            "type": train_data.type if hasattr(train_data, 'type') else "Unknown",
-            "size": train_data.size if hasattr(train_data, 'size') else "Unknown"
-        })
-
-        # Try to read the file
-        st.write("### Attempting to Read File...")
-
-        with st.spinner("Reading file with debug mode..."):
-            df = safe_read_csv_debug(train_data)
-
-        if df is not None:
-            st.success("🎉 File successfully loaded!")
-            st.write("**Data Preview:**")
-            st.dataframe(df.head())
-            st.write(f"**Shape:** {df.shape}")
-            st.write(f"**Columns:** {list(df.columns)}")
-            st.write(f"**Data Types:**")
-            st.write(df.dtypes)
-
-            # Store in session state
-            st.session_state.train_df = df
-
-        else:
-            st.error("❌ Failed to load file. Check the debug log for details.")
-
-            # Additional troubleshooting
-            st.write("### 🔧 Troubleshooting Steps:")
-            st.write("1. Check if your file is a valid CSV")
-            st.write("2. Try saving your CSV with different encoding (UTF-8 recommended)")
-            st.write("3. Check if file size is within limits")
-            st.write("4. Ensure no special characters in filename")
-            st.write("5. Try uploading from a different location")
+# Upload Data Section
+if section == "Upload Data":
+    st.subheader("📁 Upload Your Dataset")
+
+    df, method = safe_file_uploader_with_fallback()
+
+    if df is not None:
+        st.session_state.train_df = df
+        st.session_state.upload_method = method
+
+        st.write("### 📊 Data Preview")
+        st.dataframe(df.head())
+
+        st.write("### 📈 Basic Info")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.metric("Rows", df.shape[0])
+        with col2:
+            st.metric("Columns", df.shape[1])
+        with col3:
+            st.metric("Missing Values", df.isnull().sum().sum())
+
+        st.write("### 🏷️ Select Columns")
+        columns = df.columns.tolist()
+
+        col1, col2 = st.columns(2)
+        with col1:
+            text_column = st.selectbox("Select text column:", columns)
+        with col2:
+            target_column = st.selectbox("Select target/label column:", columns)
+
+        if text_column and target_column:
+            st.session_state.text_column = text_column
+            st.session_state.target_column = target_column
+
+            # Show sample data
+            st.write("### 📝 Sample Data")
+            sample_df = df[[text_column, target_column]].head()
+            st.dataframe(sample_df)
+
+            # Show target distribution
+            st.write("### 🎯 Target Distribution")
+            target_counts = df[target_column].value_counts()
+            st.bar_chart(target_counts)
+
+            st.success("✅ Data ready for processing!")
 
-# Other sections (simplified for debugging)
+# Data Analysis Section
 elif section == "Data Analysis":
-    st.subheader("📊 Data Analysis")
-
     if st.session_state.train_df is not None:
         df = st.session_state.train_df
-        st.write("Using loaded data from debug session:")
-        st.dataframe(df.head())
-
-        # Basic analysis without custom modules if they fail
-        st.write(f"**Shape:** {df.shape}")
-        st.write(f"**Columns:** {list(df.columns)}")
-        st.write(f"**Missing values:**")
-        st.write(df.isnull().sum())
+        text_col = st.session_state.get('text_column')
+        target_col = st.session_state.get('target_column')
+
+        if text_col and target_col:
+            st.subheader("📊 Data Analysis")
+
+            try:
+                # Process data using custom classes
+                info = Informations(df, text_col, target_col)
+                df['clean_text'] = info.clean_text()
+                df['text_length'] = info.text_length()
+
+                # Update session state
+                st.session_state.train_df = df
+
+                # Show analysis
+                st.write("**Data Shape:**", info.shape())
+                st.write("**Class Distribution:**", info.class_imbalanced())
+                st.write("**Missing Values:**", info.missing_values())
+
+                # Text length analysis
+                st.write("**Text Length Analysis:**")
+                st.write(info.analysis_text_length('text_length'))
+
+                # Visualizations
+                vis = Visualizations(df, text_col, target_col)
+
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.write("**Class Distribution:**")
+                    vis.class_distribution()
+
+                with col2:
+                    st.write("**Text Length Distribution:**")
+                    vis.text_length_distribution()
+
+            except Exception as e:
+                st.error(f"Error in analysis: {e}")
+                debug_log(f"Analysis error: {e}", "ERROR")
+        else:
+            st.warning("Please select text and target columns in the Upload Data section.")
     else:
-        st.warning("No data loaded. Please use 'File Upload Debug' section first.")
+        st.warning("Please upload data first.")
 
+# Train Model Section
 elif section == "Train Model":
-    st.subheader("🤖 Train Model")
-    st.info("Use this section after successfully loading data in debug mode.")
-
     if st.session_state.train_df is not None:
-        st.success("Data available for training!")
-        # Add your training logic here
+        df = st.session_state.train_df
+        text_col = st.session_state.get('text_column')
+        target_col = st.session_state.get('target_column')
+
+        if text_col and target_col and 'clean_text' in df.columns:
+            st.subheader("🤖 Train Model")
+
+            col1, col2 = st.columns(2)
+
+            with col1:
+                model_choice = st.selectbox("Choose Model:", [
+                    "Logistic Regression", "Decision Tree", "Random Forest",
+                    "Linear SVC", "SVC", "Multinomial Naive Bayes"
+                ])
+
+            with col2:
+                vectorizer_choice = st.selectbox("Choose Vectorizer:",
+                    ["Tfidf Vectorizer", "Count Vectorizer"])
+
+            if st.button("🚀 Train Model", type="primary"):
+                with st.spinner("Training model..."):
+                    try:
+                        # Prepare data
+                        if vectorizer_choice == "Tfidf Vectorizer":
+                            vectorizer = TfidfVectorizer(max_features=10000)
+                            st.session_state.vectorizer_type = "tfidf"
+                        else:
+                            vectorizer = CountVectorizer(max_features=10000)
+                            st.session_state.vectorizer_type = "count"
+
+                        # Label encoding
+                        label_encoder = LabelEncoder()
+                        y = label_encoder.fit_transform(df[target_col])
+                        X = vectorizer.fit_transform(df['clean_text'])
+
+                        # Split data
+                        X_train, X_test, y_train, y_test = process.split_data(X, y)
+
+                        # Save artifacts
+                        save_artifacts(vectorizer, "artifacts", f"{st.session_state.vectorizer_type}_vectorizer.pkl")
+                        save_artifacts(label_encoder, "artifacts", "encoder.pkl")
+
+                        # Train model
+                        models = Models(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
+
+                        if model_choice == "Logistic Regression":
+                            models.LogisticRegression()
+                        elif model_choice == "Decision Tree":
+                            models.DecisionTree()
+                        elif model_choice == "Random Forest":
+                            models.RandomForestClassifier()
+                        elif model_choice == "Linear SVC":
+                            models.LinearSVC()
+                        elif model_choice == "SVC":
+                            models.SVC()
+                        elif model_choice == "Multinomial Naive Bayes":
+                            models.MultinomialNB()
+
+                        st.success("🎉 Model trained successfully!")
+
+                    except Exception as e:
+                        st.error(f"Training error: {e}")
+                        debug_log(f"Training error: {e}", "ERROR")
+        else:
+            st.warning("Please complete data analysis first to process the text data.")
     else:
-        st.warning("No data loaded. Please use 'File Upload Debug' section first.")
+        st.warning("Please upload data first.")
 
+# Predictions Section
 elif section == "Predictions":
-    st.subheader("🔮 Predictions")
-    st.info("Use this section after training a model.")
-
-    # Check for trained models
-    if os.path.exists("models"):
-        models = [f for f in os.listdir("models") if f.endswith('.pkl')]
-        if models:
-            st.write(f"Available models: {models}")
+    st.subheader("🔮 Make Predictions")
+
+    # Check for models
+    if os.path.exists("models") and os.listdir("models"):
+        available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
+
+        if available_models:
+            selected_model = st.selectbox("Choose trained model:", available_models)
+
+            # Single prediction
+            st.write("### Single Text Prediction")
+            text_input = st.text_area("Enter text to classify:", height=100)
+
+            if st.button("🎯 Predict") and text_input:
+                prediction, probabilities = predict_text(
+                    selected_model,
+                    text_input,
+                    st.session_state.get('vectorizer_type', 'tfidf')
+                )
+
+                if prediction is not None:
+                    st.success(f"**Prediction:** {prediction}")
+
+                    if probabilities is not None:
+                        encoder = load_artifacts("artifacts", "encoder.pkl")
+                        if encoder is not None:
+                            prob_df = pd.DataFrame({
+                                'Class': encoder.classes_,
+                                'Probability': probabilities
+                            }).sort_values('Probability', ascending=False)
+
+                            st.bar_chart(prob_df.set_index('Class'))
         else:
-            st.info("No trained models found.")
+            st.info("No trained models found. Train a model first.")
     else:
-        st.info("Models directory not found.")
+        st.info("No models directory found. Train a model first.")
 
-# Debug summary
-if debug_mode:
-    st.sidebar.markdown("---")
-    st.sidebar.subheader("📋 Debug Summary")
-    if st.session_state.train_df is not None:
-        st.sidebar.success("✅ Data loaded successfully")
-    else:
-        st.sidebar.warning("⚠️ No data loaded")
+# Show upload method used in sidebar
+if st.session_state.upload_method:
+    st.sidebar.success(f"✅ Data loaded via: {st.session_state.upload_method}")
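For reference, the base64 fallback (Method 2) can be sanity-checked outside Streamlit. A minimal round-trip sketch, assuming only pandas and the standard library; the inline csv_bytes stand in for a real file on disk:

# Encode a CSV the way the in-app instructions describe, then decode it
# the way alternative_file_upload() does.
import base64
import io

import pandas as pd

csv_bytes = b"text,label\nGreat product!,positive\nArrived broken,negative\n"

# User side: encode the raw file bytes and copy the resulting string.
encoded = base64.b64encode(csv_bytes).decode()

# App side: decode the pasted string and parse it as CSV.
df = pd.read_csv(io.BytesIO(base64.b64decode(encoded)))
print(df.shape)  # (2, 2)

Encoding happens on the user's machine, so nothing in this path touches the upload endpoint that returns the 403.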
 
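The copy-paste fallback (Method 1) reduces to an even smaller pattern. A stand-alone sketch under the same assumptions (streamlit and pandas installed); the file name paste_demo.py is illustrative:

# paste_demo.py - run with: streamlit run paste_demo.py
# Parse CSV text pasted into a text area instead of relying on st.file_uploader.
import io

import pandas as pd
import streamlit as st

csv_content = st.text_area("Paste your CSV content here:", height=200)

if csv_content:
    try:
        df = pd.read_csv(io.StringIO(csv_content))
        st.success(f"Parsed {df.shape[0]} rows x {df.shape[1]} columns")
        st.dataframe(df.head())
    except Exception as e:
        st.error(f"Could not parse CSV: {e}")

Because the pasted text never passes through the file-upload endpoint, this path sidesteps a 403 from the uploader entirely.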