ProfessorLeVesseur committed on
Commit
215d8b5
·
verified ·
1 Parent(s): 907c533

Update data_processor.py

Browse files
Files changed (1) hide show
  1. data_processor.py +77 -76
data_processor.py CHANGED
@@ -289,83 +289,84 @@ class DataProcessor:
289
  else:
290
  return 'Unknown'
291
 
292
- def compute_student_metrics(self, df):
293
- intervention_df = df[df[self.INTERVENTION_COLUMN].str.strip().str.lower() == 'yes']
294
- intervention_sessions_held = len(intervention_df)
295
- student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
296
-
297
- student_metrics = {}
298
- for col in student_columns:
299
- student_name = col.replace('Student Attendance [', '').replace(']', '').strip()
300
- student_data = intervention_df[[col]].copy()
301
- student_data[col] = student_data[col].fillna('Absent')
302
-
303
- attendance_values = student_data[col].apply(lambda x: 1 if self.classify_engagement(x) in [
304
- self.ENGAGED_STR,
305
- self.PARTIALLY_ENGAGED_STR,
306
- self.NOT_ENGAGED_STR
307
- ] else 0)
308
-
309
- sessions_attended = attendance_values.sum()
310
- attendance_pct = (sessions_attended / intervention_sessions_held) * 100 if intervention_sessions_held > 0 else 0
311
- attendance_pct = round(attendance_pct)
312
-
313
- engagement_counts = {
314
- self.ENGAGED_STR: 0,
315
- self.PARTIALLY_ENGAGED_STR: 0,
316
- self.NOT_ENGAGED_STR: 0,
317
- 'Absent': 0
318
- }
319
-
320
- for x in student_data[col]:
321
- classified_engagement = self.classify_engagement(x)
322
- if classified_engagement in engagement_counts:
323
- engagement_counts[classified_engagement] += 1
324
- else:
325
- engagement_counts['Absent'] += 1 # Count as Absent if not engaged
326
-
327
- # Calculate percentages for engagement states
328
- total_sessions = sum(engagement_counts.values())
329
-
330
- # Engagement (%)
331
- engagement_pct = (engagement_counts[self.ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
332
- engagement_pct = round(engagement_pct)
333
-
334
- engaged_pct = (engagement_counts[self.ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
335
- engaged_pct = round(engaged_pct)
336
-
337
- partially_engaged_pct = (engagement_counts[self.PARTIALLY_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
338
- partially_engaged_pct = round(partially_engaged_pct)
339
-
340
- not_engaged_pct = (engagement_counts[self.NOT_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
341
- not_engaged_pct = round(not_engaged_pct)
342
-
343
- absent_pct = (engagement_counts['Absent'] / total_sessions * 100) if total_sessions > 0 else 0
344
- absent_pct = round(absent_pct)
345
-
346
- # Determine if the student attended ≥ 90% of sessions
347
- attended_90 = "Yes" if attendance_pct >= 90 else "No"
348
-
349
- # Determine if the student was engaged ≥ 80% of the time
350
- engaged_80 = "Yes" if engaged_pct >= 80 else "No"
351
-
352
- # Store metrics in the required order
353
- student_metrics[student_name] = {
354
- 'Attended ≥ 90%': attended_90,
355
- 'Engagement ≥ 80%': engaged_80,
356
- 'Attendance (%)': attendance_pct,
357
- 'Engagement (%)': engagement_pct,
358
- f'{self.ENGAGED_STR} (%)': engaged_pct,
359
- f'{self.PARTIALLY_ENGAGED_STR} (%)': partially_engaged_pct,
360
- f'{self.NOT_ENGAGED_STR} (%)': not_engaged_pct,
361
- 'Absent (%)': absent_pct
362
- }
363
-
364
- # Create a DataFrame from student_metrics
365
- student_metrics_df = pd.DataFrame.from_dict(student_metrics, orient='index').reset_index()
366
- student_metrics_df.rename(columns={'index': 'Student'}, inplace=True)
367
- return student_metrics_df
368
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  def compute_average_metrics(self, student_metrics_df):
370
  # Calculate the attendance and engagement average percentages across students
371
  attendance_avg_stats = student_metrics_df['Attendance (%)'].mean() # Calculate the average attendance percentage
 
289
  else:
290
  return 'Unknown'
291
 
292
+ def compute_student_metrics(self, df):
293
+ intervention_column = self.find_intervention_column(df)
294
+ intervention_df = df[df[intervention_column].str.strip().str.lower() == 'yes']
295
+ intervention_sessions_held = len(intervention_df)
296
+ student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
+ student_metrics = {}
299
+ for col in student_columns:
300
+ student_name = col.replace('Student Attendance [', '').replace(']', '').strip()
301
+ student_data = intervention_df[[col]].copy()
302
+ student_data[col] = student_data[col].fillna('Absent')
303
+
304
+ attendance_values = student_data[col].apply(lambda x: 1 if self.classify_engagement(x) in [
305
+ self.ENGAGED_STR,
306
+ self.PARTIALLY_ENGAGED_STR,
307
+ self.NOT_ENGAGED_STR
308
+ ] else 0)
309
+
310
+ sessions_attended = attendance_values.sum()
311
+ attendance_pct = (sessions_attended / intervention_sessions_held) * 100 if intervention_sessions_held > 0 else 0
312
+ attendance_pct = round(attendance_pct)
313
+
314
+ engagement_counts = {
315
+ self.ENGAGED_STR: 0,
316
+ self.PARTIALLY_ENGAGED_STR: 0,
317
+ self.NOT_ENGAGED_STR: 0,
318
+ 'Absent': 0
319
+ }
320
+
321
+ for x in student_data[col]:
322
+ classified_engagement = self.classify_engagement(x)
323
+ if classified_engagement in engagement_counts:
324
+ engagement_counts[classified_engagement] += 1
325
+ else:
326
+ engagement_counts['Absent'] += 1 # Count as Absent if not engaged
327
+
328
+ # Calculate percentages for engagement states
329
+ total_sessions = sum(engagement_counts.values())
330
+
331
+ # Engagement (%)
332
+ engagement_pct = (engagement_counts[self.ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
333
+ engagement_pct = round(engagement_pct)
334
+
335
+ engaged_pct = (engagement_counts[self.ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
336
+ engaged_pct = round(engaged_pct)
337
+
338
+ partially_engaged_pct = (engagement_counts[self.PARTIALLY_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
339
+ partially_engaged_pct = round(partially_engaged_pct)
340
+
341
+ not_engaged_pct = (engagement_counts[self.NOT_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
342
+ not_engaged_pct = round(not_engaged_pct)
343
+
344
+ absent_pct = (engagement_counts['Absent'] / total_sessions * 100) if total_sessions > 0 else 0
345
+ absent_pct = round(absent_pct)
346
+
347
+ # Determine if the student attended ≥ 90% of sessions
348
+ attended_90 = "Yes" if attendance_pct >= 90 else "No"
349
+
350
+ # Determine if the student was engaged ≥ 80% of the time
351
+ engaged_80 = "Yes" if engaged_pct >= 80 else "No"
352
+
353
+ # Store metrics in the required order
354
+ student_metrics[student_name] = {
355
+ 'Attended ≥ 90%': attended_90,
356
+ 'Engagement ≥ 80%': engaged_80,
357
+ 'Attendance (%)': attendance_pct,
358
+ 'Engagement (%)': engagement_pct,
359
+ f'{self.ENGAGED_STR} (%)': engaged_pct,
360
+ f'{self.PARTIALLY_ENGAGED_STR} (%)': partially_engaged_pct,
361
+ f'{self.NOT_ENGAGED_STR} (%)': not_engaged_pct,
362
+ 'Absent (%)': absent_pct
363
+ }
364
+
365
+ # Create a DataFrame from student_metrics
366
+ student_metrics_df = pd.DataFrame.from_dict(student_metrics, orient='index').reset_index()
367
+ student_metrics_df.rename(columns={'index': 'Student'}, inplace=True)
368
+ return student_metrics_df
369
+
370
  def compute_average_metrics(self, student_metrics_df):
371
  # Calculate the attendance and engagement average percentages across students
372
  attendance_avg_stats = student_metrics_df['Attendance (%)'].mean() # Calculate the average attendance percentage