ProfessorLeVesseur committed on
Commit
bab30c5
·
verified ·
1 Parent(s): 215d8b5

Update data_processor.py

Browse files
Files changed (1) hide show
  1. data_processor.py +76 -82
data_processor.py CHANGED
@@ -182,8 +182,6 @@
182
 
183
 
184
 
185
-
186
-
187
  import re
188
  import pandas as pd
189
  import os
@@ -210,7 +208,6 @@ class DataProcessor:
210
  return pd.read_excel(uploaded_file)
211
 
212
  def format_session_data(self, df):
213
- # Look for "Date of Session" or "Date" column
214
  date_column = next((col for col in df.columns if col in ["Date of Session", "Date"]), None)
215
  if date_column:
216
  df[date_column] = pd.to_datetime(df[date_column], errors='coerce').dt.date
@@ -276,10 +273,9 @@ class DataProcessor:
276
  'Intervention Sessions Not Held': [total_days - sessions_held],
277
  'Total Number of Days Available': [total_days]
278
  })
279
-
280
 
281
  def classify_engagement(self, engagement_str):
282
- engagement_str = engagement_str.lower()
283
  if engagement_str.startswith(self.ENGAGED_STR.lower()):
284
  return self.ENGAGED_STR
285
  elif engagement_str.startswith(self.PARTIALLY_ENGAGED_STR.lower()):
@@ -289,84 +285,82 @@ class DataProcessor:
289
  else:
290
  return 'Unknown'
291
 
292
- def compute_student_metrics(self, df):
293
- intervention_column = self.find_intervention_column(df)
294
- intervention_df = df[df[intervention_column].str.strip().str.lower() == 'yes']
295
- intervention_sessions_held = len(intervention_df)
296
- student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
297
-
298
- student_metrics = {}
299
- for col in student_columns:
300
- student_name = col.replace('Student Attendance [', '').replace(']', '').strip()
301
- student_data = intervention_df[[col]].copy()
302
- student_data[col] = student_data[col].fillna('Absent')
303
-
304
- attendance_values = student_data[col].apply(lambda x: 1 if self.classify_engagement(x) in [
305
- self.ENGAGED_STR,
306
- self.PARTIALLY_ENGAGED_STR,
307
- self.NOT_ENGAGED_STR
308
- ] else 0)
309
-
310
- sessions_attended = attendance_values.sum()
311
- attendance_pct = (sessions_attended / intervention_sessions_held) * 100 if intervention_sessions_held > 0 else 0
312
- attendance_pct = round(attendance_pct)
313
-
314
- engagement_counts = {
315
- self.ENGAGED_STR: 0,
316
- self.PARTIALLY_ENGAGED_STR: 0,
317
- self.NOT_ENGAGED_STR: 0,
318
- 'Absent': 0
319
- }
320
-
321
- for x in student_data[col]:
322
- classified_engagement = self.classify_engagement(x)
323
- if classified_engagement in engagement_counts:
324
- engagement_counts[classified_engagement] += 1
325
- else:
326
- engagement_counts['Absent'] += 1 # Count as Absent if not engaged
327
-
328
- # Calculate percentages for engagement states
329
- total_sessions = sum(engagement_counts.values())
330
-
331
- # Engagement (%)
332
- engagement_pct = (engagement_counts[self.ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
333
- engagement_pct = round(engagement_pct)
334
-
335
- engaged_pct = (engagement_counts[self.ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
336
- engaged_pct = round(engaged_pct)
337
-
338
- partially_engaged_pct = (engagement_counts[self.PARTIALLY_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
339
- partially_engaged_pct = round(partially_engaged_pct)
340
-
341
- not_engaged_pct = (engagement_counts[self.NOT_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
342
- not_engaged_pct = round(not_engaged_pct)
343
-
344
- absent_pct = (engagement_counts['Absent'] / total_sessions * 100) if total_sessions > 0 else 0
345
- absent_pct = round(absent_pct)
346
-
347
- # Determine if the student attended ≥ 90% of sessions
348
- attended_90 = "Yes" if attendance_pct >= 90 else "No"
349
-
350
- # Determine if the student was engaged ≥ 80% of the time
351
- engaged_80 = "Yes" if engaged_pct >= 80 else "No"
352
-
353
- # Store metrics in the required order
354
- student_metrics[student_name] = {
355
- 'Attended ≥ 90%': attended_90,
356
- 'Engagement ≥ 80%': engaged_80,
357
- 'Attendance (%)': attendance_pct,
358
- 'Engagement (%)': engagement_pct,
359
- f'{self.ENGAGED_STR} (%)': engaged_pct,
360
- f'{self.PARTIALLY_ENGAGED_STR} (%)': partially_engaged_pct,
361
- f'{self.NOT_ENGAGED_STR} (%)': not_engaged_pct,
362
- 'Absent (%)': absent_pct
363
- }
364
-
365
- # Create a DataFrame from student_metrics
366
- student_metrics_df = pd.DataFrame.from_dict(student_metrics, orient='index').reset_index()
367
- student_metrics_df.rename(columns={'index': 'Student'}, inplace=True)
368
- return student_metrics_df
369
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  def compute_average_metrics(self, student_metrics_df):
371
  # Calculate the attendance and engagement average percentages across students
372
  attendance_avg_stats = student_metrics_df['Attendance (%)'].mean() # Calculate the average attendance percentage
 
182
 
183
 
184
 
 
 
185
  import re
186
  import pandas as pd
187
  import os
 
208
  return pd.read_excel(uploaded_file)
209
 
210
  def format_session_data(self, df):
 
211
  date_column = next((col for col in df.columns if col in ["Date of Session", "Date"]), None)
212
  if date_column:
213
  df[date_column] = pd.to_datetime(df[date_column], errors='coerce').dt.date
 
273
  'Intervention Sessions Not Held': [total_days - sessions_held],
274
  'Total Number of Days Available': [total_days]
275
  })
 
276
 
277
  def classify_engagement(self, engagement_str):
278
+ engagement_str = str(engagement_str).lower()
279
  if engagement_str.startswith(self.ENGAGED_STR.lower()):
280
  return self.ENGAGED_STR
281
  elif engagement_str.startswith(self.PARTIALLY_ENGAGED_STR.lower()):
 
285
  else:
286
  return 'Unknown'
287
 
288
+ def compute_student_metrics(self, df):
289
+ intervention_column = self.find_intervention_column(df)
290
+ intervention_df = df[df[intervention_column].str.strip().str.lower() == 'yes']
291
+ intervention_sessions_held = len(intervention_df)
292
+ student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
293
+
294
+ student_metrics = {}
295
+ for col in student_columns:
296
+ student_name = col.replace('Student Attendance [', '').replace(']', '').strip()
297
+ student_data = intervention_df[[col]].copy()
298
+ student_data[col] = student_data[col].fillna('Absent')
299
+
300
+ attendance_values = student_data[col].apply(lambda x: 1 if self.classify_engagement(x) in [
301
+ self.ENGAGED_STR,
302
+ self.PARTIALLY_ENGAGED_STR,
303
+ self.NOT_ENGAGED_STR
304
+ ] else 0)
305
+
306
+ sessions_attended = attendance_values.sum()
307
+ attendance_pct = (sessions_attended / intervention_sessions_held) * 100 if intervention_sessions_held > 0 else 0
308
+ attendance_pct = round(attendance_pct)
309
+
310
+ engagement_counts = {
311
+ self.ENGAGED_STR: 0,
312
+ self.PARTIALLY_ENGAGED_STR: 0,
313
+ self.NOT_ENGAGED_STR: 0,
314
+ 'Absent': 0
315
+ }
316
+
317
+ for x in student_data[col]:
318
+ classified_engagement = self.classify_engagement(x)
319
+ if classified_engagement in engagement_counts:
320
+ engagement_counts[classified_engagement] += 1
321
+ else:
322
+ engagement_counts['Absent'] += 1 # Count as Absent if not engaged
323
+
324
+ total_sessions = sum(engagement_counts.values())
325
+
326
+ engagement_pct = (engagement_counts[self.ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
327
+ engagement_pct = round(engagement_pct)
328
+
329
+ engaged_pct = (engagement_counts[self.ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
330
+ engaged_pct = round(engaged_pct)
331
+
332
+ partially_engaged_pct = (engagement_counts[self.PARTIALLY_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
333
+ partially_engaged_pct = round(partially_engaged_pct)
334
+
335
+ not_engaged_pct = (engagement_counts[self.NOT_ENGAGED_STR] / total_sessions * 100) if total_sessions > 0 else 0
336
+ not_engaged_pct = round(not_engaged_pct)
337
+
338
+ absent_pct = (engagement_counts['Absent'] / total_sessions * 100) if total_sessions > 0 else 0
339
+ absent_pct = round(absent_pct)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
 
341
+ # Determine if the student attended ≥ 90% of sessions
342
+ attended_90 = "Yes" if attendance_pct >= 90 else "No"
343
+
344
+ # Determine if the student was engaged ≥ 80% of the time
345
+ engaged_80 = "Yes" if engaged_pct >= 80 else "No"
346
+
347
+ # Store metrics in the required order
348
+ student_metrics[student_name] = {
349
+ 'Attended ≥ 90%': attended_90,
350
+ 'Engagement ≥ 80%': engaged_80,
351
+ 'Attendance (%)': attendance_pct,
352
+ 'Engagement (%)': engagement_pct,
353
+ f'{self.ENGAGED_STR} (%)': engaged_pct,
354
+ f'{self.PARTIALLY_ENGAGED_STR} (%)': partially_engaged_pct,
355
+ f'{self.NOT_ENGAGED_STR} (%)': not_engaged_pct,
356
+ 'Absent (%)': absent_pct
357
+ }
358
+
359
+ # Create a DataFrame from student_metrics
360
+ student_metrics_df = pd.DataFrame.from_dict(student_metrics, orient='index').reset_index()
361
+ student_metrics_df.rename(columns={'index': 'Student'}, inplace=True)
362
+ return student_metrics_df
363
+
364
  def compute_average_metrics(self, student_metrics_df):
365
  # Calculate the attendance and engagement average percentages across students
366
  attendance_avg_stats = student_metrics_df['Attendance (%)'].mean() # Calculate the average attendance percentage