# Legacy draft of the /sorted-potential-scores endpoint, kept for reference.
# from fastapi import FastAPI, HTTPException, Query
# from pydantic import BaseModel
# import gspread
# from google.oauth2.service_account import Credentials
# import pandas as pd
# from collections import defaultdict
# import os
# from fastapi.middleware.cors import CORSMiddleware
#
# # Initialize the FastAPI app
# app = FastAPI()
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["*"],  # You can specify domains instead of "*" to restrict access
#     allow_credentials=True,
#     allow_methods=["*"],  # Allows all HTTP methods (POST, GET, OPTIONS, etc.)
#     allow_headers=["*"],  # Allows all headers
# )
#
# # Step 1: Define a function to get Google Sheets API credentials
# def get_credentials():
#     """Get Google Sheets API credentials from environment variables."""
#     try:
#         # Construct the service account info dictionary
#         service_account_info = {
#             "type": os.getenv("SERVICE_ACCOUNT_TYPE"),
#             "project_id": os.getenv("PROJECT_ID"),
#             "private_key_id": os.getenv("PRIVATE_KEY_ID"),
#             "private_key": os.getenv("PRIVATE_KEY").replace('\\n', '\n'),
#             "client_email": os.getenv("CLIENT_EMAIL"),
#             "client_id": os.getenv("CLIENT_ID"),
#             "auth_uri": os.getenv("AUTH_URI"),
#             "token_uri": os.getenv("TOKEN_URI"),
#             "auth_provider_x509_cert_url": os.getenv("AUTH_PROVIDER_X509_CERT_URL"),
#             "client_x509_cert_url": os.getenv("CLIENT_X509_CERT_URL"),
#             "universe_domain": os.getenv("UNIVERSE_DOMAIN")
#         }
#         scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
#         creds = Credentials.from_service_account_info(service_account_info, scopes=scope)
#         return creds
#     except Exception as e:
#         print(f"Error getting credentials: {e}")
#         return None
#
# # Step 2: Authorize gspread using the credentials
# creds = get_credentials()
# client = gspread.authorize(creds)
#
# # Function to get file paths based on coaching code
# def get_file_paths(coaching_code):
#     if coaching_code == '1919':
#         return {
#             'journal': 'https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?usp=drive_link',
#             'panic_button': 'https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?usp=drive_link',
#             'test': 'https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?usp=drive_link'
#         }
#     if coaching_code == '0946':
#         return {
#             'journal': 'https://docs.google.com/spreadsheets/d/1c1TkL7sOUvFn6UPz3gwp135UVjOou9u1weohWzpmx6I/edit?usp=drive_link',
#             'panic_button': 'https://docs.google.com/spreadsheets/d/1RhbPQnNNBUthKKJyoW4q6x3uaWl1YSqmsFlfJ2THphE/edit?usp=drive_link',
#             'test': 'https://docs.google.com/spreadsheets/d/1JO5wDkfl2fr2ZQenI8OEu48jkWm48veYN1Fsw5Ctkzw/edit?usp=drive_link'
#         }
#
# # Panic button weightages
# academic_weights = {'BACKLOGS': -5, 'MISSED CLASSES': -4, 'NOT UNDERSTANDING': -3, 'BAD MARKS': -3, 'LACK OF MOTIVATION': -3}
# non_academic_weights = {'EMOTIONAL FACTORS': -3, 'PROCRASTINATE': -2, 'LOST INTEREST': -4, 'LACK OF FOCUS': -2, 'GOALS NOT ACHIEVED': -2, 'LACK OF DISCIPLINE': -2}
#
# # Max weighted panic score (note: the weights are negative, so max() selects
# # the least severe factor in each group, giving -15 here)
# max_weighted_panic_score = sum([max(academic_weights.values()) * 3, max(non_academic_weights.values()) * 3])
# # Function to calculate potential score
# def calculate_potential_score(row):
#     # Test score normalization (70% weightage)
#     if row['test_scores']:  # Check if test_scores is not empty
#         avg_test_score = sum(row['test_scores'].values()) / len(row['test_scores'])
#         test_score_normalized = (avg_test_score / 40) * 70  # Scale test score to 70
#     else:
#         test_score_normalized = 0  # Default value for users with no test scores
#
#     # Panic score calculation (20% weightage)
#     student_panic_score = 0
#     if row['panic_button']:  # Ensure panic_button is not NaN or empty
#         for factor, count in row['panic_button'].items():
#             if factor in academic_weights:
#                 student_panic_score += academic_weights[factor] * count
#             elif factor in non_academic_weights:
#                 student_panic_score += non_academic_weights[factor] * count
#     else:
#         student_panic_score = 0  # Default if no panic button issues
#
#     # Panic score normalized to 20
#     panic_score = 20 * ((1 - (student_panic_score / max_weighted_panic_score)) if max_weighted_panic_score != 0 else 1)
#
#     # Journal score calculation (10% weightage)
#     if pd.notna(row['productivity_yes_no']) and row['productivity_yes_no'] == 'Yes':
#         if pd.notna(row['productivity_rate']):
#             journal_score = (float(row['productivity_rate']) / 10) * 10  # Scale journal score to 10
#         else:
#             journal_score = 0  # Default if productivity_rate is missing
#     elif pd.notna(row['productivity_yes_no']) and row['productivity_yes_no'] == 'No':
#         if pd.notna(row['productivity_rate']):
#             journal_score = (float(row['productivity_rate']) / 10) * 5  # Scale journal score to 5 if "No"
#         else:
#             journal_score = 0  # Default if productivity_rate is missing
#     else:
#         journal_score = 0  # Default if productivity_yes_no is missing
#
#     # Total score based on new weightages
#     total_potential_score = test_score_normalized + panic_score + journal_score
#     return total_potential_score
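# # A quick sanity check of the scoring above (illustrative numbers, not taken
# # from the sheets):
# #   test_scores = {'algebra': 30}    -> (30 / 40) * 70          = 52.5
# #   panic_button = {'BACKLOGS': 1}   -> student_panic_score = -5;
# #     with max_weighted_panic_score = -15: 20 * (1 - (-5 / -15)) ~ 13.33
# #   'Yes' with productivity_rate = 8 -> (8 / 10) * 10           = 8.0
# #   total potential score            ~ 52.5 + 13.33 + 8.0       = 73.83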
# # Step 11: Define API endpoint to get the sorted potential scores
# @app.get("/sorted-potential-scores")
# async def get_sorted_potential_scores(coaching_code: str = Query(..., description="Coaching code to determine file paths")):
#     try:
#         file_paths = get_file_paths(coaching_code)
#         if not file_paths:
#             raise HTTPException(status_code=400, detail="Invalid coaching code")
#         print("A")
#
#         # Open Google Sheets using the URLs
#         journal_file = client.open_by_url(file_paths['journal']).worksheet('Sheet1')
#         panic_button_file = client.open_by_url(file_paths['panic_button']).worksheet('Sheet1')
#         test_file = client.open_by_url(file_paths['test']).worksheet('Sheet1')
#         print("B")
#
#         # Convert the sheets into Pandas DataFrames
#         journal_df = pd.DataFrame(journal_file.get_all_values())
#         panic_button_df = pd.DataFrame(panic_button_file.get_all_values())
#         test_df = pd.DataFrame(test_file.get_all_values())
#         print("C")
#
#         # Label the columns manually since there are no headers
#         journal_df.columns = ['user_id', 'productivity_yes_no', 'productivity_rate']
#         panic_button_df.columns = ['user_id', 'panic_button']
#         print("D")
#
#         # Group panic buttons by user_id and combine into a single comma-separated string
#         panic_button_grouped = panic_button_df.groupby('user_id')['panic_button'].apply(lambda x: ','.join(x)).reset_index()
#         print("E")
#
#         # Merge journal and panic button data
#         merged_journal_panic = pd.merge(journal_df, panic_button_grouped, on='user_id', how='outer')
#         print("F")
#
#         # Process the test data
#         test_data = []
#         for index, row in test_df.iterrows():
#             user_id = row[0]
#             i = 1
#             while i < len(row) and pd.notna(row[i]):  # Process chapter and score pairs
#                 chapter = row[i].lower().strip()
#                 score = row[i + 1]
#                 if pd.notna(score):
#                     test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score})
#                 i += 2
#         print("G")
#
#         # Convert the processed test data into a DataFrame
#         test_df_processed = pd.DataFrame(test_data)
#         print("H")
#
#         # Merge the journal+panic button data with the test data
#         merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')
#         print("I")
#
#         # Drop rows where all data (except user_id and test_chapter) is missing
#         merged_data_cleaned = merged_data.dropna(subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'], how='all')
#         print("J")
#
#         # Group the merged DataFrame by user_id
#         df = pd.DataFrame(merged_data_cleaned)
#         print("K")
#
#         # Function to process panic button counts and test scores
#         def process_group(group):
#             # Panic button counts
#             panic_button_series = group['panic_button'].dropna()
#             panic_button_dict = panic_button_series.value_counts().to_dict()
#             # Test scores aggregation
#             test_scores = group[['test_chapter', 'test_score']].dropna()
#             test_scores['test_score'] = pd.to_numeric(test_scores['test_score'], errors='coerce')
#             # Create the test_scores_dict excluding NaN values
#             test_scores_dict = test_scores.groupby('test_chapter')['test_score'].mean().dropna().to_dict()
#             return pd.Series({
#                 'productivity_yes_no': group['productivity_yes_no'].iloc[0],
#                 'productivity_rate': group['productivity_rate'].iloc[0],
#                 'panic_button': panic_button_dict,
#                 'test_scores': test_scores_dict
#             })
#
#         # Apply the group processing function
#         merged_df = df.groupby('user_id').apply(process_group).reset_index()
#         print("L")
#
#         # Calculate potential scores and sort
#         merged_df['potential_score'] = merged_df.apply(calculate_potential_score, axis=1)
#         merged_df['potential_score'] = merged_df['potential_score'].round(2)
#         sorted_df = merged_df[['user_id', 'potential_score']].sort_values(by='potential_score', ascending=False)
#         print("M")
#
#         result = sorted_df.to_dict(orient="records")
#         return {"sorted_scores": result}
#     except Exception as e:
#         raise HTTPException(status_code=500, detail=str(e))
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import gspread
from google.oauth2.service_account import Credentials
import pandas as pd
from collections import defaultdict
import os
from fastapi.middleware.cors import CORSMiddleware
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # You can specify domains instead of "*" to restrict access
    allow_credentials=True,
    allow_methods=["*"],  # Allows all HTTP methods (POST, GET, OPTIONS, etc.)
    allow_headers=["*"],  # Allows all headers
)
# Request model: the coaching code selects which cohort's sheets to read
class CoachingCodeRequest(BaseModel):
    coachingCode: str
# Function to get credentials
def get_credentials():
    """Get Google Sheets API credentials from environment variables."""
    try:
        # Construct the service account info dictionary
        service_account_info = {
            "type": os.getenv("SERVICE_ACCOUNT_TYPE"),
            "project_id": os.getenv("PROJECT_ID"),
            "private_key_id": os.getenv("PRIVATE_KEY_ID"),
            "private_key": os.getenv("PRIVATE_KEY").replace('\\n', '\n'),
            "client_email": os.getenv("CLIENT_EMAIL"),
            "client_id": os.getenv("CLIENT_ID"),
            "auth_uri": os.getenv("AUTH_URI"),
            "token_uri": os.getenv("TOKEN_URI"),
            "auth_provider_x509_cert_url": os.getenv("AUTH_PROVIDER_X509_CERT_URL"),
            "client_x509_cert_url": os.getenv("CLIENT_X509_CERT_URL"),
            "universe_domain": os.getenv("UNIVERSE_DOMAIN")
        }
        scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
        creds = Credentials.from_service_account_info(service_account_info, scopes=scope)
        return creds
    except Exception as e:
        print(f"Error getting credentials: {e}")
        return None
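# A sample environment configuration (the variable names match the os.getenv()
# calls above; the values are placeholders standing in for fields of a
# service-account JSON, except the auth/token URIs, which are Google's
# standard endpoints):
#
#   SERVICE_ACCOUNT_TYPE=service_account
#   PROJECT_ID=my-project
#   PRIVATE_KEY_ID=abc123
#   PRIVATE_KEY="-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n"
#   CLIENT_EMAIL=bot@my-project.iam.gserviceaccount.com
#   CLIENT_ID=123456789
#   AUTH_URI=https://accounts.google.com/o/oauth2/auth
#   TOKEN_URI=https://oauth2.googleapis.com/token
#   AUTH_PROVIDER_X509_CERT_URL=https://www.googleapis.com/oauth2/v1/certs
#   CLIENT_X509_CERT_URL=https://www.googleapis.com/robot/v1/metadata/x509/...
#   UNIVERSE_DOMAIN=googleapis.com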
# Select files based on coaching code
def select_files(coaching_code):
    creds = get_credentials()
    client = gspread.authorize(creds)
    if coaching_code == "1919":
        journal_file = client.open_by_url('https://docs.google.com/spreadsheets/d/1EFf2lr4A10nt4RhIqxCD_fxe-l3sXH09II0TEkMmvhA/edit?gid=0#gid=0').worksheet('Sheet1')
        panic_button_file = client.open_by_url('https://docs.google.com/spreadsheets/d/1nFZGkCvRV6qS-mhsORhX3dxI0JSge32_UwWgWKl3eyw/edit?gid=0#gid=0').worksheet('Sheet1')
        test_file = client.open_by_url('https://docs.google.com/spreadsheets/d/13PUHySUXWtKBusjugoe7Dbsm39PwBUfG4tGLipspIx4/edit?gid=0#gid=0').worksheet('Sheet1')
    elif coaching_code == "1099":
        journal_file = client.open_by_url('https://docs.google.com/spreadsheets/d/12UQzr7xy70-MvbKUuqM6YMUF-y2kY1rumX0vOj0hKXI/edit?gid=0#gid=0').worksheet('Sheet1')
        panic_button_file = client.open_by_url('https://docs.google.com/spreadsheets/d/1zaKSRKgf2Nd7lWIf315YzvQeTQ3gU_PIRIS_bEAhl90/edit?gid=0#gid=0').worksheet('Sheet1')
        test_file = client.open_by_url('https://docs.google.com/spreadsheets/d/1ms_SdloQqlXO85NK_xExhHT0LEeLsth0VBmdHQt55jc/edit?gid=0#gid=0').worksheet('Sheet1')
    else:
        raise HTTPException(status_code=404, detail="Invalid coaching code")
    return journal_file, panic_button_file, test_file
# Main route: report which chapters each student needs to revise
@app.post("/get_revision_chapters")
async def get_revision_chapters(data: CoachingCodeRequest):
    journal_file, panic_button_file, test_file = select_files(data.coachingCode)

    # Load data into DataFrames (gspread returns every cell as a string)
    journal_df = pd.DataFrame(journal_file.get_all_values())
    panic_button_df = pd.DataFrame(panic_button_file.get_all_values())
    test_df = pd.DataFrame(test_file.get_all_values())
    # Processing logic
    # Panic sheet rows: a user_id followed by cells read in steps of two
    # (mirroring the paired layout of the other sheets); [-5:] keeps only the
    # most recent non-empty cells per student
    panic_data = []
    for index, row in panic_button_df.iterrows():
        user_id = row[0]
        # gspread returns blank cells as '', which dropna() does not catch,
        # so filter blanks explicitly
        row_pairs = [v for v in row[1:].to_list() if v not in ('', None)][-5:]
        for i in range(0, len(row_pairs), 2):
            panic = row_pairs[i].upper().strip()
            if panic:  # skip cells that are empty after stripping
                panic_data.append({'user_id': user_id, 'panic_button': panic})
    panic_df_processed = pd.DataFrame(panic_data)
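    # A minimal illustration (row layout assumed from the step-of-two loop
    # above): a row ['stu1', 'BACKLOGS', '01-09', 'LACK OF FOCUS', '02-09']
    # yields {'user_id': 'stu1', 'panic_button': 'BACKLOGS'} and
    # {'user_id': 'stu1', 'panic_button': 'LACK OF FOCUS'}.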
    # Test sheet rows: a user_id followed by (chapter, score) pairs; keep the
    # five most recent scores recorded for each chapter
    test_data = []
    for index, row in test_df.iterrows():
        user_id = row[0]
        row_pairs = [v for v in row[1:].to_list() if v not in ('', None)]
        chapter_scores = defaultdict(list)
        # len(row_pairs) - 1 guards against a trailing chapter with no score
        for i in range(0, len(row_pairs) - 1, 2):
            chapter = row_pairs[i].lower().strip()
            score = row_pairs[i + 1]
            if score:
                chapter_scores[chapter].append(score)
        for chapter, scores in chapter_scores.items():
            last_5_scores = scores[-5:]
            for score in last_5_scores:
                test_data.append({'user_id': user_id, 'test_chapter': chapter, 'test_score': score})
    test_df_processed = pd.DataFrame(test_data)
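    # For instance, a row ['stu1', 'algebra', '18', 'algebra', '12'] (pairing
    # assumed from the loop above) yields two records for chapter 'algebra'
    # with scores '18' and '12'; scores stay strings here and are converted to
    # numbers later in process_group.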
    # Journal sheet rows: a user_id followed by (yes/no, rate) pairs; [-10:]
    # keeps the five most recent pairs
    journal_data = []
    for index, row in journal_df.iterrows():
        user_id = row[0]
        row_pairs = [v for v in row[1:].to_list() if v not in ('', None)][-10:]
        for i in range(0, len(row_pairs) - 1, 2):
            productivity_yes_no = row_pairs[i].lower().strip()
            productivity_rate = row_pairs[i + 1]
            if productivity_rate:
                journal_data.append({'user_id': user_id, 'productivity_yes_no': productivity_yes_no, 'productivity_rate': productivity_rate})
    journal_df_processed = pd.DataFrame(journal_data)
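    # E.g. a row ['stu1', 'Yes', '8', 'No', '4'] (pairing assumed) yields
    # records with productivity_yes_no 'yes'/'no' and rates '8'/'4'.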
    # Merge the three sources on user_id, keeping students present in any
    # sheet, then drop rows with no usable data
    merged_journal_panic = pd.merge(panic_df_processed, journal_df_processed, on='user_id', how='outer')
    merged_data = pd.merge(merged_journal_panic, test_df_processed, on='user_id', how='outer')
    merged_data_cleaned = merged_data.dropna(subset=['productivity_yes_no', 'productivity_rate', 'panic_button', 'test_chapter'], how='all')
    # Aggregate each student's rows: count panic-button presses and average
    # test scores per chapter
    def process_group(group):
        # Panic button counts
        panic_button_series = group['panic_button'].dropna()
        panic_button_dict = panic_button_series.value_counts().to_dict()
        # Test scores aggregation (.copy() avoids mutating a view of the group)
        test_scores = group[['test_chapter', 'test_score']].dropna().copy()
        test_scores['test_score'] = pd.to_numeric(test_scores['test_score'], errors='coerce')
        # Create the test_scores_dict excluding NaN values
        test_scores_dict = test_scores.groupby('test_chapter')['test_score'].mean().dropna().to_dict()
        return pd.Series({
            'productivity_yes_no': group['productivity_yes_no'].iloc[0],
            'productivity_rate': group['productivity_rate'].iloc[0],
            'panic_button': panic_button_dict,
            'test_scores': test_scores_dict
        })
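    # process_group collapses one student's rows into a single record, e.g.
    # (values illustrative):
    # {'productivity_yes_no': 'yes', 'productivity_rate': '8',
    #  'panic_button': {'BACKLOGS': 2}, 'test_scores': {'algebra': 15.0}}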
    merged_df = merged_data_cleaned.groupby('user_id').apply(process_group).reset_index()

    # Collect, per chapter, the students whose average score falls below the
    # revision threshold
    revision_dict = defaultdict(list)
    # Score threshold below which a chapter needs revision
    threshold = 16
    # Iterate through each student's test_scores
    for index, row in merged_df.iterrows():
        user_id = row['user_id']
        test_scores = row['test_scores']
        # Check each chapter's average score for this student
        for chapter, score in test_scores.items():
            if score < threshold:
                revision_dict[chapter].append(user_id)

    # Convert the dictionary to the response DataFrame
    revision_df = pd.DataFrame({
        'chapter_name': list(revision_dict.keys()),
        'user_ids_needing_revision': [', '.join(users) for users in revision_dict.values()]
    })
    revision_json = revision_df.to_dict(orient="records")
    return {"data": revision_json}