Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import tempfile | |
| import os | |
| def process_excel(file): | |
| if file is None: | |
| return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None | |
| try: | |
| df = pd.read_excel(file) | |
| except Exception as e: | |
| return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None | |
| # Fill forward key metadata columns if they exist | |
| for col in ['Student', 'Email', 'Status', 'Survey']: | |
| if col in df.columns: | |
| df[col] = df[col].ffill() | |
| # Clean column names (strip spaces) | |
| df.columns = [col.strip() for col in df.columns] | |
| # Map for flexible column names (remove spaces, '/', '_', lowercase) | |
| col_map = {col.lower().replace(' ', '').replace('/', '').replace('_', ''): col for col in df.columns} | |
| student_col = col_map.get('student', None) | |
| email_col = col_map.get('email', None) | |
| question_col = None | |
| for candidate in ['answersquestion', 'question']: | |
| if candidate in col_map: | |
| question_col = col_map[candidate] | |
| break | |
| answers_col = col_map.get('answers', None) | |
| # If any required column missing, return empty DataFrame | |
| if not all([student_col, email_col, question_col, answers_col]): | |
| return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None | |
| # Extract Score and Comment from 'Answers' column (e.g. "4: Some comment") | |
| df[['Score', 'Comment']] = df[answers_col].astype(str).str.extract(r'^\s*(\d+)\s*[:\-]\s*(.*)$') | |
| # Handle rows with just a score and no comment (e.g. "4") | |
| score_only_mask = df[answers_col].astype(str).str.match(r'^\s*\d+\s*$') | |
| df.loc[score_only_mask, 'Score'] = df.loc[score_only_mask, answers_col].str.strip() | |
| df.loc[score_only_mask, 'Comment'] = "" | |
| # Build output DataFrame with required columns | |
| output_df = pd.DataFrame() | |
| output_df['Student'] = df[student_col] | |
| output_df['Email'] = df[email_col] | |
| output_df['Answers/Question'] = df[question_col] | |
| output_df['Score'] = df['Score'] | |
| output_df['Comment'] = df['Comment'] | |
| # Drop rows missing any of these essential fields | |
| output_df = output_df.dropna(subset=['Student', 'Email', 'Answers/Question', 'Score']) | |
| output_df = output_df.reset_index(drop=True) | |
| # Save the cleaned data to a temporary Excel file for download | |
| temp_dir = tempfile.gettempdir() | |
| temp_path = os.path.join(temp_dir, "survey_answers_clean.xlsx") | |
| output_df.to_excel(temp_path, index=False) | |
| # Return preview (first 100 rows) and file path for download | |
| return output_df.head(100), temp_path | |
| title = "Survey Data Transformer" | |
| description = """ | |
| Upload your survey Excel report.<br> | |
| The output will always have these columns:<br><b>Student, Email, Answers/Question, Score, Comment</b>.<br> | |
| Check the preview and download the cleaned Excel file. | |
| """ | |
| gr.Interface( | |
| fn=process_excel, | |
| inputs=gr.File(label="Upload Excel File"), | |
| outputs=[ | |
| gr.Dataframe(label="Preview (first 100 rows)"), | |
| gr.File(label="Download Cleaned Excel") | |
| ], | |
| title=title, | |
| description=description, | |
| allow_flagging="never", | |
| ).launch() | |