import gradio as gr import pandas as pd import tempfile import os def process_excel(file): if file is None: return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None try: df = pd.read_excel(file) except Exception as e: return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None # Fill forward key metadata columns if they exist for col in ['Student', 'Email', 'Status', 'Survey']: if col in df.columns: df[col] = df[col].ffill() # Clean column names (strip spaces) df.columns = [col.strip() for col in df.columns] # Map for flexible column names (remove spaces, '/', '_', lowercase) col_map = {col.lower().replace(' ', '').replace('/', '').replace('_', ''): col for col in df.columns} student_col = col_map.get('student', None) email_col = col_map.get('email', None) question_col = None for candidate in ['answersquestion', 'question']: if candidate in col_map: question_col = col_map[candidate] break answers_col = col_map.get('answers', None) # If any required column missing, return empty DataFrame if not all([student_col, email_col, question_col, answers_col]): return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None # Extract Score and Comment from 'Answers' column (e.g. "4: Some comment") df[['Score', 'Comment']] = df[answers_col].astype(str).str.extract(r'^\s*(\d+)\s*[:\-]\s*(.*)$') # Handle rows with just a score and no comment (e.g. "4") score_only_mask = df[answers_col].astype(str).str.match(r'^\s*\d+\s*$') df.loc[score_only_mask, 'Score'] = df.loc[score_only_mask, answers_col].str.strip() df.loc[score_only_mask, 'Comment'] = "" # Build output DataFrame with required columns output_df = pd.DataFrame() output_df['Student'] = df[student_col] output_df['Email'] = df[email_col] output_df['Answers/Question'] = df[question_col] output_df['Score'] = df['Score'] output_df['Comment'] = df['Comment'] # Drop rows missing any of these essential fields output_df = output_df.dropna(subset=['Student', 'Email', 'Answers/Question', 'Score']) output_df = output_df.reset_index(drop=True) # Save the cleaned data to a temporary Excel file for download temp_dir = tempfile.gettempdir() temp_path = os.path.join(temp_dir, "survey_answers_clean.xlsx") output_df.to_excel(temp_path, index=False) # Return preview (first 100 rows) and file path for download return output_df.head(100), temp_path title = "Survey Data Transformer" description = """ Upload your survey Excel report.
The output will always have these columns:
Student, Email, Answers/Question, Score, Comment.
Check the preview and download the cleaned Excel file. """ gr.Interface( fn=process_excel, inputs=gr.File(label="Upload Excel File"), outputs=[ gr.Dataframe(label="Preview (first 100 rows)"), gr.File(label="Download Cleaned Excel") ], title=title, description=description, allow_flagging="never", ).launch()