Spaces:
Sleeping
Sleeping
File size: 3,215 Bytes
9a987e3 6dc5058 9a987e3 858bba9 6dc5058 858bba9 9a987e3 858bba9 6dc5058 858bba9 6dc5058 858bba9 6dc5058 858bba9 6dc5058 858bba9 6dc5058 858bba9 6dc5058 858bba9 6dc5058 858bba9 6dc5058 858bba9 6dc5058 858bba9 6dc5058 858bba9 9a987e3 858bba9 de03522 6dc5058 de03522 858bba9 9a987e3 858bba9 9a987e3 858bba9 9a987e3 858bba9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | import gradio as gr
import pandas as pd
import tempfile
import os
def process_excel(file):
if file is None:
return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None
try:
df = pd.read_excel(file)
except Exception as e:
return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None
# Fill forward key metadata columns if they exist
for col in ['Student', 'Email', 'Status', 'Survey']:
if col in df.columns:
df[col] = df[col].ffill()
# Clean column names (strip spaces)
df.columns = [col.strip() for col in df.columns]
# Map for flexible column names (remove spaces, '/', '_', lowercase)
col_map = {col.lower().replace(' ', '').replace('/', '').replace('_', ''): col for col in df.columns}
student_col = col_map.get('student', None)
email_col = col_map.get('email', None)
question_col = None
for candidate in ['answersquestion', 'question']:
if candidate in col_map:
question_col = col_map[candidate]
break
answers_col = col_map.get('answers', None)
# If any required column missing, return empty DataFrame
if not all([student_col, email_col, question_col, answers_col]):
return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None
# Extract Score and Comment from 'Answers' column (e.g. "4: Some comment")
df[['Score', 'Comment']] = df[answers_col].astype(str).str.extract(r'^\s*(\d+)\s*[:\-]\s*(.*)$')
# Handle rows with just a score and no comment (e.g. "4")
score_only_mask = df[answers_col].astype(str).str.match(r'^\s*\d+\s*$')
df.loc[score_only_mask, 'Score'] = df.loc[score_only_mask, answers_col].str.strip()
df.loc[score_only_mask, 'Comment'] = ""
# Build output DataFrame with required columns
output_df = pd.DataFrame()
output_df['Student'] = df[student_col]
output_df['Email'] = df[email_col]
output_df['Answers/Question'] = df[question_col]
output_df['Score'] = df['Score']
output_df['Comment'] = df['Comment']
# Drop rows missing any of these essential fields
output_df = output_df.dropna(subset=['Student', 'Email', 'Answers/Question', 'Score'])
output_df = output_df.reset_index(drop=True)
# Save the cleaned data to a temporary Excel file for download
temp_dir = tempfile.gettempdir()
temp_path = os.path.join(temp_dir, "survey_answers_clean.xlsx")
output_df.to_excel(temp_path, index=False)
# Return preview (first 100 rows) and file path for download
return output_df.head(100), temp_path
title = "Survey Data Transformer"
description = """
Upload your survey Excel report.<br>
The output will always have these columns:<br><b>Student, Email, Answers/Question, Score, Comment</b>.<br>
Check the preview and download the cleaned Excel file.
"""
gr.Interface(
fn=process_excel,
inputs=gr.File(label="Upload Excel File"),
outputs=[
gr.Dataframe(label="Preview (first 100 rows)"),
gr.File(label="Download Cleaned Excel")
],
title=title,
description=description,
allow_flagging="never",
).launch()
|