hari2425's picture
Update app.py
6dc5058 verified
Raw
History Blame Contribute Delete
3.22 kB
import gradio as gr
import pandas as pd
import tempfile
import os
def process_excel(file):
if file is None:
return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None
try:
df = pd.read_excel(file)
except Exception as e:
return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None
# Fill forward key metadata columns if they exist
for col in ['Student', 'Email', 'Status', 'Survey']:
if col in df.columns:
df[col] = df[col].ffill()
# Clean column names (strip spaces)
df.columns = [col.strip() for col in df.columns]
# Map for flexible column names (remove spaces, '/', '_', lowercase)
col_map = {col.lower().replace(' ', '').replace('/', '').replace('_', ''): col for col in df.columns}
student_col = col_map.get('student', None)
email_col = col_map.get('email', None)
question_col = None
for candidate in ['answersquestion', 'question']:
if candidate in col_map:
question_col = col_map[candidate]
break
answers_col = col_map.get('answers', None)
# If any required column missing, return empty DataFrame
if not all([student_col, email_col, question_col, answers_col]):
return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None
# Extract Score and Comment from 'Answers' column (e.g. "4: Some comment")
df[['Score', 'Comment']] = df[answers_col].astype(str).str.extract(r'^\s*(\d+)\s*[:\-]\s*(.*)$')
# Handle rows with just a score and no comment (e.g. "4")
score_only_mask = df[answers_col].astype(str).str.match(r'^\s*\d+\s*$')
df.loc[score_only_mask, 'Score'] = df.loc[score_only_mask, answers_col].str.strip()
df.loc[score_only_mask, 'Comment'] = ""
# Build output DataFrame with required columns
output_df = pd.DataFrame()
output_df['Student'] = df[student_col]
output_df['Email'] = df[email_col]
output_df['Answers/Question'] = df[question_col]
output_df['Score'] = df['Score']
output_df['Comment'] = df['Comment']
# Drop rows missing any of these essential fields
output_df = output_df.dropna(subset=['Student', 'Email', 'Answers/Question', 'Score'])
output_df = output_df.reset_index(drop=True)
# Save the cleaned data to a temporary Excel file for download
temp_dir = tempfile.gettempdir()
temp_path = os.path.join(temp_dir, "survey_answers_clean.xlsx")
output_df.to_excel(temp_path, index=False)
# Return preview (first 100 rows) and file path for download
return output_df.head(100), temp_path
title = "Survey Data Transformer"
description = """
Upload your survey Excel report.<br>
The output will always have these columns:<br><b>Student, Email, Answers/Question, Score, Comment</b>.<br>
Check the preview and download the cleaned Excel file.
"""
gr.Interface(
fn=process_excel,
inputs=gr.File(label="Upload Excel File"),
outputs=[
gr.Dataframe(label="Preview (first 100 rows)"),
gr.File(label="Download Cleaned Excel")
],
title=title,
description=description,
allow_flagging="never",
).launch()