File size: 3,215 Bytes
9a987e3
 
6dc5058
 
9a987e3
 
858bba9
 
 
 
 
 
 
 
6dc5058
858bba9
9a987e3
 
858bba9
6dc5058
858bba9
 
6dc5058
858bba9
6dc5058
858bba9
 
 
6dc5058
 
 
858bba9
 
 
6dc5058
 
858bba9
 
6dc5058
858bba9
 
6dc5058
 
 
 
 
 
858bba9
6dc5058
 
 
 
 
858bba9
6dc5058
858bba9
 
 
6dc5058
 
 
 
 
 
 
858bba9
9a987e3
 
858bba9
de03522
6dc5058
de03522
858bba9
9a987e3
858bba9
9a987e3
 
 
858bba9
9a987e3
 
 
 
 
858bba9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
import pandas as pd
import tempfile
import os

def process_excel(file):
    if file is None:
        return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None

    try:
        df = pd.read_excel(file)
    except Exception as e:
        return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None

    # Fill forward key metadata columns if they exist
    for col in ['Student', 'Email', 'Status', 'Survey']:
        if col in df.columns:
            df[col] = df[col].ffill()

    # Clean column names (strip spaces)
    df.columns = [col.strip() for col in df.columns]

    # Map for flexible column names (remove spaces, '/', '_', lowercase)
    col_map = {col.lower().replace(' ', '').replace('/', '').replace('_', ''): col for col in df.columns}

    student_col = col_map.get('student', None)
    email_col = col_map.get('email', None)
    question_col = None
    for candidate in ['answersquestion', 'question']:
        if candidate in col_map:
            question_col = col_map[candidate]
            break
    answers_col = col_map.get('answers', None)

    # If any required column missing, return empty DataFrame
    if not all([student_col, email_col, question_col, answers_col]):
        return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None

    # Extract Score and Comment from 'Answers' column (e.g. "4: Some comment")
    df[['Score', 'Comment']] = df[answers_col].astype(str).str.extract(r'^\s*(\d+)\s*[:\-]\s*(.*)$')

    # Handle rows with just a score and no comment (e.g. "4")
    score_only_mask = df[answers_col].astype(str).str.match(r'^\s*\d+\s*$')
    df.loc[score_only_mask, 'Score'] = df.loc[score_only_mask, answers_col].str.strip()
    df.loc[score_only_mask, 'Comment'] = ""

    # Build output DataFrame with required columns
    output_df = pd.DataFrame()
    output_df['Student'] = df[student_col]
    output_df['Email'] = df[email_col]
    output_df['Answers/Question'] = df[question_col]
    output_df['Score'] = df['Score']
    output_df['Comment'] = df['Comment']

    # Drop rows missing any of these essential fields
    output_df = output_df.dropna(subset=['Student', 'Email', 'Answers/Question', 'Score'])
    output_df = output_df.reset_index(drop=True)

    # Save the cleaned data to a temporary Excel file for download
    temp_dir = tempfile.gettempdir()
    temp_path = os.path.join(temp_dir, "survey_answers_clean.xlsx")
    output_df.to_excel(temp_path, index=False)

    # Return preview (first 100 rows) and file path for download
    return output_df.head(100), temp_path


title = "Survey Data Transformer"
description = """
Upload your survey Excel report.<br>
The output will always have these columns:<br><b>Student, Email, Answers/Question, Score, Comment</b>.<br>
Check the preview and download the cleaned Excel file.
"""

gr.Interface(
    fn=process_excel,
    inputs=gr.File(label="Upload Excel File"),
    outputs=[
        gr.Dataframe(label="Preview (first 100 rows)"),
        gr.File(label="Download Cleaned Excel")
    ],
    title=title,
    description=description,
    allow_flagging="never",
).launch()