Spaces:

hari2425
/

Module_Transform

Sleeping

App Files Files Community

Module_Transform / app.py

hari2425

Update app.py

6dc5058 verified 11 months ago

Raw

History Blame Contribute Delete

3.22 kB

	import gradio as gr
	import pandas as pd
	import tempfile
	import os

	def process_excel(file):
	if file is None:
	return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None

	try:
	df = pd.read_excel(file)
	except Exception as e:
	return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None

	# Fill forward key metadata columns if they exist
	for col in ['Student', 'Email', 'Status', 'Survey']:
	if col in df.columns:
	df[col] = df[col].ffill()

	# Clean column names (strip spaces)
	df.columns = [col.strip() for col in df.columns]

	# Map for flexible column names (remove spaces, '/', '_', lowercase)
	col_map = {col.lower().replace(' ', '').replace('/', '').replace('_', ''): col for col in df.columns}

	student_col = col_map.get('student', None)
	email_col = col_map.get('email', None)
	question_col = None
	for candidate in ['answersquestion', 'question']:
	if candidate in col_map:
	question_col = col_map[candidate]
	break
	answers_col = col_map.get('answers', None)

	# If any required column missing, return empty DataFrame
	if not all([student_col, email_col, question_col, answers_col]):
	return pd.DataFrame(columns=['Student', 'Email', 'Answers/Question', 'Score', 'Comment']), None

	# Extract Score and Comment from 'Answers' column (e.g. "4: Some comment")
	df[['Score', 'Comment']] = df[answers_col].astype(str).str.extract(r'^\s(\d+)\s[:\-]\s(.)$')

	# Handle rows with just a score and no comment (e.g. "4")
	score_only_mask = df[answers_col].astype(str).str.match(r'^\s\d+\s$')
	df.loc[score_only_mask, 'Score'] = df.loc[score_only_mask, answers_col].str.strip()
	df.loc[score_only_mask, 'Comment'] = ""

	# Build output DataFrame with required columns
	output_df = pd.DataFrame()
	output_df['Student'] = df[student_col]
	output_df['Email'] = df[email_col]
	output_df['Answers/Question'] = df[question_col]
	output_df['Score'] = df['Score']
	output_df['Comment'] = df['Comment']

	# Drop rows missing any of these essential fields
	output_df = output_df.dropna(subset=['Student', 'Email', 'Answers/Question', 'Score'])
	output_df = output_df.reset_index(drop=True)

	# Save the cleaned data to a temporary Excel file for download
	temp_dir = tempfile.gettempdir()
	temp_path = os.path.join(temp_dir, "survey_answers_clean.xlsx")
	output_df.to_excel(temp_path, index=False)

	# Return preview (first 100 rows) and file path for download
	return output_df.head(100), temp_path


	title = "Survey Data Transformer"
	description = """
	Upload your survey Excel report.<br>
	The output will always have these columns:<br><b>Student, Email, Answers/Question, Score, Comment</b>.<br>
	Check the preview and download the cleaned Excel file.
	"""

	gr.Interface(
	fn=process_excel,
	inputs=gr.File(label="Upload Excel File"),
	outputs=[
	gr.Dataframe(label="Preview (first 100 rows)"),
	gr.File(label="Download Cleaned Excel")
	],
	title=title,
	description=description,
	allow_flagging="never",
	).launch()