# Essay_Grader / app.py
# gabrielganan's picture
# Update app.py
# 629c826 verified
import gradio as gr
import pandas as pd
import base64  # NOTE(review): base64 appears unused in this file — confirm before removing
import os
from openai import OpenAI
# OpenAI client authenticated via the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
# ID of the fine-tuned grading model, also supplied via the environment.
model_ft_id = os.getenv('model_ft_id')
def process_excel_file(file_path, non_question_columns_str):
    """Read a response spreadsheet and split it into grading inputs.

    Expected layout: header row holds the questions, the LAST data row
    holds the answer key, and every other row is one participant.

    Parameters
    ----------
    file_path : str
        Path to the uploaded Excel file.
    non_question_columns_str : str
        Comma-separated column names (case-insensitive) that are NOT
        questions, e.g. "Name, Id, Timestamp".

    Returns
    -------
    tuple
        (questions, answer_keys, participant_answers, non_question_data)
        where questions is a list of column headers, answer_keys maps
        question -> key answer, participant_answers is a DataFrame of
        the participant rows, and non_question_data holds the excluded
        columns (still including the answer-key row).
    """
    df = pd.read_excel(file_path, header=0, index_col=None)

    # Normalize the user's list for case-insensitive matching.
    non_question_columns = [col.strip().lower() for col in non_question_columns_str.split(',')]

    question_columns = [col for col in df.columns if col.lower() not in non_question_columns]
    non_question_data = df[[col for col in df.columns if col.lower() in non_question_columns]]

    # .copy() so the drop/assignments below act on an independent frame
    # instead of a view of df (avoids SettingWithCopyWarning).
    df_questions = df[question_columns].copy()

    # Drop any timestamp column regardless of capitalization; the
    # original only matched the exact spelling "Timestamp".
    timestamp_cols = [col for col in df_questions.columns if col.lower() == "timestamp"]
    if timestamp_cols:
        df_questions = df_questions.drop(columns=timestamp_cols)

    questions = list(df_questions.columns)
    answer_keys = df_questions.iloc[-1].to_dict()        # last row = answer key
    participant_answers = df_questions.iloc[:-1].copy()  # everything above it
    return questions, answer_keys, participant_answers, non_question_data
def process_and_grade(file_path, non_question_columns_str):
    """Grade every participant row of the uploaded spreadsheet.

    Scores each answer with grade_answer (0-2 points per question),
    adds per-question score columns plus 'Total Points' and a percentage
    'Grade', then writes the merged result to graded_results.xlsx.

    Parameters
    ----------
    file_path : str
        Path to the uploaded Excel file.
    non_question_columns_str : str
        Comma-separated non-question column names.

    Returns
    -------
    str
        Path of the written results file ("graded_results.xlsx").
    """
    questions, answer_keys, participant_answers, non_question_data = process_excel_file(
        file_path, non_question_columns_str
    )

    # Explicit copy: process_excel_file may hand back a slice, and
    # assigning new columns onto a slice raises SettingWithCopyWarning.
    participant_answers = participant_answers.copy()

    num_questions = len(questions)
    for i in range(num_questions):
        participant_answers[f"Q{i+1} Score"] = 0  # per-question score columns
    participant_answers['Total Points'] = 0
    participant_answers['Grade'] = 0

    max_points = num_questions * 2  # each question is worth up to 2 points

    # Grade each participant's answers and accumulate the total.
    for index, row in participant_answers.iterrows():
        total_score = 0
        for i, question in enumerate(questions, start=1):
            score = grade_answer(question, row[question], answer_keys[question])
            participant_answers.at[index, f"Q{i} Score"] = score
            total_score += score
        participant_answers.at[index, 'Total Points'] = total_score
        # Guard max_points == 0 (no question columns) against ZeroDivisionError.
        participant_answers.at[index, 'Grade'] = (total_score / max_points) * 100 if max_points else 0

    # Re-attach the identity columns (dropping their answer-key row) to
    # the graded answers for the final output.
    final_df = pd.concat(
        [non_question_data.iloc[:-1].reset_index(drop=True),
         participant_answers.reset_index(drop=True)],
        axis=1,
    )
    output_file_path = "graded_results.xlsx"
    final_df.to_excel(output_file_path, index=False)
    return output_file_path
def grade_answer(question, participant_answer, key_answer):
    """Score one participant answer against the key with the fine-tuned model.

    Parameters
    ----------
    question : Any
        The question text (coerced to str).
    participant_answer : Any
        The participant's answer; blank, "-", or NaN cells score 0
        without calling the API.
    key_answer : Any
        The intended answer (coerced to str).

    Returns
    -------
    int
        A score in the range 0-2; 0 when the answer is blank or the
        model's reply is not an integer.
    """
    question = str(question)
    participant_answer = str(participant_answer)
    key_answer = str(key_answer)

    # Empty Excel cells arrive as NaN and stringify to "nan"; treat them
    # like blank/"-" answers so we don't waste an API call on them.
    stripped = participant_answer.strip()
    if stripped in ("", "-", "nan"):
        return 0

    task = "Evaluate the correctness of a participant's answer compared to the intended answer"
    prompt = f"{task}, Question: {question}\nIntended Answer: {key_answer}\nParticipant Answer: {participant_answer}\n"
    response = client.completions.create(
        model=model_ft_id,
        prompt=prompt,
        max_tokens=1,  # the model is expected to emit a single digit
        temperature=0,
    )
    try:
        score = int(response.choices[0].text.strip())
    except ValueError:
        # The model returned something other than an integer.
        return 0
    # Clamp to the expected 0-2 range in case the model emits another digit.
    return max(0, min(score, 2))
# Set up the Gradio interface: one file upload + a textbox listing the
# non-question columns, producing a downloadable graded spreadsheet.
iface = gr.Interface(
    fn=process_and_grade,
    inputs=[
        gr.File(file_count="single", type="filepath"),
        # Fixed user-facing typo: "Nams" -> "Names".
        gr.Textbox(
            lines=2,
            placeholder="Column1, Column2, Column3, ...",
            label="Non-Question Columns Like Names, Id, etc.",
        ),
    ],
    outputs=gr.File(label="Download Graded Results"),
    title="Essay Question Grading V0.01",
    description="Upload a spreadsheet and specify non-question columns (separated by commas) to grade. Format your Excel file with the 1st row for questions and the last row for key answers.",
    allow_flagging="never",
)
iface.launch(share=True, debug=True)