import os
import re
import json

import gradio as gr
from openai import OpenAI

# Initialize the OpenAI client with the API key from environment variables.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# In-memory storage to track submitted emails (not persistent; resets on app restart).
submitted_emails = set()


def get_evaluation_questions():
    """
    Loads evaluation questions and expected answers from environment variables.
    Expected environment variable names are:
      - TEST_QUESTION_1: a JSON array of user query strings.
      - TEST_EXPECTED: a JSON array of JSON-like strings representing the expected outputs.
    Both lists must be of equal length.
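    Example (illustrative values only, not the real test set):
      TEST_QUESTION_1 = '["What is the capital of France?", "What is 2 + 2?"]'
      TEST_EXPECTED   = '["Paris", "4"]'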
""" | |
questions_str = os.environ.get("TEST_QUESTION_1") | |
expected_str = os.environ.get("TEST_EXPECTED") | |
if not questions_str or not expected_str: | |
return [] | |
try: | |
questions_list = json.loads(questions_str) | |
expected_list = json.loads(expected_str) | |
except Exception as e: | |
print(f"Error parsing evaluation questions: {str(e)}") | |
return [] | |
if len(questions_list) != len(expected_list): | |
print("Mismatch in length: questions list and expected answers list must have the same length.") | |
return [] | |
return [{"question": q, "expected": e} for q, e in zip(questions_list, expected_list)] | |
# Load the evaluation questions once at startup. | |
EVALUATION_QUESTIONS = get_evaluation_questions() | |
def sanitize_input(text): | |
""" | |
Sanitizes input to prevent harmful content and limits its length. | |
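    For example (illustrative input), 'Hi! <script>alert(1)</script>' becomes
    'Hi! scriptalert1script' once the disallowed characters are stripped.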
""" | |
# Allow alphanumerics and some punctuation, then truncate to 500 characters. | |
clean_text = re.sub(r"[^a-zA-Z0-9\s.,!?@:\-]", "", text) | |
return clean_text.strip()[:500] | |
def validate_email(email): | |
""" | |
Validates that the provided email is in a valid format. | |
Returns True if valid, False otherwise. | |
""" | |
email_regex = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$" | |
return re.match(email_regex, email) is not None | |
def submit_prompt(email, name, system_prompt): | |
""" | |
Handles user submission: | |
- Validates email format. | |
- Checks if the email has already been used for submission. | |
- Evaluates the system prompt against predefined test questions. | |
- Prevents multiple submissions from the same email. | |
Returns the evaluation results or an error message if the submission is invalid. | |
""" | |
    # Sanitize inputs first so that the duplicate check below and the email recorded at
    # the end of this function always compare the same value.
    email = sanitize_input(email)
    name = sanitize_input(name)
    system_prompt = sanitize_input(system_prompt)
    # Validate email format.
    if not validate_email(email):
        return "Invalid email address. Please enter a valid email."
    # Check if this email has already been used for submission.
    if email in submitted_emails:
        return f"Submission already received for {email}. You can only submit once."
    score = 0
    responses = []
    for item in EVALUATION_QUESTIONS:
        question = item["question"]
        expected = item["expected"]
        try:
            # Use the new client-based API for chat completions.
            response = client.chat.completions.create(
                model="gpt-4o-mini",  # Ensure this identifier matches the deployed model.
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": question},
                ],
            )
            # Extract the answer from the response object.
            answer = response.choices[0].message.content.strip()
        except Exception as e:
            answer = f"Error during OpenAI API call: {str(e)}"
        # Simple evaluation: check if the expected output is a substring of the answer (case-insensitive).
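        # Illustrative example: an expected value of "Paris" counts as correct for the answer
        # "The capital of France is Paris." because the comparison is substring-based.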
        if expected.lower() in answer.lower():
            score += 1
            verdict = "Correct"
        else:
            verdict = "Incorrect"
        responses.append(
            f"Question: {question}\n"
            f"Answer: {answer}\n"
            f"Expected: {expected}\n"
            f"Result: {verdict}\n"
        )
    result_details = "\n".join(responses)
    # Record this email as having submitted their prompt.
    submitted_emails.add(email)
    return (
        f"Thank you for your submission, {name}!\n\n"
        f"Your evaluation score is {score} out of {len(EVALUATION_QUESTIONS)}.\n\nDetails:\n{result_details}"
    )


def build_interface():
    """
    Constructs the Gradio interface with a submission button and single-submission mechanism.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# GPT-4o Mini Prompt Submission")
        gr.Markdown(
            "Please enter your details and submit your system prompt below. "
            "You can only submit once."
        )
        email_input = gr.Textbox(label="Email", placeholder="your.email@example.com")
        name_input = gr.Textbox(label="Name", placeholder="Your name")
        system_prompt_input = gr.Textbox(
            label="System Prompt",
            placeholder="Enter your system prompt here...",
            lines=6,
        )
        submit_button = gr.Button("Submit")
        output_text = gr.Textbox(label="Results", lines=15)
        submit_button.click(
            fn=submit_prompt,
            inputs=[email_input, name_input, system_prompt_input],
            outputs=output_text,
        )
    return demo


if __name__ == "__main__":
    interface = build_interface()
    # Launch the app on 0.0.0.0 so it is accessible externally (e.g., in a container).
    interface.launch(server_name="0.0.0.0", server_port=7860)
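# Note: port 7860 is Gradio's default and the port a containerized Space deployment
# typically expects. A local run sketch, assuming this file is saved as app.py and
# using illustrative values only (not real credentials or the real test set):
#   OPENAI_API_KEY=sk-... \
#   TEST_QUESTION_1='["What is the capital of France?"]' \
#   TEST_EXPECTED='["Paris"]' \
#   python app.py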