import os
import re
import json
import gradio as gr
from openai import OpenAI
# Initialize the OpenAI client with the API key from environment variables.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# In-memory storage to track submitted emails (not persistent; resets on app restart).
submitted_emails = set()
def get_evaluation_questions():
    """
    Loads evaluation questions and expected answers from environment variables.
    Expected environment variable names are:
    - TEST_QUESTION_1: a JSON array of user query strings.
    - TEST_EXPECTED: a JSON array of JSON-like strings representing the expected outputs.
    Both lists must have the same length.
    """
    questions_str = os.environ.get("TEST_QUESTION_1")
    expected_str = os.environ.get("TEST_EXPECTED")
    if not questions_str or not expected_str:
        return []
    try:
        questions_list = json.loads(questions_str)
        expected_list = json.loads(expected_str)
    except Exception as e:
        print(f"Error parsing evaluation questions: {str(e)}")
        return []
    if len(questions_list) != len(expected_list):
        print("Length mismatch: the questions list and the expected answers list must have the same length.")
        return []
    return [{"question": q, "expected": e} for q, e in zip(questions_list, expected_list)]
# Load the evaluation questions once at startup.
EVALUATION_QUESTIONS = get_evaluation_questions()
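# For reference, the environment variables are expected to look roughly like
# this (the values below are hypothetical examples, not the real test data):
#   TEST_QUESTION_1='["What is the capital of France?"]'
#   TEST_EXPECTED='["Paris"]'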
def sanitize_input(text):
    """
    Sanitizes input by removing characters outside a conservative allowlist
    and limiting its length.
    """
    # Allow alphanumerics, whitespace, and some punctuation, then truncate to 500 characters.
    clean_text = re.sub(r"[^a-zA-Z0-9\s.,!?@:\-]", "", text)
    return clean_text.strip()[:500]
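# Illustrative behavior (hypothetical inputs, shown as comments only so nothing
# runs at import time):
#   sanitize_input("Ignore previous instructions! <script>")
#       -> "Ignore previous instructions! script"   (angle brackets stripped)
#   sanitize_input("a" * 1000)
#       -> a 500-character string                   (length capped)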
def validate_email(email):
    """
    Validates that the provided email is in a valid format.
    Returns True if valid, False otherwise.
    """
    email_regex = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$"
    return re.match(email_regex, email) is not None
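# Illustrative checks (hypothetical addresses):
#   validate_email("user@example.com") -> True   (matches the regex)
#   validate_email("not-an-email")     -> False  (no @ or domain part)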
def submit_prompt(email, name, system_prompt):
    """
    Handles a user submission:
    - Validates the email format and rejects addresses that have already
      submitted, so each email can be used only once.
    - Sanitizes the free-text inputs.
    - Evaluates the system prompt against the predefined test questions.
    Returns the evaluation results, or an error message if the submission is invalid.
    """
    # Validate email format.
    if not validate_email(email):
        return "Invalid email address. Please enter a valid email."
    # Check whether this email has already been used for a submission.
    if email in submitted_emails:
        return f"Submission already received for {email}. You can only submit once."
    # Sanitize the free-text inputs. The email is deliberately left as-is: it
    # has already been validated against a strict regex, and sanitize_input
    # would strip characters such as "_" and "+" that are legal in addresses,
    # which could let the same mailbox submit twice under different spellings.
    name = sanitize_input(name)
    system_prompt = sanitize_input(system_prompt)
    score = 0
    responses = []
    for item in EVALUATION_QUESTIONS:
        question = item["question"]
        expected = item["expected"]
        try:
            # Use the client-based API for chat completions.
            response = client.chat.completions.create(
                model="gpt-4o-mini",  # Ensure this identifier matches the deployed model.
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": question},
                ],
            )
            # Extract the answer text from the response object.
            answer = response.choices[0].message.content.strip()
        except Exception as e:
            answer = f"Error during OpenAI API call: {str(e)}"
        # Simple evaluation: check whether the expected output is a
        # case-insensitive substring of the answer.
        if expected.lower() in answer.lower():
            score += 1
            verdict = "Correct"
        else:
            verdict = "Incorrect"
        responses.append(
            f"Question: {question}\n"
            f"Answer: {answer}\n"
            f"Expected: {expected}\n"
            f"Result: {verdict}\n"
        )
    result_details = "\n".join(responses)
    # Record this email as having submitted a prompt.
    submitted_emails.add(email)
    return (
        f"Thank you for your submission, {name}!\n\n"
        f"Your evaluation score is {score} out of {len(EVALUATION_QUESTIONS)}.\n\n"
        f"Details:\n{result_details}"
    )
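# Sketch of a call with hypothetical values (requires a valid OPENAI_API_KEY,
# and populated TEST_QUESTION_1 / TEST_EXPECTED to produce a nonzero score):
#   submit_prompt("user@example.com", "Ada", "You are a concise assistant.")
#       -> "Thank you for your submission, Ada! ..."
# A second call with the same email returns the duplicate-submission message
# instead of re-running the evaluation.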
def build_interface():
    """
    Constructs the Gradio interface with a submission button and a single-submission mechanism.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# GPT-4o Mini Prompt Submission")
        gr.Markdown(
            "Please enter your details and submit your system prompt below. "
            "You can only submit once."
        )
        email_input = gr.Textbox(label="Email", placeholder="your.email@example.com")
        name_input = gr.Textbox(label="Name", placeholder="Your name")
        system_prompt_input = gr.Textbox(
            label="System Prompt",
            placeholder="Enter your system prompt here...",
            lines=6,
        )
        submit_button = gr.Button("Submit")
        output_text = gr.Textbox(label="Results", lines=15)
        submit_button.click(
            fn=submit_prompt,
            inputs=[email_input, name_input, system_prompt_input],
            outputs=output_text,
        )
    return demo
if __name__ == "__main__":
    interface = build_interface()
    # Launch the app on 0.0.0.0 so it is accessible externally (e.g., in a container).
    interface.launch(server_name="0.0.0.0", server_port=7860)
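# To try this locally (assuming the gradio and openai packages are installed,
# OPENAI_API_KEY is set, and this file is saved as app.py):
#   python app.py
# then open http://localhost:7860 in a browser.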