import os
import re
import json

import gradio as gr
from openai import OpenAI

# Initialize the OpenAI client with the API key from environment variables.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# In-memory storage to track submitted emails (not persistent; resets on app restart).
submitted_emails = set()


def get_evaluation_questions():
    """
    Loads evaluation questions and expected answers from environment variables.
    Expected environment variable names are:
      - TEST_QUESTION_1: a JSON array of user query strings.
      - TEST_EXPECTED: a JSON array of JSON-like strings representing the expected outputs.
    Both lists must be of equal length.
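    Example (illustrative values only, not the real test set):
      TEST_QUESTION_1 = '["What is the capital of France?", "What is 2 + 2?"]'
      TEST_EXPECTED   = '["Paris", "4"]'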
""" | |
questions_str = os.environ.get("TEST_QUESTION_1") | |
expected_str = os.environ.get("TEST_EXPECTED") | |
if not questions_str or not expected_str: | |
return [] | |
try: | |
questions_list = json.loads(questions_str) | |
expected_list = json.loads(expected_str) | |
except Exception as e: | |
print(f"Error parsing evaluation questions: {str(e)}") | |
return [] | |
if len(questions_list) != len(expected_list): | |
print("Mismatch in length: questions list and expected answers list must have the same length.") | |
return [] | |
return [{"question": q, "expected": e} for q, e in zip(questions_list, expected_list)] | |
# Load the evaluation questions once at startup. | |
EVALUATION_QUESTIONS = get_evaluation_questions() | |
def sanitize_input(text): | |
""" | |
Sanitizes input to prevent harmful content and limits its length. | |
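    For example (illustrative input), 'Hi! <script>alert(1)</script>' becomes
    'Hi! scriptalert1script' once the disallowed characters are stripped.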
""" | |
# Allow alphanumerics and some punctuation, then truncate to 500 characters. | |
clean_text = re.sub(r"[^a-zA-Z0-9\s.,!?@:\-]", "", text) | |
return clean_text.strip()[:500] | |
def validate_email(email): | |
""" | |
Validates that the provided email is in a valid format. | |
Returns True if valid, False otherwise. | |
""" | |
email_regex = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$" | |
return re.match(email_regex, email) is not None | |
def submit_prompt(email, name, system_prompt): | |
""" | |
Handles user submission: | |
- Validates email format. | |
- Checks if the email has already been used for submission. | |
- Evaluates the system prompt against predefined test questions. | |
- Prevents multiple submissions from the same email. | |
Returns the evaluation results or an error message if the submission is invalid. | |
""" | |
    # Sanitize inputs first so that the duplicate check below and the email recorded at
    # the end of this function always compare the same value.
    email = sanitize_input(email)
    name = sanitize_input(name)
    system_prompt = sanitize_input(system_prompt)
    # Validate email format.
    if not validate_email(email):
        return "Invalid email address. Please enter a valid email."
    # Check if this email has already been used for submission.
    if email in submitted_emails:
        return f"Submission already received for {email}. You can only submit once."
    score = 0
    responses = []
    for item in EVALUATION_QUESTIONS:
        question = item["question"]
        expected = item["expected"]
        try:
            # Use the new client-based API for chat completions.
            response = client.chat.completions.create(
                model="gpt-4o-mini",  # Ensure this identifier matches the deployed model.
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": question},
                ],
            )
            # Extract the answer from the response object.
            answer = response.choices[0].message.content.strip()
        except Exception as e:
            answer = f"Error during OpenAI API call: {str(e)}"
        # Simple evaluation: check if the expected output is a substring of the answer (case-insensitive).
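        # Illustrative example: an expected value of "Paris" counts as correct for the answer
        # "The capital of France is Paris." because the comparison is substring-based.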
        if expected.lower() in answer.lower():
            score += 1
            verdict = "Correct"
        else:
            verdict = "Incorrect"
        responses.append(
            f"Question: {question}\n"
            f"Answer: {answer}\n"
            f"Expected: {expected}\n"
            f"Result: {verdict}\n"
        )
    result_details = "\n".join(responses)
    # Record this email as having submitted their prompt.
    submitted_emails.add(email)
    return (
        f"Thank you for your submission, {name}!\n\n"
        f"Your evaluation score is {score} out of {len(EVALUATION_QUESTIONS)}.\n\nDetails:\n{result_details}"
    )


def build_interface():
    """
    Constructs the Gradio interface with a submission button and single-submission mechanism.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# GPT-4o Mini Prompt Submission")
        gr.Markdown(
            "Please enter your details and submit your system prompt below. "
            "You can only submit once."
        )
        email_input = gr.Textbox(label="Email", placeholder="your.email@example.com")
        name_input = gr.Textbox(label="Name", placeholder="Your name")
        system_prompt_input = gr.Textbox(
            label="System Prompt",
            placeholder="Enter your system prompt here...",
            lines=6,
        )
        submit_button = gr.Button("Submit")
        output_text = gr.Textbox(label="Results", lines=15)
        submit_button.click(
            fn=submit_prompt,
            inputs=[email_input, name_input, system_prompt_input],
            outputs=output_text,
        )
    return demo


if __name__ == "__main__":
    interface = build_interface()
    # Launch the app on 0.0.0.0 so it is accessible externally (e.g., in a container).
    interface.launch(server_name="0.0.0.0", server_port=7860)
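# Note: port 7860 is Gradio's default and the port a containerized Space deployment
# typically expects. A local run sketch, assuming this file is saved as app.py and
# using illustrative values only (not real credentials or the real test set):
#   OPENAI_API_KEY=sk-... \
#   TEST_QUESTION_1='["What is the capital of France?"]' \
#   TEST_EXPECTED='["Paris"]' \
#   python app.py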