Shiyu Zhao committed
Commit 680cbe9 • Parent: 53e6c12

Update space
Files changed:
- README.md +1 -0
- app.py +293 -41
- requirements.txt +3 -1
README.md
CHANGED
@@ -8,6 +8,7 @@ app_file: app.py
 pinned: true
 license: mit
 short_description: leaderboard of Semi-structured Retrieval Benchmark (STaRK)
+hf_oauth: write
 ---

 # Start the configuration
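The new hf_oauth: write line pairs with the gradio[oauth] entry already present in requirements.txt. As a rough sketch of how a Space typically consumes this setting (gr.LoginButton and gr.OAuthProfile are gradio's OAuth API; none of this code is part of the commit):

import gradio as gr

with gr.Blocks() as demo:
    # With hf_oauth enabled in the README metadata, the Space can offer a
    # Hugging Face sign-in; gradio injects the OAuth profile automatically.
    gr.LoginButton()
    status = gr.Markdown()

    def whoami(profile: gr.OAuthProfile | None) -> str:
        return "Not signed in." if profile is None else f"Signed in as {profile.username}."

    demo.load(whoami, inputs=None, outputs=status)

demo.launch()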
app.py
CHANGED
@@ -8,6 +8,9 @@ import json
 import torch
 from tqdm import tqdm
 from concurrent.futures import ProcessPoolExecutor, as_completed
+import smtplib
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText

 from stark_qa import load_qa
 from stark_qa.evaluator import Evaluator
@@ -283,62 +286,311 @@ def update_leaderboard_data(submission_data):
     # Add new row
     df_to_update.loc[len(df_to_update)] = new_row

+# Function to get emails from meta_data
+def get_emails_from_metadata(meta_data):
+    """
+    Extracts emails from the meta_data dictionary.
+
+    Args:
+        meta_data (dict): The metadata dictionary that contains the 'Contact Email(s)' field.
+
+    Returns:
+        list: A list of email addresses.
+    """
+    return [email.strip() for email in meta_data.get("Contact Email(s)", "").split(";")]
+
+# Function to format meta_data as an HTML table (without Prediction CSV)
+def format_metadata_as_table(meta_data):
+    """
+    Formats metadata dictionary into an HTML table for the email.
+    Handles multiple contact emails separated by a semicolon.
+
+    Args:
+        meta_data (dict): Dictionary containing submission metadata.
+
+    Returns:
+        str: HTML string representing the metadata table.
+    """
+    table_rows = ""
+
+    for key, value in meta_data.items():
+        if key == "Contact Email(s)":
+            # Ensure that contact emails are split by semicolon
+            emails = value.split(';')
+            formatted_emails = "; ".join([email.strip() for email in emails])
+            table_rows += f"<tr><td><b>{key}</b></td><td>{formatted_emails}</td></tr>"
+        elif key != "Prediction CSV":  # Exclude the Prediction CSV field
+            table_rows += f"<tr><td><b>{key}</b></td><td>{value}</td></tr>"
+
+    table_html = f"""
+    <table border="1" cellpadding="5" cellspacing="0">
+        {table_rows}
+    </table>
+    """
+    return table_html
+
+# Function to get emails from meta_data
+def get_emails_from_metadata(meta_data):
+    """
+    Extracts emails from the meta_data dictionary.
+
+    Args:
+        meta_data (dict): The metadata dictionary that contains the 'Contact Email(s)' field.
+
+    Returns:
+        list: A list of email addresses.
+    """
+    return [email.strip() for email in meta_data.get("Contact Email(s)", "").split(";")]
+
+def send_error_notification(meta_data, error_info):
+    """
+    Sends an email notification about an error during the evaluation process.
+
+    Args:
+        meta_data (dict): Submission metadata to be included in the email.
+        error_info (str): Error message or notification content to be included in the email.
+
+    Returns:
+        None
+    """
+    emails_to_send = get_emails_from_metadata(meta_data)
+    send_from = 'stark-qa@cs.stanford.edu'
+    recipients_str = ', '.join(emails_to_send)
+
+    # Create the email container
+    msg = MIMEMultipart('alternative')
+    msg['Subject'] = 'STaRK Leaderboard Submission - Error Notification'
+    msg['From'] = send_from
+    msg['To'] = recipients_str
+
+    # Format the metadata table
+    metadata_table = format_metadata_as_table(meta_data)
+
+    # Email body content with metadata table
+    body = f"""
+    <p>Dear STaRK Leaderboard Participant,</p>
+
+    <p>We encountered an issue during the evaluation of your recent submission:</p>
+
+    <p><i>{error_info}</i></p>
+
+    <p>Please verify your inputs and resubmit. If the issue persists, feel free to contact us at stark-qa@cs.stanford.edu with the error details and your dataset information.</p>
+
+    <p>Submitted Metadata:</p>
+    {metadata_table}
+
+    <p>Thank you for your participation.</p>
+
+    <p>Best regards,<br>The STaRK QA Team</p>
+    """
+
+    msg.attach(MIMEText(body, 'html'))
+
+    # Send the email
+    try:
+        with smtplib.SMTP('localhost') as server:
+            server.sendmail(send_from, emails_to_send, msg.as_string())  # No CC for error notification
+        print("Error notification sent successfully.")
+    except Exception as e:
+        print(f"Failed to send error notification: {e}")
+
+# Function to send a submission confirmation with evaluation results and metadata, CCing the sender
+def send_submission_confirmation(meta_data, eval_results):
+    """
+    Sends an email notification confirming submission and including evaluation results and metadata,
+    with an option to CC the sender.
+
+    Args:
+        meta_data (dict): Submission metadata to be included in the email.
+        eval_results (dict): Dictionary of evaluation results to include in the email.
+
+    Returns:
+        None
+    """
+    emails_to_send = get_emails_from_metadata(meta_data)
+    send_from = 'stark-qa@cs.stanford.edu'
+    recipients_str = ', '.join(emails_to_send)
+
+    # Create the email container
+    msg = MIMEMultipart('alternative')
+    msg['Subject'] = 'STaRK Leaderboard Submission - Evaluation Results'
+    msg['From'] = send_from
+    msg['To'] = recipients_str
+    msg['Cc'] = send_from  # CC the sender only for success notification
+
+    # Format the evaluation results and metadata table
+    formatted_results = format_evaluation_results(eval_results)
+    metadata_table = format_metadata_as_table(meta_data)
+
+    # Email body content with evaluation results and metadata table
+    body = f"""
+    <p>Dear STaRK Leaderboard Participant,</p>
+
+    <p>Thank you for your submission to the STaRK leaderboard. We are pleased to inform you that the evaluation has been completed. Below are the results of your submission:</p>
+
+    <pre>{formatted_results}</pre>
+
+    <p>Submitted Metadata:</p>
+    {metadata_table}
+
+    <p>Your submission will be reviewed. Once approved, the results will be updated on the leaderboard within the next 48 business hours. If there are problems in the metadata that you submitted, one of our team members will reach out to you.</p>
+
+    <p>If you would like to withdraw your submission, simply reply to this email with "withdrawn."</p>
+
+    <p>We appreciate your participation and look forward to sharing your results on our leaderboard.</p>
+
+    <p>Best regards,<br>The STaRK QA Team</p>
+    """
+
+    msg.attach(MIMEText(body, 'html'))
+
+    # Send the email
+    try:
+        with smtplib.SMTP('localhost') as server:
+            server.sendmail(send_from, emails_to_send + [send_from], msg.as_string())  # Include sender in recipients for CC
+        print("Submission confirmation sent successfully.")
+    except Exception as e:
+        print(f"Failed to send submission confirmation: {e}")
+
+
456 |
def process_submission(
|
457 |
method_name, team_name, dataset, split, contact_email,
|
458 |
code_repo, csv_file, model_description, hardware, paper_link
|
459 |
):
|
460 |
"""Process and validate submission"""
|
461 |
try:
|
462 |
+
# Input validation
|
463 |
+
if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file]):
|
464 |
+
return "Error: Please fill in all required fields"
|
|
|
|
|
|
|
|
|
|
|
|
|
465 |
|
466 |
+
# Length validation
|
467 |
+
if len(method_name) > 25:
|
468 |
+
return "Error: Method name must be 25 characters or less"
|
469 |
+
if len(team_name) > 25:
|
470 |
+
return "Error: Team name must be 25 characters or less"
|
471 |
+
if not validate_email(contact_email):
|
472 |
+
return "Error: Invalid email format"
|
473 |
+
if not validate_github_url(code_repo):
|
474 |
+
return "Error: Invalid GitHub repository URL"
|
475 |
|
476 |
+
# Prepare metadata for email
|
477 |
+
meta_data = {
|
478 |
+
"Method Name": method_name,
|
479 |
+
"Team Name": team_name,
|
480 |
+
"Dataset": dataset,
|
481 |
+
"Split": split,
|
482 |
+
"Contact Email(s)": contact_email,
|
483 |
+
"Code Repository": code_repo,
|
484 |
+
"Model Description": model_description,
|
485 |
+
"Hardware": hardware,
|
486 |
+
"(Optional) Paper link": paper_link
|
|
|
|
|
|
|
487 |
}
|
488 |
|
489 |
+
# Save CSV file
|
490 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
491 |
+
model_name_clean = sanitize_name(method_name)
|
492 |
+
team_name_clean = sanitize_name(team_name)
|
|
|
493 |
|
494 |
+
# Create directory structure in the HuggingFace space
|
495 |
+
base_dir = "submissions" # This will be in the HF space root
|
496 |
+
submission_dir = os.path.join(base_dir, f"{model_name_clean}_{team_name_clean}")
|
497 |
+
os.makedirs(submission_dir, exist_ok=True)
|
498 |
|
499 |
+
# Save CSV file
|
500 |
+
csv_filename = f"predictions_{timestamp}.csv"
|
501 |
+
csv_path = os.path.join(submission_dir, csv_filename)
|
502 |
+
if hasattr(csv_file, 'name'):
|
503 |
+
with open(csv_file.name, 'rb') as source, open(csv_path, 'wb') as target:
|
504 |
+
target.write(source.read())
|
505 |
|
506 |
+
# Validate CSV file
|
507 |
+
csv_valid, csv_message = validate_csv(csv_file)
|
508 |
+
if not csv_valid:
|
509 |
+
error_message = f"Error with CSV file: {csv_message}"
|
510 |
+
send_error_notification(meta_data, error_message)
|
511 |
+
return error_message
|
512 |
|
513 |
+
# Process CSV file through evaluation pipeline
|
514 |
+
try:
|
515 |
+
results = compute_metrics(
|
516 |
+
csv_file.name,
|
517 |
+
dataset=dataset.lower(),
|
518 |
+
split=split,
|
519 |
+
num_workers=4
|
520 |
+
)
|
521 |
+
|
522 |
+
if isinstance(results, str) and results.startswith("Error"):
|
523 |
+
send_error_notification(meta_data, results)
|
524 |
+
return f"Evaluation error: {results}"
|
525 |
+
|
526 |
+
# Multiply results by 100 and round to 2 decimal places
|
527 |
+
processed_results = {
|
528 |
+
"hit@1": round(results['hit@1'] * 100, 2),
|
529 |
+
"hit@5": round(results['hit@5'] * 100, 2),
|
530 |
+
"recall@20": round(results['recall@20'] * 100, 2),
|
531 |
+
"mrr": round(results['mrr'] * 100, 2)
|
532 |
+
}
|
533 |
+
|
534 |
+
# Prepare submission data
|
535 |
+
submission_data = {
|
536 |
+
"method_name": method_name,
|
537 |
+
"team_name": team_name,
|
538 |
+
"dataset": dataset,
|
539 |
+
"split": split,
|
540 |
+
"contact_email": contact_email,
|
541 |
+
"code_repo": code_repo,
|
542 |
+
"model_description": model_description,
|
543 |
+
"hardware": hardware,
|
544 |
+
"paper_link": paper_link,
|
545 |
+
"results": processed_results,
|
546 |
+
"status": "pending_review",
|
547 |
+
"submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
548 |
+
"csv_path": csv_path
|
549 |
+
}
|
550 |
+
|
551 |
+
# Save metadata
|
552 |
+
metadata_path = os.path.join(submission_dir, f"metadata_{timestamp}.json")
|
553 |
+
with open(metadata_path, 'w') as f:
|
554 |
+
json.dump(submission_data, f, indent=4)
|
555 |
+
|
556 |
+
# Save latest.json
|
557 |
+
latest_path = os.path.join(submission_dir, "latest.json")
|
558 |
+
with open(latest_path, 'w') as f:
|
559 |
+
json.dump({
|
560 |
+
"latest_submission": timestamp,
|
561 |
+
"status": "pending_review",
|
562 |
+
"method_name": method_name
|
563 |
+
}, f, indent=4)
|
564 |
+
|
565 |
+
# Send email confirmation
|
566 |
+
send_submission_confirmation(meta_data, processed_results)
|
567 |
+
|
568 |
+
# Update leaderboard data
|
569 |
+
update_leaderboard_data(submission_data)
|
570 |
+
|
571 |
+
return f"""
|
572 |
+
Submission successful!
|
573 |
+
|
574 |
+
Evaluation Results:
|
575 |
+
Hit@1: {processed_results['hit@1']:.2f}%
|
576 |
+
Hit@5: {processed_results['hit@5']:.2f}%
|
577 |
+
Recall@20: {processed_results['recall@20']:.2f}%
|
578 |
+
MRR: {processed_results['mrr']:.2f}%
|
579 |
+
|
580 |
+
Your submission has been saved and is pending review.
|
581 |
+
A confirmation email has been sent to {contact_email}.
|
582 |
+
Once approved, your results will appear in the leaderboard under the method name: {method_name}
|
583 |
+
"""
|
584 |
+
|
585 |
+
except Exception as e:
|
586 |
+
error_message = f"Error processing submission: {str(e)}"
|
587 |
+
send_error_notification(meta_data, error_message)
|
588 |
+
return error_message
|
589 |
+
|
590 |
except Exception as e:
|
591 |
+
error_message = f"Error processing submission: {str(e)}"
|
592 |
+
send_error_notification(meta_data, error_message)
|
593 |
+
return error_message
|

 def filter_by_model_type(df, selected_types):
     if not selected_types:
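Two quick notes on the helpers added in this hunk: get_emails_from_metadata is defined twice with identical bodies (the second definition harmlessly shadows the first), and both it and format_metadata_as_table are easy to sanity-check in isolation. A minimal sketch with made-up sample data:

# Made-up sample metadata for a quick local check of the new helpers.
meta = {
    "Method Name": "MyRetriever",
    "Contact Email(s)": "a@example.com; b@example.com",
    "Prediction CSV": "predictions.csv",  # excluded from the HTML table
}
print(get_emails_from_metadata(meta))  # ['a@example.com', 'b@example.com']
print(format_metadata_as_table(meta))  # <table>...</table> rows for every key except the CSV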
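Both mailer functions open smtplib.SMTP('localhost'), i.e., they assume a mail transfer agent on the host; on a machine without one (a stock Hugging Face Space, for instance) delivery falls into the except branch and is only printed. Note also that the Cc header is display-only: actual delivery is driven by the recipient list passed to sendmail(), which is why the confirmation path both sets msg['Cc'] and appends send_from to that list. If a real relay were required, a sketch along these lines could replace the localhost connection (SMTP_HOST, SMTP_USER, and SMTP_PASS are hypothetical settings, not part of this commit):

import os
import smtplib

def open_smtp_connection() -> smtplib.SMTP:
    # Hypothetical relay configured via environment variables.
    server = smtplib.SMTP(os.environ.get("SMTP_HOST", "localhost"), 587)
    server.starttls()  # upgrade the connection before authenticating
    server.login(os.environ["SMTP_USER"], os.environ["SMTP_PASS"])
    return server

Separately, the outer except in the new process_submission calls send_error_notification(meta_data, ...), but meta_data is only bound partway into the try; an exception raised before that assignment surfaces as UnboundLocalError from the handler itself. Binding meta_data = {} before the try would avoid this. A minimal illustration of the hazard (not code from this commit):

def demo():
    try:
        raise ValueError("fails before meta_data is bound")
        meta_data = {"Contact Email(s)": "a@example.com"}
    except Exception:
        return meta_data  # UnboundLocalError: local variable referenced before assignment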
requirements.txt
CHANGED
@@ -1,6 +1,7 @@
 APScheduler
 black
 datasets
+email
 gradio
 gradio[oauth]
 gradio_leaderboard==0.0.9
@@ -15,4 +16,5 @@ transformers
 torch
 tokenizers>=0.15.0
 sentencepiece
-stark_qa
+stark_qa
+smtplib