Shiyu Zhao committed
Commit 680cbe9
Parent: 53e6c12

Update space

Files changed (3):
  1. README.md +1 -0
  2. app.py +293 -41
  3. requirements.txt +3 -1
README.md CHANGED
@@ -8,6 +8,7 @@ app_file: app.py
 pinned: true
 license: mit
 short_description: leaderboard of Semi-structured Retrieval Benchmark (STaRK)
+hf_oauth: write
 ---

 # Start the configuration
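
For context, `hf_oauth: write` enables Hugging Face OAuth for the Space, so the Gradio app can identify logged-in users. A minimal sketch of how app.py could consume the login state, assuming the standard Gradio OAuth integration on Spaces; the demo names below are illustrative and not part of this commit:

```python
import gradio as gr

def login_status(profile: gr.OAuthProfile | None) -> str:
    # When hf_oauth is enabled in the Space README, Gradio injects the OAuth
    # profile of the signed-in user; it is None for anonymous visitors.
    if profile is None:
        return "Please sign in with your Hugging Face account before submitting."
    return f"Signed in as {profile.name}"

with gr.Blocks() as demo:
    gr.LoginButton()  # renders the "Sign in with Hugging Face" button
    status = gr.Markdown()
    demo.load(login_status, inputs=None, outputs=status)

demo.launch()
```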
app.py CHANGED
@@ -8,6 +8,9 @@ import json
 import torch
 from tqdm import tqdm
 from concurrent.futures import ProcessPoolExecutor, as_completed
+import smtplib
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText

 from stark_qa import load_qa
 from stark_qa.evaluator import Evaluator
@@ -283,62 +286,311 @@ def update_leaderboard_data(submission_data):
     # Add new row
     df_to_update.loc[len(df_to_update)] = new_row

+# Function to get emails from meta_data
+def get_emails_from_metadata(meta_data):
+    """
+    Extracts emails from the meta_data dictionary.
+
+    Args:
+        meta_data (dict): The metadata dictionary that contains the 'Contact Email(s)' field.
+
+    Returns:
+        list: A list of email addresses.
+    """
+    return [email.strip() for email in meta_data.get("Contact Email(s)", "").split(";")]
+
+# Function to format meta_data as an HTML table (without Prediction CSV)
+def format_metadata_as_table(meta_data):
+    """
+    Formats metadata dictionary into an HTML table for the email.
+    Handles multiple contact emails separated by a semicolon.
+
+    Args:
+        meta_data (dict): Dictionary containing submission metadata.
+
+    Returns:
+        str: HTML string representing the metadata table.
+    """
+    table_rows = ""
+
+    for key, value in meta_data.items():
+        if key == "Contact Email(s)":
+            # Ensure that contact emails are split by semicolon
+            emails = value.split(';')
+            formatted_emails = "; ".join([email.strip() for email in emails])
+            table_rows += f"<tr><td><b>{key}</b></td><td>{formatted_emails}</td></tr>"
+        elif key != "Prediction CSV":  # Exclude the Prediction CSV field
+            table_rows += f"<tr><td><b>{key}</b></td><td>{value}</td></tr>"
+
+    table_html = f"""
+    <table border="1" cellpadding="5" cellspacing="0">
+        {table_rows}
+    </table>
+    """
+    return table_html
+
+# Function to get emails from meta_data
+def get_emails_from_metadata(meta_data):
+    """
+    Extracts emails from the meta_data dictionary.
+
+    Args:
+        meta_data (dict): The metadata dictionary that contains the 'Contact Email(s)' field.
+
+    Returns:
+        list: A list of email addresses.
+    """
+    return [email.strip() for email in meta_data.get("Contact Email(s)", "").split(";")]
+
+def send_error_notification(meta_data, error_info):
+    """
+    Sends an email notification about an error during the evaluation process.
+
+    Args:
+        meta_data (dict): Submission metadata to be included in the email.
+        error_info (str): Error message or notification content to be included in the email.
+
+    Returns:
+        None
+    """
+    emails_to_send = get_emails_from_metadata(meta_data)
+    send_from = 'stark-qa@cs.stanford.edu'
+    recipients_str = ', '.join(emails_to_send)
+
+    # Create the email container
+    msg = MIMEMultipart('alternative')
+    msg['Subject'] = 'STaRK Leaderboard Submission - Error Notification'
+    msg['From'] = send_from
+    msg['To'] = recipients_str
+
+    # Format the metadata table
+    metadata_table = format_metadata_as_table(meta_data)
+
+    # Email body content with metadata table
+    body = f"""
+    <p>Dear STaRK Leaderboard Participant,</p>
+
+    <p>We encountered an issue during the evaluation of your recent submission:</p>
+
+    <p><i>{error_info}</i></p>
+
+    <p>Please verify your inputs and resubmit. If the issue persists, feel free to contact us at stark-qa@cs.stanford.edu with the error details and your dataset information.</p>
+
+    <p>Submitted Metadata:</p>
+    {metadata_table}
+
+    <p>Thank you for your participation.</p>
+
+    <p>Best regards,<br>The STaRK QA Team</p>
+    """
+
+    msg.attach(MIMEText(body, 'html'))
+
+    # Send the email
+    try:
+        with smtplib.SMTP('localhost') as server:
+            server.sendmail(send_from, emails_to_send, msg.as_string())  # No CC for error notification
+        print("Error notification sent successfully.")
+    except Exception as e:
+        print(f"Failed to send error notification: {e}")
+
+# Function to send a submission confirmation with evaluation results and metadata, CCing the sender
+def send_submission_confirmation(meta_data, eval_results):
+    """
+    Sends an email notification confirming submission and including evaluation results and metadata,
+    with an option to CC the sender.
+
+    Args:
+        meta_data (dict): Submission metadata to be included in the email.
+        eval_results (dict): Dictionary of evaluation results to include in the email.
+
+    Returns:
+        None
+    """
+    emails_to_send = get_emails_from_metadata(meta_data)
+    send_from = 'stark-qa@cs.stanford.edu'
+    recipients_str = ', '.join(emails_to_send)
+
+    # Create the email container
+    msg = MIMEMultipart('alternative')
+    msg['Subject'] = 'STaRK Leaderboard Submission - Evaluation Results'
+    msg['From'] = send_from
+    msg['To'] = recipients_str
+    msg['Cc'] = send_from  # CC the sender only for success notification
+
+    # Format the evaluation results and metadata table
+    formatted_results = format_evaluation_results(eval_results)
+    metadata_table = format_metadata_as_table(meta_data)
+
+    # Email body content with evaluation results and metadata table
+    body = f"""
+    <p>Dear STaRK Leaderboard Participant,</p>
+
+    <p>Thank you for your submission to the STaRK leaderboard. We are pleased to inform you that the evaluation has been completed. Below are the results of your submission:</p>
+
+    <pre>{formatted_results}</pre>
+
+    <p>Submitted Metadata:</p>
+    {metadata_table}
+
+    <p>Your submission will be reviewed. Once approved, the results will be updated on the leaderboard within the next 48 business hours. If there are problems in the metadata that you submitted, one of our team members will reach out to you.</p>
+
+    <p>If you would like to withdraw your submission, simply reply to this email with "withdrawn."</p>
+
+    <p>We appreciate your participation and look forward to sharing your results on our leaderboard.</p>
+
+    <p>Best regards,<br>The STaRK QA Team</p>
+    """
+
+    msg.attach(MIMEText(body, 'html'))
+
+    # Send the email
+    try:
+        with smtplib.SMTP('localhost') as server:
+            server.sendmail(send_from, emails_to_send + [send_from], msg.as_string())  # Include sender in recipients for CC
+        print("Submission confirmation sent successfully.")
+    except Exception as e:
+        print(f"Failed to send submission confirmation: {e}")
+
+
 def process_submission(
     method_name, team_name, dataset, split, contact_email,
     code_repo, csv_file, model_description, hardware, paper_link
 ):
     """Process and validate submission"""
     try:
-        # [Previous validation code remains the same]
-
-        # Process CSV file through evaluation pipeline
-        results = compute_metrics(
-            csv_file.name,
-            dataset=dataset.lower(),
-            split=split,
-            num_workers=4
-        )
+        # Input validation
+        if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file]):
+            return "Error: Please fill in all required fields"

-        if isinstance(results, str) and results.startswith("Error"):
-            return f"Evaluation error: {results}"
+        # Length validation
+        if len(method_name) > 25:
+            return "Error: Method name must be 25 characters or less"
+        if len(team_name) > 25:
+            return "Error: Team name must be 25 characters or less"
+        if not validate_email(contact_email):
+            return "Error: Invalid email format"
+        if not validate_github_url(code_repo):
+            return "Error: Invalid GitHub repository URL"

-        # Prepare submission data
-        submission_data = {
-            "method_name": method_name,
-            "team_name": team_name,
-            "dataset": dataset,
-            "split": split,
-            "contact_email": contact_email,
-            "code_repo": code_repo,
-            "model_description": model_description,
-            "hardware": hardware,
-            "paper_link": paper_link,
-            "results": results,
-            "status": "pending_review",
-            "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        # Prepare metadata for email
+        meta_data = {
+            "Method Name": method_name,
+            "Team Name": team_name,
+            "Dataset": dataset,
+            "Split": split,
+            "Contact Email(s)": contact_email,
+            "Code Repository": code_repo,
+            "Model Description": model_description,
+            "Hardware": hardware,
+            "(Optional) Paper link": paper_link
         }

-        # Save submission and get ID
-        submission_id = save_submission(submission_data, csv_file)
-
-        # Update leaderboard data if submission is valid
-        update_leaderboard_data(submission_data)
-
-        return f"""
-        Submission successful! Your submission ID is: {submission_id}
-
-        Evaluation Results:
-        Hit@1: {results['hit@1']:.2f}
-        Hit@5: {results['hit@5']:.2f}
-        Recall@20: {results['recall@20']:.2f}
-        MRR: {results['mrr']:.2f}
-
-        Your submission has been saved and is pending review.
-        Once approved, your results will appear in the leaderboard under the method name: {method_name}
-        """
+        # Save CSV file
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        model_name_clean = sanitize_name(method_name)
+        team_name_clean = sanitize_name(team_name)

+        # Create directory structure in the HuggingFace space
+        base_dir = "submissions"  # This will be in the HF space root
+        submission_dir = os.path.join(base_dir, f"{model_name_clean}_{team_name_clean}")
+        os.makedirs(submission_dir, exist_ok=True)
+
+        # Save CSV file
+        csv_filename = f"predictions_{timestamp}.csv"
+        csv_path = os.path.join(submission_dir, csv_filename)
+        if hasattr(csv_file, 'name'):
+            with open(csv_file.name, 'rb') as source, open(csv_path, 'wb') as target:
+                target.write(source.read())
+
+        # Validate CSV file
+        csv_valid, csv_message = validate_csv(csv_file)
+        if not csv_valid:
+            error_message = f"Error with CSV file: {csv_message}"
+            send_error_notification(meta_data, error_message)
+            return error_message
+
+        # Process CSV file through evaluation pipeline
+        try:
+            results = compute_metrics(
+                csv_file.name,
+                dataset=dataset.lower(),
+                split=split,
+                num_workers=4
+            )
+
+            if isinstance(results, str) and results.startswith("Error"):
+                send_error_notification(meta_data, results)
+                return f"Evaluation error: {results}"
+
+            # Multiply results by 100 and round to 2 decimal places
+            processed_results = {
+                "hit@1": round(results['hit@1'] * 100, 2),
+                "hit@5": round(results['hit@5'] * 100, 2),
+                "recall@20": round(results['recall@20'] * 100, 2),
+                "mrr": round(results['mrr'] * 100, 2)
+            }
+
+            # Prepare submission data
+            submission_data = {
+                "method_name": method_name,
+                "team_name": team_name,
+                "dataset": dataset,
+                "split": split,
+                "contact_email": contact_email,
+                "code_repo": code_repo,
+                "model_description": model_description,
+                "hardware": hardware,
+                "paper_link": paper_link,
+                "results": processed_results,
+                "status": "pending_review",
+                "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                "csv_path": csv_path
+            }
+
+            # Save metadata
+            metadata_path = os.path.join(submission_dir, f"metadata_{timestamp}.json")
+            with open(metadata_path, 'w') as f:
+                json.dump(submission_data, f, indent=4)
+
+            # Save latest.json
+            latest_path = os.path.join(submission_dir, "latest.json")
+            with open(latest_path, 'w') as f:
+                json.dump({
+                    "latest_submission": timestamp,
+                    "status": "pending_review",
+                    "method_name": method_name
+                }, f, indent=4)
+
+            # Send email confirmation
+            send_submission_confirmation(meta_data, processed_results)
+
+            # Update leaderboard data
+            update_leaderboard_data(submission_data)
+
+            return f"""
+            Submission successful!
+
+            Evaluation Results:
+            Hit@1: {processed_results['hit@1']:.2f}%
+            Hit@5: {processed_results['hit@5']:.2f}%
+            Recall@20: {processed_results['recall@20']:.2f}%
+            MRR: {processed_results['mrr']:.2f}%
+
+            Your submission has been saved and is pending review.
+            A confirmation email has been sent to {contact_email}.
+            Once approved, your results will appear in the leaderboard under the method name: {method_name}
+            """
+
+        except Exception as e:
+            error_message = f"Error processing submission: {str(e)}"
+            send_error_notification(meta_data, error_message)
+            return error_message
+
     except Exception as e:
-        return f"Error processing submission: {str(e)}"
+        error_message = f"Error processing submission: {str(e)}"
+        send_error_notification(meta_data, error_message)
+        return error_message

 def filter_by_model_type(df, selected_types):
     if not selected_types:
requirements.txt CHANGED
@@ -1,6 +1,7 @@
 APScheduler
 black
 datasets
+email
 gradio
 gradio[oauth]
 gradio_leaderboard==0.0.9
@@ -15,4 +16,5 @@ transformers
 torch
 tokenizers>=0.15.0
 sentencepiece
-stark_qa
+stark_qa
+smtplib
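
One note on the new requirements: `smtplib` and the `email.mime` classes imported in app.py ship with the Python standard library, so they are importable without any pip installation, and the `email`/`smtplib` entries above are likely redundant. A quick check:

```python
# Both modules are part of the CPython standard library; this runs in a
# bare interpreter with no extra packages installed.
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

print(smtplib.__name__, MIMEMultipart.__module__, MIMEText.__module__)
```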