Spaces:
Runtime error
Runtime error
| from typing import List | |
# Shared lookup table: raw result-column identifier -> human-readable
# column header. Keys are kept exactly as they are spelled here (including
# "EM commited"); only the values are display text.
COLUMNS_PRETTY = {
    # Text-similarity metrics
    "bleu": "BLEU",
    "chrf": "ChrF",
    "rouge1": "ROUGE-1",
    "rouge2": "ROUGE-2",
    "rougeL": "ROUGE-L",
    "bertscore": "BERTScore",
    "bertscore_normalized": "BERTScore (Normalized)",

    # Model / submission metadata
    "model_name": "Model Name",
    "model_availability": "Availability",
    "urls": "Resources",
    "context_size": "Context Size",
    "submitted_by": "Submitted By",

    # Exact-match (EM) metrics
    "EM infile": "EM infile",
    "EM inproject": "EM inproject",
    "EM common": "EM common",
    "EM commited": "EM committed",
    "EM non_informative": "EM non-informative",
    "EM random": "EM random",
    "EM all": "EM all",

    # Remaining descriptive columns; note that "context_length" shares its
    # header text with "context_size" above.
    "context_composer": "Context Composer",
    "context_length": "Context Size",
    "dataset": "Dataset",
    "CompScore": "CompScore",
    "context": "Context",
    "task_type": "Task type",
}
# Add your metrics
# Maps a task id to the ordered list of metric column headers shown for it.
# The order of each list is the order in which the metric columns appear.
METRICS_PER_TASK = {
    "aggregated": [
        "Mean Rank",
        "Mean Score",
        "Library-based CG",
        "CI builds repair",
        "CMG",
        "Bug localization",
        "Module summarization",
    ],
    "commit_message_generation": [
        "BLEU",
        "ChrF",
        "ROUGE-1",
        "ROUGE-2",
        "ROUGE-L",
        "BERTScore",
        "BERTScore (Normalized)",
    ],
    "project_code_completion": [
        "EM infile",
        "EM inproject",
        "EM common",
        "EM committed",
        "EM non-informative",
        "EM random",
        "EM all",
    ],
    "bug_localization": [
        "P",
        "R",
        "FPR",
        "F1-score",
        "All_correct",
        "All_incorrect",
        "Output_count",
    ],
    "module_summarization": ["CompScore"],
    # Two-line headers "<metric>\n<context>": every API Recall variant first,
    # then every ChrF variant, each over the same five context settings.
    "library_based_code_generation": [
        f"{metric}\n{context}"
        for metric in ("API Recall", "ChrF")
        for context in (
            "no context",
            "20 APIs",
            "200 APIs",
            "2,000 APIs",
            "all APIs",
        )
    ],
    "ci_builds_repair": ["Pass@1"],
}
# Maps a task id to the column header associated with sorting that task's
# table. There is no entry for the "aggregated" task.
SORT_COLUMN_PER_TASK = {
    "commit_message_generation": "ROUGE-1",
    "project_code_completion": "EM inproject",
    # The only task whose sort column is not a metric.
    "bug_localization": "Model Name",
    "module_summarization": "CompScore",
    "library_based_code_generation": "API Recall\nall APIs",
    "ci_builds_repair": "Pass@1",
}
def get_columns_per_task(task_id: str) -> List[str]:
    """Return the ordered column headers of the table for ``task_id``.

    Every layout has the same shape: leading descriptive columns, then the
    task's metric columns from ``METRICS_PER_TASK``, then trailing
    descriptive columns. Tasks without a dedicated layout use the default
    one ("Model Name", "Context Size", "Availability" up front).

    Raises:
        KeyError: if ``task_id`` has no entry in ``METRICS_PER_TASK``.
    """
    # Looked up first so an unknown task fails before any layout is chosen.
    metric_columns = METRICS_PER_TASK[task_id]

    submission_info = ["Submitted By", "Resources"]
    availability_and_submission = ["Availability"] + submission_info

    # task id -> (leading columns, trailing columns)
    layouts = {
        "aggregated": (["Model Name"], []),
        "project_code_completion": (
            ["Model Name", "Context Composer", "Context Size", "Dataset Name", "Dataset"],
            submission_info,
        ),
        "bug_localization": (
            ["Model Name", "Availability", "Context Size"],
            submission_info,
        ),
        "module_summarization": (
            ["Model Name", "Context Size"],
            submission_info,
        ),
        "library_based_code_generation": (
            ["Model Name"],
            availability_and_submission,
        ),
        "ci_builds_repair": (
            ["Model Name", "Context Size", "Task type"],
            availability_and_submission,
        ),
    }
    default_layout = (["Model Name", "Context Size", "Availability"], submission_info)

    leading, trailing = layouts.get(task_id, default_layout)
    return leading + metric_columns + trailing
def get_types_per_task(task_id: str) -> List[str]:
    """Return the display datatype of every table column for ``task_id``.

    The result is positionally aligned with the headers returned by
    ``get_columns_per_task``: types for the leading descriptive columns,
    one ``"number"`` per metric, then types for the trailing descriptive
    columns. "Model Name" and "Resources" are typed ``"html"``; the other
    descriptive columns are typed ``"markdown"`` (the project code
    completion "Dataset" column keeps its ``"html"`` type).

    Unlike ``get_columns_per_task``, an unknown ``task_id`` does not raise:
    it is treated as having five placeholder metrics and the default layout.
    """
    # Unknown tasks fall back to five placeholder metrics.
    metrics_per_task = METRICS_PER_TASK.get(task_id, (0, 0, 0, 0, 0))
    metric_types = ["number"] * len(metrics_per_task)

    # task id -> (leading types, trailing types). Each entry mirrors the
    # column layout of the same task in get_columns_per_task, so header
    # count and type count always match. The "aggregated",
    # "module_summarization" and "library_based_code_generation" entries
    # are needed because their column layouts differ from the default one.
    layouts = {
        "aggregated": (["html"], []),
        "project_code_completion": (
            ["html", "markdown", "markdown", "markdown", "html"],
            ["markdown", "html"],
        ),
        "bug_localization": (
            ["html", "markdown", "markdown"],
            ["markdown", "html"],
        ),
        "module_summarization": (
            ["html", "markdown"],
            ["markdown", "html"],
        ),
        "library_based_code_generation": (
            ["html"],
            ["markdown", "markdown", "html"],
        ),
        "ci_builds_repair": (
            ["html", "markdown", "markdown"],
            ["markdown", "markdown", "html"],
        ),
    }
    default_layout = (["html", "markdown", "markdown"], ["markdown", "html"])

    leading, trailing = layouts.get(task_id, default_layout)
    return leading + metric_types + trailing