MotzWanted's picture
feat: fork biomed leaderboard
be62d39
raw
history blame
No virus
1.32 kB
import os
import torch
from dataclasses import dataclass
from enum import Enum
from src.envs import CACHE_PATH
@dataclass
class Task:
    """Description of a single benchmark entry.

    Instances are built positionally as
    ``Task(benchmark, col_name, num_fewshot)``.
    """

    # Benchmark identifier string, e.g. "medmcqa".
    benchmark: str
    # NOTE: a `metric: str` field was considered here and deliberately
    # left out as unnecessary.
    # Human-readable name, e.g. "MedMCQA" (presumably the leaderboard
    # column header -- confirm against the display code).
    col_name: str
    # Few-shot count associated with this benchmark.
    num_fewshot: int
# TODO: clarify how these differ from the Tasks defined in display/utils.py.
class Tasks(Enum):
    """Active benchmarks; each member's value is a ``Task``."""

    # NOTE: earlier, now-disabled definitions passed a metric name
    # ("acc" / "acc_norm") as a second argument and also listed
    # "hellaswag" and "medqa" (possibly "medqa_4options"); none of those
    # variants are active.
    task0 = Task(benchmark="medmcqa", col_name="MedMCQA", num_fewshot=0)
    task1 = Task(benchmark="pubmedqa", col_name="PubMedQA", num_fewshot=0)
    task2 = Task(
        benchmark="pubmedqa_no_context",
        col_name="PubMedQA_no_context",
        num_fewshot=0,
    )
    task3 = Task(benchmark="biolama_umls", col_name="BioLAMA-UMLS", num_fewshot=0)
# Few-shot count keyed by benchmark name; every entry is currently 0.
# "medqa" is listed here although no active Tasks member refers to it.
num_fewshots = dict(
    medqa=0,
    medmcqa=0,
    pubmedqa=0,
    pubmedqa_no_context=0,
    biolama_umls=0,
)
# NUM_FEWSHOT = 64 # Change with your few shot
# Backend copies of the evaluation request queue and results directories,
# both located directly under CACHE_PATH.
EVAL_REQUESTS_PATH_BACKEND, EVAL_RESULTS_PATH_BACKEND = (
    os.path.join(CACHE_PATH, dirname)
    for dirname in ("eval-queue-bk", "eval-results-bk")
)
# Device string used for evaluation: prefer CUDA, then Apple MPS, and fall
# back to CPU. The previous version selected "mps" whenever CUDA was
# missing, which yields an unusable device on hosts without MPS.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif (
    # `torch.backends.mps` is absent on older torch builds, so probe for it
    # before asking whether the backend is usable.
    getattr(torch.backends, "mps", None) is not None
    and torch.backends.mps.is_available()
):
    DEVICE = "mps"
else:
    DEVICE = "cpu"

LIMIT = None  # Testing; needs to be None