"""Task abstract class for evaluation and results."""
import logging
from abc import ABC, abstractmethod
from enum import Enum
from importlib.metadata import version
from typing import Any, List, Literal, Optional
import datasets
from pydantic import BaseModel, model_validator
# HACK: prefer the package-relative import of Modality; fall back to a local
# definition so the leaderboard can run without the full package installed.
try:
    from ..modality import Modality
except ImportError:
    # This fallback must match the Modality class in modality.py exactly.
    class Modality(Enum):
        """Data modality, either DNA or protein sequence."""

        PROTEIN = "protein"
        DNA = "dna"


logging.basicConfig(level=logging.INFO)

TaskType = Literal[
"classification",
"pair_classification",
"clustering",
"eds",
"bigene_mining",
"retrieval",
]


class TaskMetric(BaseModel):
    id: str
    display_name: str
    description: Optional[str] = None
    value: float = 0.0


class LayerResult(BaseModel):
    layer_number: int
    layer_display_name: str
    metrics: List[TaskMetric]


class DGEBModel(BaseModel):
    hf_name: str
    num_layers: int
    num_params: int
    embed_dim: int


class Dataset(BaseModel):
    path: str
    revision: str

    def load(self) -> datasets.DatasetDict:
        ds = datasets.load_dataset(self.path, revision=self.revision)
        if not isinstance(ds, datasets.DatasetDict):
            raise ValueError(
                f"Dataset {self.path} is not a datasets.DatasetDict object."
            )
        return ds
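
# A minimal usage sketch (the path and revision are hypothetical; `load()`
# simply forwards them to `datasets.load_dataset` and checks the return type):
#
#     ds = Dataset(path="org/example_dataset", revision="main").load()
#     print(list(ds.keys()))  # split names, e.g. ["train", "test"]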


class TaskMetadata(BaseModel):
    id: str
    display_name: str
    description: str
    modality: Modality
    type: TaskType
    # Datasets used by the task. Each entry holds the arguments passed to
    # `datasets.load_dataset()` (see `Dataset.load` above).
    datasets: List[Dataset]
    primary_metric_id: str


class TaskResult(BaseModel):
    dgeb_version: str
    task: "TaskMetadata"
    # TODO: Convert model to ModelMetadata.
    model: DGEBModel
    results: List[LayerResult]

@model_validator(mode="after")
def check_valid_primary_metric(self):
for result in self.results:
if all(
metric.id != self.task.primary_metric_id for metric in result.metrics
):
raise ValueError(
f"Primary metric {self.task.primary_metric_id} not found in results.metrics"
)
return self
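
    # For example, if the task's primary_metric_id is "accuracy", validation
    # fails when any LayerResult lacks a metric with id "accuracy".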

    @staticmethod
    def from_dict(
        task_metadata: "TaskMetadata",
        layer_results: dict,
        model_metadata: DGEBModel,
    ):
        return TaskResult(
            dgeb_version=version("dgeb"),
            task=task_metadata,
            model=model_metadata,
            results=[
                LayerResult(
                    layer_number=int(layer),
                    layer_display_name=str(layer),
                    metrics=[
                        TaskMetric(id=metric, display_name=metric, value=value)
                        for metric, value in metrics.items()
                    ],
                )
                for layer, metrics in layer_results["layers"].items()
            ],
        )
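
    # Sketch of the mapping `from_dict` expects (inferred from the loop above;
    # the metric names here are hypothetical):
    #
    #     layer_results = {"layers": {16: {"accuracy": 0.91, "f1": 0.88}}}
    #     result = TaskResult.from_dict(task_metadata, layer_results, model_metadata)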


# TODO: move to model.py?
class Task(ABC):
    metadata: TaskMetadata

    # `Any` is used instead of "BioSeqTransformer" to avoid pulling all of its
    # dependencies into the leaderboard.
    @abstractmethod
    def run(self, model: Any, layers: Optional[List[int]] = None) -> TaskResult:
        pass
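

# A minimal sketch of a concrete Task, for illustration only. The task id,
# dataset path, metric values, and model attribute names below are all
# hypothetical; a real task would embed sequences with `model` at each
# requested layer and compute real metrics.
class _ExampleClassificationTask(Task):
    metadata = TaskMetadata(
        id="example_classification",
        display_name="Example Classification",
        description="Hypothetical task illustrating the Task interface.",
        modality=Modality.PROTEIN,
        type="classification",
        datasets=[Dataset(path="org/example_dataset", revision="main")],
        primary_metric_id="accuracy",
    )

    def run(self, model: Any, layers: Optional[List[int]] = None) -> TaskResult:
        # Placeholder scores keyed by layer; every layer must report the
        # primary metric or TaskResult validation fails.
        layer_results = {
            "layers": {layer: {"accuracy": 0.0} for layer in (layers or [0])}
        }
        model_metadata = DGEBModel(
            hf_name=getattr(model, "hf_name", "unknown"),
            num_layers=getattr(model, "num_layers", 1),
            num_params=getattr(model, "num_params", 0),
            embed_dim=getattr(model, "embed_dim", 0),
        )
        return TaskResult.from_dict(self.metadata, layer_results, model_metadata)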