File size: 1,327 Bytes
cd79658 6a1bcc1 cd79658 6a1bcc1 3adebf4 6a1bcc1 cd79658 6a1bcc1 cd79658 6a1bcc1 cd79658 6a1bcc1 cd79658 6a1bcc1 cd79658 6a1bcc1 cd79658 6a1bcc1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
from abc import ABC
from dataclasses import dataclass, field
from typing import Any, Dict

from datasets import Dataset, Features, Sequence, Value

from .operator import StreamInstanceOperator
class Validator(ABC):
    """Abstract marker base class for validators; it declares no members."""
class ValidateSchema(Validator, StreamInstanceOperator):
    """Instance operator that carries a ``Features`` schema and sanity-checks it.

    ``process`` currently passes every instance through unchanged; the checks
    live in ``verify`` and ``verify_first_instance``.
    """

    # Schema to validate against; must be replaced by a ``Features`` object
    # before ``verify`` is called (the ``None`` default fails verification).
    schema: Features = None

    def verify(self):
        """Assert that ``schema`` is set and is a ``Features`` object.

        Raises:
            AssertionError: if ``schema`` is ``None`` or is not an instance
                of ``Features``.
        """
        # The None check must come first: None also fails the isinstance
        # test, so in the opposite order this message could never be shown.
        assert self.schema is not None, "Schema must be specified"
        assert isinstance(self.schema, Features), "Schema must be an instance of Features"

    def verify_first_instance(self, instance):
        """Assert that every entry of ``self.standart_fields`` is in *instance*.

        Raises:
            AssertionError: naming the first entry that is missing.
        """
        # NOTE(review): ``standart_fields`` is not defined in this class;
        # presumably it is provided by a base class or subclass - confirm.
        # The loop variable is deliberately not called ``field`` so it does
        # not shadow ``dataclasses.field`` imported at module level.
        for name in self.standart_fields:
            assert name in instance, f'Field "{name}" is missing in the first instance'

    def process(self, instance: Dict[str, Any], stream_name: str = None) -> Dict[str, Any]:
        """Return *instance* unchanged; ``stream_name`` is not used."""
        return instance
class StandardSchema(Features):
    """``Features`` mapping pre-filled with the standard instance columns.

    Columns, in order: ``source``, ``target`` and ``parser`` are string
    values; ``references`` and ``metrics`` are sequences of strings.
    """

    def __init__(self):
        # (column name, is the column a sequence of strings?) in schema order.
        # 'group' and 'guidance' string columns are intentionally left out
        # (they were commented out in the schema definition).
        layout = (
            ("source", False),
            ("target", False),
            ("references", True),
            ("metrics", True),
            ("parser", False),
        )
        # A fresh Value/Sequence object is created for every column.
        columns = {
            name: Sequence(Value("string")) if is_list else Value("string")
            for name, is_list in layout
        }
        super().__init__(columns)
# eq=False keeps the identity-based __eq__/__hash__ that the undecorated
# class had, so only the handling of ``schema`` changes.
@dataclass(eq=False)
class ValidateStandartSchema:
    """Holder of a ``schema`` attribute that defaults to a ``StandardSchema``.

    ``dataclasses.field`` only takes effect inside a dataclass. Without the
    decorator, ``schema`` would be the raw ``Field`` placeholder object and
    the default factory would never be called.
    """

    # Each instance gets its own StandardSchema unless one is passed in.
    schema: Features = field(default_factory=StandardSchema)
|