File size: 1,418 Bytes
cd79658 c3a1efb 6a1bcc1 c3a1efb 6a1bcc1 3adebf4 6a1bcc1 cd79658 6a1bcc1 cd79658 6a1bcc1 c3a1efb cd79658 6a1bcc1 c3a1efb 6a1bcc1 cd79658 6a1bcc1 cd79658 6a1bcc1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
from abc import ABC
from dataclasses import dataclass, field
from typing import Any, Dict, Optional

from datasets import Features, Sequence, Value

from .operator import StreamInstanceOperator
class Validator(ABC):
    """Marker base class for validators.

    Declares no methods or attributes of its own; subclasses supply the
    actual validation behaviour.
    """
class ValidateSchema(Validator, StreamInstanceOperator):
    """Stream operator that checks instances against a ``Features`` schema.

    ``schema`` must be set to a ``datasets.Features`` instance before
    ``verify`` is called. ``process`` itself passes instances through
    unchanged.
    """

    schema: Features = None

    def verify(self):
        """Check that a schema was supplied and that it is a ``Features``.

        Raises:
            AssertionError: if ``schema`` is ``None`` or is not an instance
                of ``Features``.
        """
        # Test for None first: isinstance(None, Features) is False, so with
        # the opposite order a missing schema would be reported with the
        # "must be an instance" message and this check could never fire.
        assert self.schema is not None, "Schema must be specified"
        assert isinstance(
            self.schema, Features
        ), "Schema must be an instance of Features"

    def verify_first_instance(self, instance):
        """Assert that every required field is present in ``instance``.

        Args:
            instance: the first instance of a stream; only membership
                (``field in instance``) is tested.

        Raises:
            AssertionError: naming the first required field that is missing.
        """
        # NOTE(review): `standart_fields` is not defined in this class;
        # presumably a base class or subclass provides it - confirm. When it
        # is absent, fall back to the schema's own field names (Features is
        # a dict, so iterating it yields the names) instead of failing with
        # an AttributeError.
        required_fields = getattr(self, "standart_fields", None)
        if required_fields is None:
            required_fields = self.schema if self.schema is not None else ()
        for std_field in required_fields:
            assert (
                std_field in instance
            ), f'Field "{std_field}" is missing in the first instance'

    def process(
        self, instance: Dict[str, Any], stream_name: Optional[str] = None
    ) -> Dict[str, Any]:
        """Return ``instance`` unchanged; ``stream_name`` is not used."""
        return instance
class StandardSchema(Features):
    """``Features`` preset with a fixed set of fields.

    ``source``, ``target`` and ``parser`` are string values; ``references``
    and ``metrics`` are sequences of strings.
    """

    def __init__(self):
        """Initialise the underlying ``Features`` with the fixed field set."""
        # "group" and "guidance" string fields are currently left out of the
        # schema (they were commented out in the field list).
        standard_columns = {
            "source": Value("string"),
            "target": Value("string"),
            "references": Sequence(Value("string")),
            "metrics": Sequence(Value("string")),
            "parser": Value("string"),
        }
        super().__init__(standard_columns)
@dataclass
class ValidateStandartSchema:
    """Holder for a schema that defaults to a fresh ``StandardSchema``.

    The class is declared as a dataclass because ``field(default_factory=...)``
    only takes effect inside one; without the decorator ``schema`` would be a
    raw ``dataclasses.Field`` object rather than a ``StandardSchema``.
    """

    # Built per instance by the default factory, so instances do not share
    # one schema object.
    schema: Features = field(default_factory=StandardSchema)
|