File size: 1,327 Bytes
cd79658
 
 
6a1bcc1
cd79658
6a1bcc1
3adebf4
 
6a1bcc1
 
 
 
cd79658
6a1bcc1
 
cd79658
6a1bcc1
cd79658
 
 
6a1bcc1
 
 
cd79658
6a1bcc1
 
 
cd79658
6a1bcc1
 
cd79658
 
 
 
 
 
 
 
 
 
 
 
6a1bcc1
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from abc import ABC
from dataclasses import field
from typing import Any, Dict

from datasets import Dataset, Features, Sequence, Value

from .operator import StreamInstanceOperator


class Validator(ABC):
    """Marker base class for validators; it defines no attributes or methods of its own."""


class ValidateSchema(Validator, StreamInstanceOperator):
    """Stream instance operator that checks instances against a ``Features`` schema.

    ``process`` passes every instance through unchanged; the actual checks are
    performed by ``verify`` and ``verify_first_instance``.
    """

    # Schema describing the expected fields; must be set to a Features object.
    schema: Features = None

    def verify(self):
        """Check that a usable schema has been configured.

        Raises:
            AssertionError: If ``schema`` is ``None`` or is not a ``Features``
                instance.
        """
        # Test for None first: the isinstance check also fails on None, which
        # previously made the "Schema must be specified" message unreachable.
        assert self.schema is not None, "Schema must be specified"
        assert isinstance(self.schema, Features), "Schema must be an instance of Features"

    def verify_first_instance(self, instance):
        """Check that every expected field is present in ``instance``.

        The expected field names are taken from a ``standart_fields`` attribute
        when the object has one (kept for backward compatibility); otherwise
        they are the keys of ``schema``. A schema is expected to be configured
        (see ``verify``).

        Args:
            instance: Container supporting ``in`` lookups by field name.

        Raises:
            AssertionError: If an expected field is missing from ``instance``.
        """
        # ``standart_fields`` is not defined by this class, so fall back to the
        # schema's own field names instead of failing with AttributeError.
        expected_fields = getattr(self, "standart_fields", self.schema)
        # ``field_name`` avoids shadowing the imported ``dataclasses.field``.
        for field_name in expected_fields:
            assert field_name in instance, f'Field "{field_name}" is missing in the first instance'

    def process(self, instance: Dict[str, Any], stream_name: str = None) -> Dict[str, Any]:
        """Return ``instance`` unchanged; ``stream_name`` is not used."""
        return instance


class StandardSchema(Features):
    """``Features`` schema pre-populated with a fixed set of fields.

    The fields are ``source``, ``target`` and ``parser`` (strings) plus
    ``references`` and ``metrics`` (sequences of strings).
    """

    def __init__(self):
        # Build the field mapping first, then hand it to Features in one call.
        standard_fields = {
            "source": Value("string"),
            "target": Value("string"),
            "references": Sequence(Value("string")),
            "metrics": Sequence(Value("string")),
            "parser": Value("string"),
            # Disabled fields, not part of the schema:
            # 'group': Value('string'),
            # 'guidance': Value('string'),
        }
        super().__init__(standard_fields)


class ValidateStandartSchema:
    """Declares a ``schema`` attribute defaulting to a ``StandardSchema``.

    NOTE(review): as written here the class has no base class and no
    ``@dataclass`` decorator, so ``schema`` is bound to the
    ``dataclasses.Field`` object returned by ``field(...)`` rather than to a
    ``StandardSchema`` instance. Presumably it was meant to derive from
    ``ValidateSchema`` (or otherwise be processed as a dataclass) -- confirm.
    """

    # default_factory is only invoked by dataclass machinery, one call per instance.
    schema: Features = field(default_factory=StandardSchema)