File size: 1,418 Bytes
cd79658
 
c3a1efb
6a1bcc1
c3a1efb
6a1bcc1
3adebf4
 
6a1bcc1
 
 
 
cd79658
6a1bcc1
 
cd79658
6a1bcc1
c3a1efb
 
 
cd79658
 
6a1bcc1
c3a1efb
 
 
 
 
 
 
 
6a1bcc1
 
cd79658
6a1bcc1
 
cd79658
 
 
 
 
 
 
 
 
 
 
 
6a1bcc1
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from abc import ABC
from dataclasses import field
from typing import Any, Dict, Optional

from datasets import Features, Sequence, Value

from .operator import StreamInstanceOperator


class Validator(ABC):
    """Marker base class for validators; it declares no members of its own."""


class ValidateSchema(Validator, StreamInstanceOperator):
    """Stream instance operator that carries a ``Features`` schema to check against.

    ``process`` returns each instance unchanged; the actual checks live in
    ``verify`` (is the schema configured correctly?) and
    ``verify_first_instance`` (are the required fields present?).
    """

    # Schema to validate against; must be a ``Features`` object for ``verify``
    # to pass.
    schema: Features = None

    def verify(self):
        """Assert that ``schema`` was provided and is a ``Features`` instance.

        Raises:
            AssertionError: If ``schema`` is ``None`` or is not a ``Features``.
        """
        # The None check has to come first: isinstance(None, Features) is
        # already False, so in the opposite order the more specific
        # "must be specified" message could never be reported.
        assert self.schema is not None, "Schema must be specified"
        assert isinstance(
            self.schema, Features
        ), "Schema must be an instance of Features"

    def verify_first_instance(self, instance):
        """Assert that every required field is present in ``instance``.

        Args:
            instance: Container supporting ``in`` (e.g. a dict of fields).

        Raises:
            AssertionError: Naming the first required field that is missing.
        """
        # ``standart_fields`` is not defined in this class. Honor it when a
        # base class or subclass provides it; otherwise fall back to the field
        # names of the schema (iterating a ``Features`` mapping yields its keys).
        required_fields = getattr(self, "standart_fields", None)
        if required_fields is None:
            required_fields = self.schema or ()
        for std_field in required_fields:
            assert (
                std_field in instance
            ), f'Field "{std_field}" is missing in the first instance'

    def process(
        self, instance: Dict[str, Any], stream_name: Optional[str] = None
    ) -> Dict[str, Any]:
        """Return ``instance`` unchanged.

        Args:
            instance: The instance to pass through.
            stream_name: Unused here.

        Returns:
            The very same ``instance`` object.
        """
        return instance


class StandardSchema(Features):
    """``Features`` preset with the five standard columns.

    ``source``, ``target`` and ``parser`` are string values; ``references``
    and ``metrics`` are sequences of string values.
    """

    def __init__(self):
        """Build the fixed column mapping and hand it to ``Features``."""
        list_columns = ("references", "metrics")
        column_names = ("source", "target", "references", "metrics", "parser")
        # "group" and "guidance" string columns are deliberately left out for now.
        column_types = {}
        for column in column_names:
            if column in list_columns:
                column_types[column] = Sequence(Value("string"))
            else:
                column_types[column] = Value("string")
        super().__init__(column_types)


class ValidateStandartSchema(ValidateSchema):
    """``ValidateSchema`` preconfigured with a fresh ``StandardSchema``.

    The class previously had no base, so it inherited none of the validation
    behavior and ``schema`` stayed a raw ``dataclasses.Field`` object rather
    than a ``Features`` instance.
    """

    # NOTE(review): this relies on the ValidateSchema base hierarchy turning
    # dataclass-style ``field(...)`` declarations into per-instance defaults;
    # confirm against the operator base classes.
    schema: Features = field(default_factory=StandardSchema)