Elron committed on
Commit
cd79658
1 Parent(s): 4af03fb

Upload validate.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. validate.py +24 -26
validate.py CHANGED
@@ -1,46 +1,44 @@
1
- from .text_utils import split_words
2
- from .stream import StreamInstanceOperator, InstanceOperatorWithGlobalAccess, Artifact
 
3
 
4
- from datasets import Value, Features, Dataset, Sequence
5
 
6
- from dataclasses import field
7
- from typing import Dict, Any
8
- from abc import ABC, abstractmethod
9
 
10
  class Validator(ABC):
11
  pass
12
 
 
13
  class ValidateSchema(Validator, StreamInstanceOperator):
14
-
15
  schema: Features = None
16
-
17
  def verify(self):
18
- assert isinstance(self.schema, Features), 'Schema must be an instance of Features'
19
- assert self.schema is not None, 'Schema must be specified'
20
-
21
  def verify_first_instance(self, instance):
22
  for field in self.standart_fields:
23
  assert field in instance, f'Field "{field}" is missing in the first instance'
24
-
25
  def process(self, instance: Dict[str, Any], stream_name: str = None) -> Dict[str, Any]:
26
  return instance
27
 
 
28
  class StandardSchema(Features):
29
-
30
  def __init__(self):
31
- super().__init__({
32
- 'source': Value('string'),
33
- 'target': Value('string'),
34
- 'references': Sequence(Value('string')),
35
- 'metrics': Sequence(Value('string')),
36
- 'parser': Value('string'),
37
- # 'group': Value('string'),
38
- # 'guidance': Value('string'),
39
- })
 
 
 
40
 
41
  class ValidateStandartSchema:
42
-
43
  schema: Features = field(default_factory=StandardSchema)
44
-
45
-
46
-
 
1
+ from abc import ABC
2
+ from dataclasses import field
3
+ from typing import Any, Dict
4
 
5
+ from datasets import Dataset, Features, Sequence, Value
6
 
7
+ from .operator import StreamInstanceOperator
 
 
8
 
9
  class Validator(ABC):
10
  pass
11
 
12
+
13
  class ValidateSchema(Validator, StreamInstanceOperator):
 
14
  schema: Features = None
15
+
16
  def verify(self):
17
+ assert isinstance(self.schema, Features), "Schema must be an instance of Features"
18
+ assert self.schema is not None, "Schema must be specified"
19
+
20
  def verify_first_instance(self, instance):
21
  for field in self.standart_fields:
22
  assert field in instance, f'Field "{field}" is missing in the first instance'
23
+
24
  def process(self, instance: Dict[str, Any], stream_name: str = None) -> Dict[str, Any]:
25
  return instance
26
 
27
+
28
  class StandardSchema(Features):
 
29
  def __init__(self):
30
+ super().__init__(
31
+ {
32
+ "source": Value("string"),
33
+ "target": Value("string"),
34
+ "references": Sequence(Value("string")),
35
+ "metrics": Sequence(Value("string")),
36
+ "parser": Value("string"),
37
+ # 'group': Value('string'),
38
+ # 'guidance': Value('string'),
39
+ }
40
+ )
41
+
42
 
43
  class ValidateStandartSchema:
 
44
  schema: Features = field(default_factory=StandardSchema)