Elron committed on
Commit
3da1d9d
1 Parent(s): 0e631d9

Upload schema.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. schema.py +4 -7
schema.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from dataclasses import field
2
  from typing import Any, Dict, List, Optional
3
 
@@ -13,9 +14,7 @@ UNITXT_DATASET_SCHEMA = Features(
13
  "metrics": Sequence(Value("string")),
14
  "group": Value("string"),
15
  "postprocessors": Sequence(Value("string")),
16
- "additional_inputs": Sequence(
17
- {"key": Value(dtype="string"), "value": Value("string")}
18
- ),
19
  }
20
  )
21
 
@@ -44,10 +43,8 @@ class ToUnitxtGroup(StreamInstanceOperatorValidator):
44
  def process(
45
  self, instance: Dict[str, Any], stream_name: Optional[str] = None
46
  ) -> Dict[str, Any]:
47
- additional_inputs = {**instance["inputs"], **instance["outputs"]}
48
- instance["additional_inputs"] = self._to_lists_of_keys_and_values(
49
- additional_inputs
50
- )
51
 
52
  if self.remove_unnecessary_fields:
53
  keys_to_delete = []
 
1
+ import json
2
  from dataclasses import field
3
  from typing import Any, Dict, List, Optional
4
 
 
14
  "metrics": Sequence(Value("string")),
15
  "group": Value("string"),
16
  "postprocessors": Sequence(Value("string")),
17
+ "task_data": Value(dtype="string"),
 
 
18
  }
19
  )
20
 
 
43
  def process(
44
  self, instance: Dict[str, Any], stream_name: Optional[str] = None
45
  ) -> Dict[str, Any]:
46
+ task_data = {**instance["inputs"], **instance["outputs"]}
47
+ instance["task_data"] = json.dumps(task_data)
 
 
48
 
49
  if self.remove_unnecessary_fields:
50
  keys_to_delete = []