Elron committed
Commit
9d5b4c0
1 Parent(s): 24278cc

Upload folder using huggingface_hub

Files changed (21):
  1. artifact.py +1 -2
  2. blocks.py +1 -1
  3. collections_operators.py +6 -1
  4. dataset.py +1 -0
  5. error_utils.py +50 -0
  6. generator_utils.py +2 -2
  7. inference.py +44 -29
  8. loaders.py +1 -1
  9. metric.py +1 -0
  10. metric_utils.py +1 -1
  11. metrics.py +152 -44
  12. operators.py +1 -2
  13. schema.py +14 -11
  14. splitters.py +56 -47
  15. standard.py +114 -67
  16. stream.py +1 -1
  17. struct_data_operators.py +1 -1
  18. task.py +27 -15
  19. templates.py +76 -21
  20. utils.py +5 -0
  21. version.py +1 -1
artifact.py CHANGED
@@ -5,7 +5,6 @@ import os
 import pkgutil
 import re
 from abc import abstractmethod
-from copy import deepcopy
 from typing import Any, Dict, List, Optional, Tuple, Union, final
 
 from .dataclass import (
@@ -23,7 +22,7 @@ from .parsing_utils import (
 from .settings_utils import get_constants, get_settings
 from .text_utils import camel_to_snake_case, is_camel_case
 from .type_utils import issubtype
-from .utils import artifacts_json_cache, json_dump, save_to_file
+from .utils import artifacts_json_cache, deepcopy, json_dump, save_to_file
 
 logger = get_logger()
 settings = get_settings()
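
A pattern that repeats through this commit: every `from copy import deepcopy` is swapped for a `deepcopy` imported from the package's own utils module (extended by +5 lines here), so all deep-copying flows through one choke point. The diff does not show the body of utils.deepcopy; a minimal sketch of what such a wrapper could look like, purely as an assumption:

import copy

def deepcopy(obj):
    # Assumed implementation: delegate to the standard library.
    # Centralizing the call lets the project later swap in a different
    # copying strategy without touching the many call sites below.
    return copy.deepcopy(obj)
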
blocks.py CHANGED
@@ -18,7 +18,7 @@ from .operators import (
 )
 from .processors import ToString, ToStringStripped
 from .recipe import SequentialRecipe
-from .splitters import RandomSampler, SliceSplit, SplitRandomMix, SpreadSplit
+from .splitters import RandomSampler, Sample, SliceSplit, SplitRandomMix
 from .stream import MultiStream
 from .struct_data_operators import (
     ListToKeyValPairs,
collections_operators.py CHANGED
@@ -1,8 +1,8 @@
-from copy import deepcopy
 from typing import Any, Generator, List, Optional
 
 from .operators import FieldOperator, StreamOperator
 from .stream import Stream
+from .utils import deepcopy
 
 
 class Dictify(FieldOperator):
@@ -100,3 +100,8 @@ class DuplicateBySubLists(StreamOperator):
                 to_field: elements[:i],
             }
             yield instance_copy
+
+
+class GetLength(FieldOperator):
+    def process_value(self, collection: Any) -> Any:
+        return len(collection)
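
GetLength is a standard FieldOperator, so it should compose like any other field operator; a usage sketch (the field names below are made up for illustration):

from unitxt.collections_operators import GetLength

# Writes len(instance["choices"]) into instance["num_choices"]
# for every instance flowing through the stream.
get_length = GetLength(field="choices", to_field="num_choices")
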
dataset.py CHANGED
@@ -15,6 +15,7 @@ from .dataset_utils import get_dataset_artifact
 from .deprecation_utils import __file__ as _
 from .dialog_operators import __file__ as _
 from .dict_utils import __file__ as _
+from .error_utils import __file__ as _
 from .eval_utils import __file__ as _
 from .file_utils import __file__ as _
 from .formats import __file__ as _
error_utils.py ADDED
@@ -0,0 +1,50 @@
+from typing import Optional
+
+from .logging_utils import get_logger
+
+logger = get_logger()
+
+
+class Documentation:
+    URL = "https://www.unitxt.ai/en/latest/"
+    HUGGINGFACE_METRICS = "docs/adding_metric.html#adding-a-hugginface-metric"
+    ADDING_TASK = "docs/adding_task.html"
+    ADDING_TEMPLATE = "docs/adding_template.html"
+    MULTIPLE_METRICS_OUTPUTS = (
+        "docs/adding_metric.html#metric-outputs-with-multiple-metrics"
+    )
+
+
+def additional_info(path: str) -> str:
+    return f"\nFor more information: see {Documentation.URL}/{path} \n"
+
+
+class UnitxtError(Exception):
+    """Exception raised for Unitxt errors.
+
+    Attributes:
+        message : str -- explanation of the error
+        additional_info_id : Optional[str] -- relative path to additional documentation on web
+            If set, should be one of the DOCUMENATION_* constants in the error_utils.py file.
+
+    """
+
+    def __init__(self, message: str, additional_info_id: Optional[str] = None):
+        if additional_info_id is not None:
+            message += additional_info(additional_info_id)
+        super().__init__(message)
+
+
+class UnitxtWarning:
+    """Object to format warning message to log.
+
+    Attributes:
+        message -- explanation of the warning
+        additional_info_id : Optional[str] -- relative path to additional documentation on web
+            If set, should be one of the DOCUMENATION_* constants in the error_utils.py file.
+    """
+
+    def __init__(self, message: str, additional_info_id: Optional[str] = None):
+        if additional_info_id is not None:
+            message += additional_info(additional_info_id)
+        logger.warning(message)
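
A short sketch of how these two helpers are meant to be called (the messages are invented; only the class names and Documentation constants come from the file above):

from unitxt.error_utils import Documentation, UnitxtError, UnitxtWarning

# UnitxtWarning formats and logs immediately on construction:
UnitxtWarning(
    "Two metrics wrote the same score name.",
    additional_info_id=Documentation.MULTIPLE_METRICS_OUTPUTS,
)

# UnitxtError appends a documentation link to the message before raising:
raise UnitxtError(
    "This task is missing a required field.",
    additional_info_id=Documentation.ADDING_TASK,
)
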
generator_utils.py CHANGED
@@ -1,7 +1,7 @@
-import copy
 from typing import Any, Dict, List
 
 from .dataclass import Dataclass, OptionalField
+from .utils import deepcopy
 
 
 class ReusableGenerator(Dataclass):
@@ -22,7 +22,7 @@ class ReusableGenerator(Dataclass):
 class CopyingReusableGenerator(ReusableGenerator):
     def __iter__(self):
         for instance in self.activate():
-            yield copy.deepcopy(instance)
+            yield deepcopy(instance)
 
 
 # if __name__ == "__main__":
inference.py CHANGED
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
 from tqdm import tqdm
 
 from .artifact import Artifact
+from .dataclass import InternalField
 from .deprecation_utils import deprecation
 from .logging_utils import get_logger
 from .operator import PackageRequirementsMixin
@@ -376,13 +377,11 @@ class WMLInferenceEngine(
     """Runs inference using ibm-watsonx-ai.
 
     Attributes:
-        client: By default, it is created by a class instance but can be directly
-            provided instead as an instance of 'ibm_watsonx_ai.client.APIClient'.
-        credentials: By default, it is created by a class instance which tries to retrieve
-            proper environment variables ("WML_URL", "WML_PROJECT_ID", "WML_APIKEY").
-            However, either a dictionary with the following keys: "url", "apikey",
-            "project_id", or an instance of 'ibm_watsonx_ai.credentials.Credentials'
-            can be directly provided instead.
+        credentials (Dict[str, str], optional): By default, it is created by a class
+            instance which tries to retrieve proper environment variables
+            ("WML_URL", "WML_PROJECT_ID", "WML_APIKEY"). However, a dictionary with
+            the following keys: "url", "apikey", "project_id" can be directly provided
+            instead.
         model_name (str, optional): ID of a model to be used for inference. Mutually
             exclusive with 'deployment_id'.
         deployment_id (str, optional): Deployment ID of a tuned model to be used for
@@ -412,8 +411,7 @@ class WMLInferenceEngine(
         results = wml_inference.infer(dataset["test"])
     """
 
-    client: Any = None
-    credentials: Any = None
+    credentials: Optional[Dict[Literal["url", "apikey", "project_id"], str]] = None
     model_name: Optional[str] = None
     deployment_id: Optional[str] = None
    label: str = "wml"
@@ -422,11 +420,40 @@ class WMLInferenceEngine(
         "It is advised to have Python version >=3.10 installed, as at lower version this package "
         "may cause conflicts with other installed packages."
     }
-    data_classification_policy = ["proprietary"]
+    data_classification_policy = ["public", "proprietary"]
     parameters: Optional[WMLInferenceEngineParams] = None
 
+    _client: Any = InternalField(default=None, name="WML client")
+
+    def verify(self):
+        super().verify()
+
+        if self.credentials is not None:
+            for key in self.credentials:
+                if key not in ["url", "apikey", "project_id"]:
+                    raise ValueError(
+                        f'Illegal credential key: {key}, use only ["url", "apikey", "project_id"]'
+                    )
+
+        assert (
+            self.model_name
+            or self.deployment_id
+            and not (self.model_name and self.deployment_id)
+        ), "Either 'model_name' or 'deployment_id' must be specified, but not both at the same time."
+
+    def process_data_before_dump(self, data):
+        if "credentials" in data:
+            for key, value in data["credentials"].items():
+                if key != "url":
+                    data["credentials"][key] = "<hidden>"
+                else:
+                    data["credentials"][key] = value
+        return data
+
     @staticmethod
-    def _read_wml_credentials_from_env() -> Dict[str, str]:
+    def _read_wml_credentials_from_env() -> (
+        Dict[Literal["url", "apikey", "project_id"], str]
+    ):
         credentials = {}
         for env_var_name in ["WML_URL", "WML_PROJECT_ID", "WML_APIKEY"]:
             env_var = os.environ.get(env_var_name)
@@ -453,32 +480,20 @@ class WMLInferenceEngine(
         return client
 
     def prepare(self):
-        if self.client is None:
-            self.client = self._initialize_wml_client()
+        self._client = self._initialize_wml_client()
 
         self._set_inference_parameters()
 
-    def verify(self):
-        assert (
-            self.model_name
-            or self.deployment_id
-            and not (self.model_name and self.deployment_id)
-        ), "Either 'model_name' or 'deployment_id' must be specified, but not both at the same time."
-        super().verify()
-
     def _infer(self, dataset):
         from ibm_watsonx_ai.foundation_models import ModelInference
 
         model = ModelInference(
             model_id=self.model_name,
             deployment_id=self.deployment_id,
-            api_client=self.client,
+            api_client=self._client,
         )
 
-        return [
-            model.generate_text(
-                prompt=instance["source"],
-                params=self.to_dict([WMLInferenceEngineParamsMixin], keep_empty=False),
-            )
-            for instance in dataset
-        ]
+        return model.generate_text(
+            prompt=dataset["source"],
+            params=self.to_dict([WMLInferenceEngineParamsMixin], keep_empty=False),
+        )
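
Putting the new credentials flow together, constructing the engine might look roughly like this (the credential values and model id are placeholders, not from the diff):

from unitxt.inference import WMLInferenceEngine

wml_inference = WMLInferenceEngine(
    # Omit credentials to fall back on the WML_URL / WML_PROJECT_ID /
    # WML_APIKEY environment variables; any key other than "url",
    # "apikey", "project_id" now raises ValueError in verify().
    credentials={"url": "...", "apikey": "...", "project_id": "..."},
    model_name="some-model-id",  # mutually exclusive with deployment_id
)

Note also that process_data_before_dump() masks every credential except "url" as "<hidden>", so API keys no longer leak when the engine is serialized.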
 
 
 
loaders.py CHANGED
@@ -36,7 +36,6 @@ import itertools
 import os
 import tempfile
 from abc import abstractmethod
-from copy import deepcopy
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from typing import Any, Dict, List, Mapping, Optional, Sequence, Union
@@ -54,6 +53,7 @@ from .operators import Set
 from .settings_utils import get_settings
 from .stream import DynamicStream, MultiStream
 from .type_utils import isoftype
+from .utils import deepcopy
 
 logger = get_logger()
 settings = get_settings()
metric.py CHANGED
@@ -14,6 +14,7 @@ from .dataset_utils import __file__ as _
 from .deprecation_utils import __file__ as _
 from .dialog_operators import __file__ as _
 from .dict_utils import __file__ as _
+from .error_utils import __file__ as _
 from .eval_utils import __file__ as _
 from .file_utils import __file__ as _
 from .formats import __file__ as _
metric_utils.py CHANGED
@@ -1,5 +1,4 @@
 import json
-from copy import deepcopy
 from typing import Any, Dict, Generator, Iterable, List, Optional
 
 from datasets import Features, Value
@@ -27,6 +26,7 @@ from .schema import UNITXT_DATASET_SCHEMA
 from .settings_utils import get_settings
 from .stream import DynamicStream, MultiStream
 from .struct_data_operators import LoadJson
+from .utils import deepcopy
 
 
 class MultiStreamScoreMean(MultiStreamOperator):
metrics.py CHANGED
@@ -1,15 +1,14 @@
 import ast
 import json
+import os
 import re
 import string
 import uuid
 import warnings
 from abc import ABC, abstractmethod
 from collections import Counter, defaultdict
-from copy import deepcopy
 from dataclasses import field
 from operator import itemgetter
-from statistics import mean
 from typing import Any, Dict, Generator, List, Optional, Tuple, Union
 
 import evaluate
@@ -22,11 +21,13 @@ from scipy.stats._warnings_errors import DegenerateDataWarning
 from .artifact import Artifact, fetch_artifact
 from .dataclass import (
     AbstractField,
+    DeprecatedField,
     InternalField,
     NonPositionalField,
     OptionalField,
 )
 from .deprecation_utils import deprecation
+from .error_utils import Documentation, UnitxtWarning
 from .inference import HFPipelineBasedInferenceEngine, InferenceEngine
 from .logging_utils import get_logger
 from .metric_utils import InstanceInput, MetricRequest, MetricResponse
@@ -42,6 +43,7 @@ from .random_utils import get_seed
 from .settings_utils import get_settings
 from .stream import MultiStream, Stream
 from .type_utils import Type, isoftype, parse_type_string, to_type_string
+from .utils import deepcopy
 
 logger = get_logger()
 settings = get_settings()
@@ -141,13 +143,25 @@ class Metric(Artifact):
             else score_name
         )
 
-    def _add_score_prefixes_to_score_dict(self, scores: Dict[str, Any]):
+    def _add_score_prefixes_to_score_dict_and_check_against_existing_scores(
+        self, scores: Dict[str, Any], existing_scores: Dict[str, Any]
+    ) -> Dict[str, Any]:
         new_scores = {}
         for score_name, score in scores.items():
             score_with_prefix = self._add_score_prefix(score_name)
             new_scores[score_with_prefix] = (
                 score if score_name not in ["score_name"] else self.score_prefix + score
             )
+        for new_score_name in new_scores:
+            if new_score_name in ["score", "score_name"]:
+                continue
+            if new_score_name in existing_scores:
+                UnitxtWarning(
+                    message=f"Metric '{new_score_name}' that has just been evaluated to {new_scores[new_score_name]}, is already recorded "
+                    f"to have value {existing_scores[new_score_name]} by a previous metric evaluation on this instance or stream. "
+                    f"To avoid overwriting the existing value, add a score_prefix to the metric (e.g. score_prefix='my_second_').",
+                    additional_info_id=Documentation.MULTIPLE_METRICS_OUTPUTS,
+                )
         return new_scores
 
     def _validate_references_and_prediction(self, references, predictions):
@@ -238,12 +252,14 @@ class Metric(Artifact):
     def disable_confidence_interval_calculation(self):
         pass
 
-    # update instance["score"]["global"] with the newly computed global score, global_score, for the
-    # current metric computed. global_score contains "score" and "score_name" fields that reflect
-    # (the main_score of) the current metric.
+    # update instance["score"]["global"] with the global_score just computed for the
+    # current metric. global_score contains "score" and "score_name" fields that reflect
+    # (the main_score of) the current metric. If CI was computed for global_score, then global_score
+    # also contains "score_ci_low" and "score_ci_high" that reflect (the main_score of) the current metric.
     # A simple python-dictionary-update adds new fields to instance["score"]["global"], and also replaces the values
-    # of its fields "score" and "score_name", to reflect the current metric, overwriting previous metrics' settings
-    # of these fields (if any previous metric exists).
+    # of its fields "score" and "score_name" (and "score_ci_low", "score_ci_high" if applicable),
+    # to reflect the current metric, overwriting previous metrics' settings of these fields
+    # (if any previous metric exists).
     # When global_score does NOT contain ci score (because CI was not computed for the current metric), but
     # one of the previous metrics computed did have, the last of such previous metrics set the values in
     # fields "score_ci_low" and "score_ci_high" in instance["score"]["global"] to reflect its
@@ -254,15 +270,25 @@ class Metric(Artifact):
     # therefore, not consistent with "score_name".
     # In such a case, following the python-dictionary-update, we pop out fields "score_ci_low" and
     # "score_ci_high" from instance["score"]["global"], so that now all the fields "score.." in
-    # instance["score"]["global"] are consistent with the current metric: The current metric
-    # is named instance["score"]["global"]["score_name"], its score shows in
+    # instance["score"]["global"] are consistent with the current metric: The metric that is named
+    # instance["score"]["global"]["score_name"], its score shows in
     # field instance["score"]["global"]["score"], and it does not have ci_scores,
     # which is also reflected in the absence of fields "score_ci_low" and "score_ci_high" from instance["score"]["global"].
     # If ci IS computed for the current metric, global_score contains "score_ci_low" and "score_ci_high", and these overwrite
-    # the ones existing in instance["score"]["global"] by a simple python-dictionary-update, and no need for any further fixeup.
+    # the ones existing in instance["score"]["global"] by the simple python-dictionary-update, and no need for any further fixeup.
     def update_and_adjust_global_score(
         self, instance: Dict[str, Any], global_score: dict
     ):
+        for score_name in global_score:
+            if score_name in ["score", "score_name", "score_ci_low", "score_ci_high"]:
+                continue
+            if score_name in instance["score"]["global"]:
+                UnitxtWarning(
+                    message=f"Global metric '{score_name}' that has just been evaluated to {global_score[score_name]}, is already recorded "
+                    f"to have value {instance['score']['global'][score_name]} by a previous metric evaluation on this stream. "
+                    f"To avoid overwriting the value, add a score_prefix to the metric (e.g. score_prefix='my_{score_name}'.",
+                    additional_info_id=Documentation.MULTIPLE_METRICS_OUTPUTS,
+                )
         instance["score"]["global"].update(global_score)
         for score_ci in ["score_ci_low", "score_ci_high"]:
             if score_ci in global_score:
@@ -559,12 +585,18 @@ class GlobalMetric(StreamOperator, MetricWithConfidenceInterval):
             instance_score[self.main_score] = no_score_value
 
         instance["score"]["instance"].update(
-            self._add_score_prefixes_to_score_dict(instance_score)
+            self._add_score_prefixes_to_score_dict_and_check_against_existing_scores(
+                instance_score, instance["score"]["instance"]
+            )
         )
         self._validate_references_and_prediction(references, predictions)
 
         result = self._compute(references, predictions, task_data)
-        global_score.update(self._add_score_prefixes_to_score_dict(result))
+        global_score.update(
+            self._add_score_prefixes_to_score_dict_and_check_against_existing_scores(
+                result, global_score
+            )
+        )
         score_name = global_score["score_name"]
         confidence_interval = self.compute_global_confidence_intervals(
             references, predictions, task_data, score_name
@@ -657,7 +689,9 @@ class BulkInstanceMetric(StreamOperator, MetricWithConfidenceInterval):
             instance["score"] = {"global": {}, "instance": {}}
 
             instance["score"]["instance"].update(
-                self._add_score_prefixes_to_score_dict(score)
+                self._add_score_prefixes_to_score_dict_and_check_against_existing_scores(
+                    score, instance["score"]["instance"]
+                )
             )
             instances.append(instance)
 
@@ -669,7 +703,7 @@ class BulkInstanceMetric(StreamOperator, MetricWithConfidenceInterval):
         if reduction == "mean":
             for field_name in fields:
                 field_name_with_prefix = self._add_score_prefix(field_name)
-                global_score[field_name_with_prefix] = mean(
+                global_score[field_name_with_prefix] = nan_mean(
                     [
                         instance["score"]["instance"][field_name_with_prefix]
                         for instance in instances
@@ -1140,7 +1174,9 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
             instance["score"] = {"global": {}, "instance": {}}
 
             instance["score"]["instance"].update(
-                self._add_score_prefixes_to_score_dict(instance_score)
+                self._add_score_prefixes_to_score_dict_and_check_against_existing_scores(
+                    instance_score, instance["score"]["instance"]
+                )
            )
 
             instances.append(instance)
@@ -1326,7 +1362,6 @@ class StringContainment(InstanceMetric):
     ci_scores = ["string_containment"]
 
     prediction_type = Any  # string representation is compared
-    single_reference_per_prediction = False  # multiple references allowed
 
     def compute(
         self, references: List[Any], prediction: Any, task_data: List[Dict]
@@ -1341,11 +1376,59 @@ class StringContainment(InstanceMetric):
         return result
 
 
+class StringContainmentRatio(InstanceMetric):
+    """Metric that returns the ratio of values from a specific field contained in the prediction.
+
+    Attributes:
+        field: The field from the task_data that contains the values to be checked for containment.
+    Example task:
+        Task(
+            input_fields={"question": str},
+            reference_fields={"entities": str},
+            prediction_type=str,
+            metrics=["string_containment_ratio[field=entities]"],
+        )
+    """
+
+    reduction_map = {"mean": ["string_containment"]}
+    main_score = "string_containment"
+    ci_scores = ["string_containment"]
+    field: str = None
+
+    prediction_type = Any  # string representation is compared
+
+    def compute(
+        self, references: List[Any], prediction: Any, task_data: List[Dict]
+    ) -> dict:
+        if self.field not in task_data:
+            raise ValueError(
+                f"'{self.field}' field required by {__class__.__name__} is not in passed in task_data: {task_data}"
+            )
+        contain_results = [
+            str(value) in str(prediction) for value in task_data[self.field]
+        ]
+        score = sum(contain_results) / len(contain_results)
+        result = {self.main_score: score}
+        result["score"] = result[self.main_score]
+        result["score_name"] = self.main_score
+        return result
+
+    def verify(self):
+        super().verify()
+        if self.field is None:
+            raise ValueError(
+                "StringContainmentRatio metric requires the 'field' attribute to be set."
+            )
+
+
 class MetricPipeline(MultiStreamOperator, Metric):
     main_score: str = None
     preprocess_steps: Optional[List[StreamingOperator]] = field(default_factory=list)
-    postpreprocess_steps: Optional[List[StreamingOperator]] = field(
-        default_factory=list
+    postprocess_steps: Optional[List[StreamingOperator]] = field(default_factory=list)
+    postpreprocess_steps: Optional[List[StreamingOperator]] = DeprecatedField(
+        metadata={
+            "deprecation_msg": "Field 'postpreprocess_steps' is deprecated. Please use 'postprocess_steps' for the same purpose."
+        }
     )
     metric: Metric = None
 
@@ -1366,6 +1449,23 @@ class MetricPipeline(MultiStreamOperator, Metric):
 
     def prepare(self):
         super().prepare()
+        has_postpreprocess = (
+            hasattr(self, "postpreprocess_steps")
+            and self.postpreprocess_steps is not None
+            and isinstance(self.postpreprocess_steps, list)
+            and len(self.postpreprocess_steps) > 0
+        )
+        has_postprocess = (
+            hasattr(self, "postprocess_steps")
+            and self.postprocess_steps is not None
+            and isinstance(self.postprocess_steps, list)
+            and len(self.postprocess_steps) > 0
+        )
+        assert not (
+            has_postpreprocess and has_postprocess
+        ), "Must define at most one of postpreprocess_steps (which is deprecated) and postprocess_steps (to be used from now on)"
+        if has_postpreprocess:
+            self.postprocess_steps = self.postpreprocess_steps
         self.prepare_score = Copy(
             field_to_field=[
                 [
@@ -1383,7 +1483,7 @@ class MetricPipeline(MultiStreamOperator, Metric):
         for step in self.preprocess_steps:
             multi_stream = step(multi_stream)
         multi_stream = self.metric(multi_stream)
-        for step in self.postpreprocess_steps:
+        for step in self.postprocess_steps:
             multi_stream = step(multi_stream)
         return self.prepare_score(multi_stream)
 
@@ -1409,6 +1509,13 @@ class HuggingfaceMetric(GlobalMetric):
     experiment_id: str = OptionalField(default_factory=lambda: str(uuid.uuid4()))
 
     def verify(self):
+        if os.path.exists(self.hf_metric_name):
+            UnitxtWarning(
+                f"{self.get_metric_name()} uses a huggingface metric {self.hf_metric_name} which is defined in a local file."
+                f"This may cause issues when running on different machine or different root directories.",
+                Documentation.HUGGINGFACE_METRICS,
+            )
+
         assert (
             self.hf_additional_input_fields is None
             or isoftype(self.hf_additional_input_fields, List[str])
@@ -1654,7 +1761,7 @@ class F1(GlobalMetric):
             average=self.average,
         )
         if isinstance(result[self.metric], numpy.ndarray):
-            final_result = {self.main_score: mean(result[self.metric])}
+            final_result = {self.main_score: nan_mean(result[self.metric])}
             for i, label in enumerate(labels):
                 final_result[f"{self.metric}_" + self.id_to_str[label]] = result[
                     self.metric
@@ -1959,7 +2066,7 @@ class F1MultiLabel(GlobalMetric):
            assert (
                 len(result[self.metric]) == len(labels)
             ), f"F1 result ({result[self.metric]}) has more entries than labels ({labels})"
-            final_result = {self.main_score: mean(result[self.metric])}
+            final_result = {self.main_score: nan_mean(result[self.metric])}
             for i, label in enumerate(labels):
                 final_result[self.metric + "_" + label] = result[self.metric][i]
         else:
@@ -2001,7 +2108,17 @@ class F1MacroMultiLabel(F1MultiLabel):
     average = None
 
 
-class Rouge(InstanceMetric):
+class NLTKMixin(Artifact):
+    def prepare(self):
+        super().prepare()
+        import nltk
+
+        nltk.download("punkt", quiet=True)
+        nltk.download("punkt_tab", quiet=True)
+        self.nltk = nltk
+
+
+class Rouge(InstanceMetric, NLTKMixin):
     main_score = "rougeL"
     prediction_type = str
     single_reference_per_prediction = False  # multiple references allowed
@@ -2014,21 +2131,17 @@ class Rouge(InstanceMetric):
 
     def prepare(self):
         super().prepare()
-        import nltk
         from rouge_score import rouge_scorer
 
         self.rouge_scorer = rouge_scorer
 
-        nltk.download("punkt", quiet=True)
-        self.sent_tokenize = nltk.sent_tokenize
-
     def compute(self, references: List[Any], prediction: Any, task_data: Dict) -> dict:
         # for a single instance, prediction is of type str, and references: list of str
         if self.sent_split_newline:
-            prediction = "\n".join(self.sent_tokenize(prediction.strip()))
+            prediction = "\n".join(self.nltk.sent_tokenize(prediction.strip()))
 
             references = [
-                "\n".join(self.sent_tokenize(reference.strip()))
+                "\n".join(self.nltk.sent_tokenize(reference.strip()))
                 for reference in references
             ]
 
@@ -2044,7 +2157,7 @@ class Rouge(InstanceMetric):
         return score
 
 
-class RougeHF(HuggingfaceInstanceMetric):
+class RougeHF(HuggingfaceInstanceMetric, NLTKMixin):
     hf_metric_name = "rouge"
     main_score = "rougeL"
     scale = 1.0
@@ -2070,18 +2183,13 @@ class RougeHF(HuggingfaceInstanceMetric):
             {"use_aggregator": False, "rouge_types": self.rouge_types}
         )
 
-        import nltk
-
-        nltk.download("punkt", quiet=True)
-        self.sent_tokenize = nltk.sent_tokenize
-
     def compute(self, references, prediction, task_data: List[Dict]):
         # for a single instance, prediction is of type str, and references: list of str
         if self.sent_split_newline:
-            prediction = "\n".join(self.sent_tokenize(prediction.strip()))
+            prediction = "\n".join(self.nltk.sent_tokenize(prediction.strip()))
 
             references = [
-                "\n".join(self.sent_tokenize(reference.strip()))
+                "\n".join(self.nltk.sent_tokenize(reference.strip()))
                 for reference in references
             ]
 
@@ -3360,7 +3468,7 @@ class NDCG(GlobalMetric):
                 for pred in q_predictions
             ]
             scores.append(self.eval([q_references], [q_predictions]))
-        return {self.main_score: mean(scores) if len(scores) > 0 else np.nan}
+        return {self.main_score: nan_mean(scores) if len(scores) > 0 else np.nan}
 
 
 class RetrievalMetric(InstanceMetric):
@@ -3695,8 +3803,8 @@ def performance_drop_rate(
     if any(len(scores) == 0 for scores in group_scores_list):
         # no comparison can be made since there is not at least one score per type
         return np.nan
-    control_mean = mean(group_scores_list[0])
-    comparison_mean = mean(group_scores_list[1])
+    control_mean = nan_mean(group_scores_list[0])
+    comparison_mean = nan_mean(group_scores_list[1])
     if control_mean == 0:
         # return 0 if comparison is also 0
         if comparison_mean == 0:
@@ -3809,8 +3917,8 @@ def normalized_cohens_h(
         # no comparison can be made since there is not at least one score per type
         h, norm_h = np.nan, np.nan
     else:
-        control_mean = mean(group_scores_list[0])
-        comparison_mean = mean(group_scores_list[1])
+        control_mean = nan_mean(group_scores_list[0])
+        comparison_mean = nan_mean(group_scores_list[1])
         h = 2 * (np.arcsin(np.sqrt(comparison_mean)) - np.arcsin(np.sqrt(control_mean)))
         norm_h = np.clip(a=h / np.pi, a_min=-1, a_max=1)
 
@@ -3863,7 +3971,7 @@ def normalized_hedges_g(
         g, norm_g = np.nan, np.nan
     else:
         # otherwise, calculate the variances
-        group_mean = [mean(scores) for scores in group_scores_list]
+        group_mean = [nan_mean(scores) for scores in group_scores_list]
         # sample variance with 1 degree of freedom (denominator n-1); if n=1, return 0 since otherwise throws an error
         group_var = [
             0.0 if nn == 1 else np.var(scores, ddof=1)
@@ -3922,7 +4030,7 @@ def mean_subgroup_score(
     if len(score_list) == 0:
         # no scores to use
         return np.nan
-    return mean(score_list)
+    return nan_mean(score_list)
 
 
 # metrics using mean reduction
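
Two of these changes touch user-facing configuration. First, a UnitxtWarning now fires when a metric writes a score name that an earlier metric already recorded on the same instance or stream; the suggested remedy is a score prefix, roughly like this (the choice of F1Micro is illustrative):

from unitxt.metrics import F1Micro

# The second metric keeps its scores distinct via score_prefix,
# so it no longer overwrites the first one's scores silently.
first = F1Micro()
second = F1Micro(score_prefix="my_second_")

Second, MetricPipeline's postpreprocess_steps is deprecated in favor of postprocess_steps: prepare() asserts that at most one of the two is set and copies the deprecated field over, so existing assets keep working during the transition.
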
operators.py CHANGED
@@ -45,7 +45,6 @@ import uuid
 import zipfile
 from abc import abstractmethod
 from collections import Counter, defaultdict
-from copy import deepcopy
 from dataclasses import field
 from itertools import zip_longest
 from random import Random
@@ -86,7 +85,7 @@ from .settings_utils import get_settings
 from .stream import DynamicStream, Stream
 from .text_utils import nested_tuple_to_string
 from .type_utils import isoftype
-from .utils import flatten_dict
+from .utils import deepcopy, flatten_dict
 
 settings = get_settings()
schema.py CHANGED
@@ -1,9 +1,9 @@
 import json
-from dataclasses import field
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, Optional
 
 from datasets import Features, Sequence, Value
 
+from .artifact import Artifact
 from .operator import InstanceOperatorValidator
 
 UNITXT_DATASET_SCHEMA = Features(
@@ -20,10 +20,7 @@ UNITXT_DATASET_SCHEMA = Features(
 )
 
 
-class ToUnitxtGroup(InstanceOperatorValidator):
-    group: str
-    metrics: List[str] = None
-    postprocessors: List[str] = field(default_factory=lambda: ["to_string_stripped"])
+class Finalize(InstanceOperatorValidator):
     remove_unnecessary_fields: bool = True
 
     @staticmethod
@@ -43,6 +40,7 @@ class ToUnitxtGroup(InstanceOperatorValidator):
                 "template": self.artifact_to_jsonable(
                     instance["recipe_metadata"]["template"]
                 ),
+                "num_demos": instance["recipe_metadata"]["num_demos"],
             },
         }
         instance["task_data"] = json.dumps(task_data)
@@ -56,11 +54,16 @@ class ToUnitxtGroup(InstanceOperatorValidator):
 
         for key in keys_to_delete:
             del instance[key]
-        instance["group"] = self.group
-        if self.metrics is not None:
-            instance["metrics"] = self.metrics
-        if self.postprocessors is not None:
-            instance["postprocessors"] = self.postprocessors
+        if "group" not in instance:
+            instance["group"] = "unitxt"
+        instance["metrics"] = [
+            metric.to_json() if isinstance(metric, Artifact) else metric
+            for metric in instance["metrics"]
+        ]
+        instance["postprocessors"] = [
+            processor.to_json() if isinstance(processor, Artifact) else processor
+            for processor in instance["postprocessors"]
+        ]
         return instance
 
     def validate(self, instance: Dict[str, Any], stream_name: Optional[str] = None):
splitters.py CHANGED
@@ -1,6 +1,5 @@
1
  import itertools
2
  from abc import abstractmethod
3
- from copy import deepcopy
4
  from difflib import get_close_matches
5
  from typing import Dict, List, Optional
6
 
@@ -17,6 +16,7 @@ from .split_utils import (
17
  )
18
  from .stream import EmptyStreamError, FaultyStreamError, MultiStream
19
  from .type_utils import isoftype
 
20
 
21
 
22
  class Splitter(MultiStreamOperator):
@@ -109,36 +109,25 @@ class SliceSplit(Splitter):
109
  return MultiStream.from_generators(generators)
110
 
111
 
112
- class Sampler(Artifact):
113
- sample_size: int = None
114
-
115
- def prepare(self):
116
- super().prepare()
117
- self.set_size(self.sample_size)
118
 
119
- def set_size(self, size):
120
- if isinstance(size, str):
121
- assert (
122
- size.isdigit()
123
- ), f"sample_size must be a natural number, got {self.sample_size}"
124
- size = int(size)
125
- self.sample_size = size
126
 
 
127
  @abstractmethod
128
  def sample(
129
- self, instances_pool: List[Dict[str, object]], instance: Dict[str, object]
 
 
 
130
  ) -> List[Dict[str, object]]:
131
  pass
132
 
133
- def get_random_generator_based_on_instance(self, instance):
134
- return new_random_generator(sub_seed={**instance["input_fields"]})
135
-
136
  def filter_source_by_instance(
137
  self, instances_pool: List[Dict[str, object]], instance: Dict[str, object]
138
  ) -> List[Dict[str, object]]:
139
  if "input_fields" not in instance:
140
  raise ValueError(f"'input_fields' field is missing from '{instance}'.")
141
- # l = list(filter(lambda x: x["inputs"] != instance["inputs"], instances_pool))
142
  try:
143
  return [
144
  item
@@ -154,12 +143,13 @@ class RandomSampler(Sampler):
154
 
155
  def sample(
156
  self,
 
157
  instances_pool: List[Dict[str, object]],
158
  instance: Optional[Dict[str, object]],
159
  ) -> List[Dict[str, object]]:
160
  instances_pool = list(instances_pool)
161
- random_generator = self.get_random_generator_based_on_instance(instance)
162
- return random_generator.sample(instances_pool, self.sample_size)
163
 
164
 
165
  class FixedIndicesSampler(Sampler):
@@ -175,13 +165,14 @@ class FixedIndicesSampler(Sampler):
175
 
176
  def sample(
177
  self,
 
178
  instances_pool: List[Dict[str, object]],
179
  instance: Optional[Dict[str, object]],
180
  ) -> List[Dict[str, object]]:
181
  num_instances = len(instances_pool)
182
 
183
  instances = []
184
- for index in self.indices[0 : self.sample_size]:
185
  if index >= num_instances:
186
  raise ValueError(
187
  f"FixedIndicesSampler 'indices' field contains index ({index}) which is out of bounds of the instance pool ( of size {num_instances})"
@@ -200,7 +191,10 @@ class CloseTextSampler(Sampler):
200
  field: str
201
 
202
  def sample(
203
- self, instances_pool: List[Dict[str, object]], instance: Dict[str, object]
 
 
 
204
  ) -> List[Dict[str, object]]:
205
  field = f"input_fields/{self.field}"
206
  value = dict_get(instance, field)
@@ -211,9 +205,7 @@ class CloseTextSampler(Sampler):
211
  options = []
212
  for instance_in_pool in instances_pool:
213
  options.append(dict_get(instance_in_pool, field))
214
- closest_matches = get_close_matches(
215
- value, options, n=self.sample_size, cutoff=0
216
- )
217
  # Randmly select 'sample_size' instances that are from the closest matches text
218
  # (There may be multiple instance with same text in the given field, and the order returned is
219
  # is also randomized )
@@ -222,8 +214,8 @@ class CloseTextSampler(Sampler):
222
  for instance_in_pool in instances_pool
223
  if dict_get(instance_in_pool, field) in closest_matches
224
  ]
225
- random_generator = self.get_random_generator_based_on_instance(instance)
226
- return random_generator.sample(instances_pool, self.sample_size)
227
 
228
 
229
  class DiverseLabelsSampler(Sampler):
@@ -306,26 +298,27 @@ class DiverseLabelsSampler(Sampler):
306
 
307
  def sample(
308
  self,
 
309
  instances_pool: List[Dict[str, object]],
310
  instance: Optional[Dict[str, object]],
311
  ) -> List[Dict[str, object]]:
312
  if self.labels_cache is None:
313
  self.labels_cache = self.divide_by_repr(instances_pool)
314
  all_labels = list(self.labels_cache.keys())
315
- random_generator = self.get_random_generator_based_on_instance(instance)
316
  random_generator.shuffle(all_labels)
317
  from collections import Counter
318
 
319
- if self.sample_size > len(instances_pool):
320
  raise ValueError(
321
- f"Request sample size {self.sample_size} is greater than number of instances {len(instances_pool)}"
322
  )
323
  total_allocated = 0
324
  allocations = Counter()
325
 
326
- while total_allocated < self.sample_size:
327
  for label in all_labels:
328
- if total_allocated < self.sample_size:
329
  if len(self.labels_cache[label]) - allocations[label] > 0:
330
  allocations[label] += 1
331
  total_allocated += 1
@@ -341,40 +334,56 @@ class DiverseLabelsSampler(Sampler):
341
  return result
342
 
343
 
344
- class SpreadSplit(InstanceOperatorWithMultiStreamAccess):
345
- source_stream: str = None
346
- target_field: str = None
347
- sampler: Sampler = None
348
 
349
  def prepare(self):
350
  self.local_cache = None
351
  self.sampler.prepare()
352
 
353
- def verify(self):
354
- assert self.source_stream is not None, "Source stream must be specified"
355
- assert self.target_field is not None, "Target field must be specified"
356
- assert self.sampler is not None, "Sampler must be specified"
357
- return super().verify()
358
 
359
  def process(
360
  self, instance: Dict[str, object], multi_stream: MultiStream
361
  ) -> Dict[str, object]:
 
362
  try:
363
  if self.local_cache is None:
364
- self.local_cache = deepcopy(list(multi_stream[self.source_stream]))
365
 
366
  source_stream = self.local_cache
367
  source_stream = self.sampler.filter_source_by_instance(
368
  source_stream, instance
369
  )
370
- if len(source_stream) < self.sampler.sample_size:
371
  raise ValueError(
372
  f"Size of population to sample from: {len(source_stream)} is smaller than the needed sample_size: {self.sampler.sample_size}."
373
  )
374
- sampled_instances = self.sampler.sample(source_stream, instance)
375
- instance[self.target_field] = sampled_instances
 
 
376
  return instance
377
  except FaultyStreamError as e:
378
  raise EmptyStreamError(
379
- f"Unable to fetch instances from '{self.source_stream}' to '{self.target_field}', due to {e.__class__.__name__}: {e}"
380
  ) from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import itertools
2
  from abc import abstractmethod
 
3
  from difflib import get_close_matches
4
  from typing import Dict, List, Optional
5
 
 
16
  )
17
  from .stream import EmptyStreamError, FaultyStreamError, MultiStream
18
  from .type_utils import isoftype
19
+ from .utils import deepcopy
20
 
21
 
22
  class Splitter(MultiStreamOperator):
 
109
  return MultiStream.from_generators(generators)
110
 
111
 
112
+ def get_random_generator_based_on_instance(instance):
113
+ return new_random_generator(sub_seed={**instance["input_fields"]})
 
 
 
 
114
 
 
 
 
 
 
 
 
115
 
116
+ class Sampler(Artifact):
117
  @abstractmethod
118
  def sample(
119
+ self,
120
+ sample_size: int,
121
+ instances_pool: List[Dict[str, object]],
122
+ instance: Dict[str, object],
123
  ) -> List[Dict[str, object]]:
124
  pass
125
 
 
 
 
126
  def filter_source_by_instance(
127
  self, instances_pool: List[Dict[str, object]], instance: Dict[str, object]
128
  ) -> List[Dict[str, object]]:
129
  if "input_fields" not in instance:
130
  raise ValueError(f"'input_fields' field is missing from '{instance}'.")
 
131
  try:
132
  return [
133
  item
 
143
 
144
  def sample(
145
  self,
146
+ sample_size,
147
  instances_pool: List[Dict[str, object]],
148
  instance: Optional[Dict[str, object]],
149
  ) -> List[Dict[str, object]]:
150
  instances_pool = list(instances_pool)
151
+ random_generator = get_random_generator_based_on_instance(instance)
152
+        return random_generator.sample(instances_pool, sample_size)
 
 
 class FixedIndicesSampler(Sampler):
@@ ... @@
     def sample(
         self,
+        sample_size,
         instances_pool: List[Dict[str, object]],
         instance: Optional[Dict[str, object]],
     ) -> List[Dict[str, object]]:
         num_instances = len(instances_pool)
 
         instances = []
+        for index in self.indices[0:sample_size]:
             if index >= num_instances:
                 raise ValueError(
                     f"FixedIndicesSampler 'indices' field contains index ({index}) which is out of bounds of the instance pool (of size {num_instances})"
@@ ... @@
     field: str
 
     def sample(
+        self,
+        sample_size: int,
+        instances_pool: List[Dict[str, object]],
+        instance: Dict[str, object],
     ) -> List[Dict[str, object]]:
         field = f"input_fields/{self.field}"
         value = dict_get(instance, field)
@@ ... @@
         options = []
         for instance_in_pool in instances_pool:
             options.append(dict_get(instance_in_pool, field))
+        closest_matches = get_close_matches(value, options, n=sample_size, cutoff=0)
         # Randomly select 'sample_size' instances from the closest-matching texts.
         # (There may be multiple instances with the same text in the given field,
         # and the order returned is also randomized.)
         instances_pool = [
             instance_in_pool
             for instance_in_pool in instances_pool
             if dict_get(instance_in_pool, field) in closest_matches
         ]
+        random_generator = get_random_generator_based_on_instance(instance)
+        return random_generator.sample(instances_pool, sample_size)
 
 
 class DiverseLabelsSampler(Sampler):
@@ ... @@
     def sample(
         self,
+        sample_size: int,
         instances_pool: List[Dict[str, object]],
         instance: Optional[Dict[str, object]],
     ) -> List[Dict[str, object]]:
         if self.labels_cache is None:
             self.labels_cache = self.divide_by_repr(instances_pool)
         all_labels = list(self.labels_cache.keys())
+        random_generator = get_random_generator_based_on_instance(instance)
         random_generator.shuffle(all_labels)
         from collections import Counter
 
+        if sample_size > len(instances_pool):
             raise ValueError(
+                f"Requested sample size {sample_size} is greater than number of instances {len(instances_pool)}"
             )
         total_allocated = 0
         allocations = Counter()
 
+        while total_allocated < sample_size:
             for label in all_labels:
+                if total_allocated < sample_size:
                     if len(self.labels_cache[label]) - allocations[label] > 0:
                         allocations[label] += 1
                         total_allocated += 1
@@ ... @@
         return result
 
 
+class Sample(InstanceOperatorWithMultiStreamAccess):
+    from_stream: str
+    to_field: str
+    sampler: Sampler
 
     def prepare(self):
         self.local_cache = None
         self.sampler.prepare()
 
+    @abstractmethod
+    def get_sample_size(self, instance) -> int:
+        pass
 
     def process(
         self, instance: Dict[str, object], multi_stream: MultiStream
     ) -> Dict[str, object]:
+        sample_size = self.get_sample_size(instance)
         try:
             if self.local_cache is None:
+                self.local_cache = deepcopy(list(multi_stream[self.from_stream]))
 
             source_stream = self.local_cache
             source_stream = self.sampler.filter_source_by_instance(
                 source_stream, instance
             )
+            if len(source_stream) < sample_size:
                 raise ValueError(
                     f"Size of population to sample from: {len(source_stream)} is smaller than the needed sample_size: {sample_size}."
                 )
+            sampled_instances = self.sampler.sample(
+                sample_size=sample_size, instances_pool=source_stream, instance=instance
+            )
+            instance[self.to_field] = sampled_instances
             return instance
         except FaultyStreamError as e:
             raise EmptyStreamError(
+                f"Unable to fetch instances from '{self.from_stream}' to '{self.to_field}', due to {e.__class__.__name__}: {e}"
             ) from e
+
+
+class ConstantSizeSample(Sample):
+    sample_size: int
+
+    def get_sample_size(self, instance) -> int:
+        return self.sample_size
+
+
+class RandomSizeSample(Sample):
+    sample_sizes: List[int]
+
+    def get_sample_size(self, instance) -> int:
+        random_generator = get_random_generator_based_on_instance(instance)
+        return random_generator.choice(self.sample_sizes)
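
This change separates concerns: the Sampler decides *which* instances to pick, while the new Sample operators (ConstantSizeSample, RandomSizeSample) decide *how many*, per instance. Below is a minimal self-contained sketch of that split; ToySampler and the instance-seeded generator are stand-ins for unitxt's real Sampler and get_random_generator_based_on_instance, not the library's API.

import random
from abc import ABC, abstractmethod
from typing import Dict, List


class ToySampler:
    """Stand-in for a unitxt Sampler: picks `sample_size` items from a pool."""

    def sample(self, sample_size: int, instances_pool: List[Dict], instance: Dict) -> List[Dict]:
        # Seed from the instance so the same instance always gets the same demos.
        rng = random.Random(str(sorted(instance.items())))
        return rng.sample(instances_pool, sample_size)


class ToySample(ABC):
    """Stand-in for the new Sample operator: resolves the demo count per instance."""

    def __init__(self, sampler: ToySampler):
        self.sampler = sampler

    @abstractmethod
    def get_sample_size(self, instance: Dict) -> int:
        ...

    def process(self, instance: Dict, pool: List[Dict]) -> List[Dict]:
        return self.sampler.sample(self.get_sample_size(instance), pool, instance)


class ToyRandomSizeSample(ToySample):
    """Mirrors RandomSizeSample: a deterministic per-instance choice among sizes."""

    def __init__(self, sampler: ToySampler, sample_sizes: List[int]):
        super().__init__(sampler)
        self.sample_sizes = sample_sizes

    def get_sample_size(self, instance: Dict) -> int:
        return random.Random(str(sorted(instance.items()))).choice(self.sample_sizes)


pool = [{"text": f"demo {i}"} for i in range(10)]
op = ToyRandomSizeSample(ToySampler(), sample_sizes=[0, 1, 5])
print(len(op.process({"text": "query"}, pool)))  # prints 0, 1, or 5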
standard.py CHANGED
@@ -1,17 +1,18 @@
-from typing import List
+from typing import List, Optional, Union
 
 from .card import TaskCard
+from .collections_operators import GetLength
 from .dataclass import Field, InternalField, NonPositionalField, OptionalField
 from .formats import Format, SystemFormat
 from .logging_utils import get_logger
 from .operator import SequentialOperator, SourceSequentialOperator, StreamingOperator
 from .operators import Augmentor, NullAugmentor, Set, StreamRefiner
 from .recipe import Recipe
-from .schema import ToUnitxtGroup
-from .splitters import Sampler, SeparateSplit, SpreadSplit
+from .schema import Finalize
+from .splitters import ConstantSizeSample, RandomSizeSample, Sampler, SeparateSplit
 from .stream import MultiStream
 from .system_prompts import EmptySystemPrompt, SystemPrompt
-from .templates import Template
+from .templates import ApplyRandomTemplate, ApplySingleTemplate, Template
 
 logger = get_logger()
 
@@ -21,15 +22,15 @@ class CreateDemosPool(SeparateSplit):
     pass
 
 
-class AddDemosField(SpreadSplit):
-    pass
-
-
 class BaseRecipe(Recipe, SourceSequentialOperator):
+    # Base parameters
     card: TaskCard
-    template: Template = None
+    template: Union[Template, List[Template]] = None
     system_prompt: SystemPrompt = Field(default_factory=EmptySystemPrompt)
     format: Format = Field(default_factory=SystemFormat)
+
+    # Additional parameters
+    template_card_index: int = NonPositionalField(default=None)
     metrics: List[str] = NonPositionalField(default=None)
     postprocessors: List[str] = NonPositionalField(default=None)
 
@@ -44,7 +45,7 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
     test_refiner: StreamRefiner = OptionalField(default_factory=StreamRefiner)
 
     demos_pool_size: int = None
-    num_demos: int = 0
+    num_demos: Optional[Union[int, List[int]]] = 0
     demos_removed_from_data: bool = True
 
     demos_pool_name: str = "demos_pool"
@@ -59,16 +60,22 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
     def before_process_multi_stream(self):
         super().before_process_multi_stream()
 
+    @property
+    def max_demos_size(self):
+        if isinstance(self.num_demos, list):
+            return max(self.num_demos)
+        return self.num_demos
+
     def verify(self):
         super().verify()
-        if self.num_demos > 0:
+        if self.use_demos:
             if self.demos_pool_size is None or self.demos_pool_size < 1:
                 raise ValueError(
                     "When using demonstrations both num_demos and demos_pool_size should be assigned with positive integers."
                 )
-            if self.demos_pool_size < self.num_demos:
+            if self.demos_pool_size < self.max_demos_size:
                 raise ValueError(
-                    f"num_demos (got: {self.num_demos}) should not exceed demos_pool_size (got: {self.demos_pool_size})"
+                    f"num_demos (got: {self.max_demos_size}) should not exceed demos_pool_size (got: {self.demos_pool_size})"
                 )
             if self.loader_limit and self.demos_pool_size > self.loader_limit:
                 raise ValueError(
@@ -105,6 +112,17 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
                 f"post processors must be a list of post processor. Got postprocessors = {self.postprocessors}"
             )
 
+        if self.template is None:
+            raise ValueError(
+                "You must set in the recipe either `template`, `template_card_index` or `templates`."
+            )
+
+        if isinstance(self.template, list):
+            for template in self.template:
+                self.verify_template(template)
+        else:
+            self.verify_template(self.template)
+
     def prepare_refiners(self):
         self.train_refiner.max_instances = self.max_train_instances
         self.train_refiner.apply_to_streams = ["train"]
@@ -118,31 +136,12 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
         self.test_refiner.apply_to_streams = ["test"]
         self.processing.steps.append(self.test_refiner)
 
-    def prepare_metrics_and_postprocessors(self):
-        # Check is done here to ensure get_postprocessor is called on
-        # a Template object
-        if self.template is not None and not isinstance(self.template, Template):
+    def verify_template(self, template):
+        if not isinstance(template, Template):
             raise ValueError(
-                f"template argument must be an object of type Template. Got template = {self.template}"
+                f"template argument must be an object of type Template. Got template = {template}"
             )
 
-        if self.postprocessors is None:
-            postprocessors = self.template.get_postprocessors()
-        else:
-            postprocessors = self.postprocessors
-
-        if self.metrics is None:
-            metrics = self.card.task.metrics
-        else:
-            metrics = self.metrics
-
-        metrics = [
-            metric if isinstance(metric, str) else metric.to_json()
-            for metric in metrics
-        ]
-
-        return metrics, postprocessors
-
     def set_pipelines(self):
         self.loading = SequentialOperator()
         self.loading.__description__ = "Loading the data from the data source."
@@ -158,8 +157,8 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
         self.processing.__description__ = (
             "Setting task fields (and selecting demos per sample if needed)."
         )
-        self.verblization = SequentialOperator()
-        self.verblization.__description__ = "Verbalizing the input to the model and gold references to the 'source', 'target' and 'references' fields."
+        self.verbalization = SequentialOperator()
+        self.verbalization.__description__ = "Verbalizing the input to the model and gold references to the 'source', 'target' and 'references' fields."
         self.finalize = SequentialOperator()
         self.finalize.__description__ = "Adding post processors. Removing intermediate fields. Creating the final output dataset."
 
@@ -169,7 +168,7 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
             self.standardization,
             self.processing,
             self.metadata,
-            self.verblization,
+            self.verbalization,
             self.finalize,
         ]
 
@@ -193,7 +192,7 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
 
         self.inference = SequentialOperator()
 
-        self.inference.steps = [self.verblization, self.finalize]
+        self.inference.steps = [self.verbalization, self.finalize]
 
         self._demos_pool_cache = None
 
@@ -202,7 +201,7 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
         return list(self.inference_instance(ms)["__inference__"])
 
     def production_demos_pool(self):
-        if self.num_demos > 0:
+        if self.use_demos:
             if self._demos_pool_cache is None:
                 self._demos_pool_cache = list(
                     self.inference_demos()[self.demos_pool_name]
@@ -210,6 +209,14 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
             return self._demos_pool_cache
         return []
 
+    @property
+    def has_custom_demos_pool(self):
+        return self.demos_pool_size is not None and self.demos_pool_size > 0
+
+    @property
+    def use_demos(self):
+        return self.num_demos is not None and self.max_demos_size > 0
+
     def produce(self, task_instances):
         """Use the recipe in production to produce model ready query from standard task instance."""
         self.before_process_multi_stream()
@@ -243,11 +250,8 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
         self.metadata.steps.append(
             Set(
                 fields={
-                    "recipe_metadata": {
-                        "template": self.template,
-                        "system_prompt": self.system_prompt,
-                        "format": self.format,
-                    }
+                    "recipe_metadata/system_prompt": self.system_prompt,
+                    "recipe_metadata/format": self.format,
                 }
             )
         )
@@ -260,7 +264,7 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
         self.augmentor.set_task_input_fields(self.card.task.augmentable_inputs)
         self.processing.steps.append(self.augmentor)
 
-        if self.demos_pool_size is not None and self.demos_pool_size > 0:
+        if self.has_custom_demos_pool:
             self.processing.steps.append(
                 CreateDemosPool(
                     from_split=self.demos_taken_from,
@@ -270,7 +274,7 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
                 )
             )
 
-        if self.num_demos > 0:
+        if self.use_demos:
             if self.sampler is None:
                 if self.card.sampler is None:
                     raise ValueError(
@@ -279,33 +283,76 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
                 )
                 self.sampler = self.card.sampler
 
-            self.sampler.set_size(self.num_demos)
-
         self.prepare_refiners()
 
-        self.verblization.steps.append(self.template)
-        if self.num_demos > 0:
-            self.verblization.steps.append(
-                AddDemosField(
-                    source_stream=self.demos_pool_name,
-                    target_field=self.demos_field,
-                    sampler=self.sampler,
-                )
-            )
-        self.verblization.steps.append(self.system_prompt)
-        self.verblization.steps.append(self.format)
-        if self.augmentor.augment_model_input:
-            self.verblization.steps.append(self.augmentor)
-
-        metrics, postprocessors = self.prepare_metrics_and_postprocessors()
-
-        self.finalize.steps.append(
-            ToUnitxtGroup(
-                group="unitxt",
-                metrics=metrics,
-                postprocessors=postprocessors,
-            )
-        )
+        if self.use_demos:
+            if isinstance(self.num_demos, int):
+                self.verbalization.steps.append(
+                    ConstantSizeSample(
+                        from_stream=self.demos_pool_name,
+                        to_field=self.demos_field,
+                        sampler=self.sampler,
+                        sample_size=self.num_demos,
+                    )
+                )
+                self.verbalization.steps.append(
+                    Set(fields={"recipe_metadata/num_demos": self.num_demos})
+                )
+
+            elif isinstance(self.num_demos, list):
+                self.verbalization.steps.append(
+                    RandomSizeSample(
+                        from_stream=self.demos_pool_name,
+                        to_field=self.demos_field,
+                        sampler=self.sampler,
+                        sample_sizes=self.num_demos,
+                    )
+                )
+                self.verbalization.steps.append(
+                    GetLength(field="demos", to_field="recipe_metadata/num_demos")
+                )
+            else:
+                raise ValueError("num_demos must be int or List[int]")
+
+            if isinstance(self.template, list):
+                self.verbalization.steps.append(
+                    ApplyRandomTemplate(
+                        templates=self.template, demos_field=self.demos_field
+                    )
+                )
+            else:
+                self.verbalization.steps.append(
+                    ApplySingleTemplate(
+                        template=self.template, demos_field=self.demos_field
+                    )
+                )
+        else:
+            self.verbalization.steps.append(
+                Set(fields={"recipe_metadata/num_demos": 0})
+            )
+            if isinstance(self.template, list):
+                self.verbalization.steps.append(
+                    ApplyRandomTemplate(templates=self.template)
+                )
+            else:
+                self.verbalization.steps.append(
+                    ApplySingleTemplate(template=self.template)
+                )
 
+        self.verbalization.steps.append(self.system_prompt)
+        self.verbalization.steps.append(self.format)
+        if self.augmentor.augment_model_input:
+            self.verbalization.steps.append(self.augmentor)
+
+        if self.postprocessors is not None:
+            self.finalize.steps.append(
+                Set(fields={"postprocessors": self.postprocessors})
+            )
+
+        if self.metrics is not None:
+            self.finalize.steps.append(Set(fields={"metrics": self.metrics}))
+
+        self.finalize.steps.append(Finalize())
 
 
 class StandardRecipeWithIndexes(BaseRecipe):
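
With these changes, num_demos may be a list of candidate demo counts and template may be a list of templates: each instance then gets a per-instance random demo count (RandomSizeSample) and template (ApplyRandomTemplate), both recorded under recipe_metadata. A hedged usage sketch follows, assuming a recipe is callable as a source operator and that a string card argument resolves through the catalog; "cards.sst2" is a placeholder name.

from unitxt.standard import StandardRecipeWithIndexes

recipe = StandardRecipeWithIndexes(
    card="cards.sst2",            # hypothetical catalog card
    template_card_index=0,        # or template=[...] for a random template per instance
    num_demos=[0, 1, 5],          # RandomSizeSample draws one of these per instance
    demos_pool_size=20,           # must be >= max(num_demos), per verify()
)
multi_stream = recipe()           # assumed: source operators produce a MultiStream when called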
stream.py CHANGED
@@ -2,7 +2,6 @@ import tempfile
 import traceback
 import warnings
 from abc import abstractmethod
-from copy import deepcopy
 from typing import Any, Callable, Dict, Generator, Iterable, List
 
 from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
@@ -11,6 +10,7 @@ from .dataclass import Dataclass, OptionalField
 from .generator_utils import CopyingReusableGenerator, ReusableGenerator
 from .logging_utils import get_logger
 from .settings_utils import get_settings
+from .utils import deepcopy
 
 settings = get_settings()
 logger = get_logger()
struct_data_operators.py CHANGED
@@ -18,7 +18,6 @@ For key-value pairs, expected input format is:
 import json
 import random
 from abc import ABC, abstractmethod
-from copy import deepcopy
 from typing import (
     Any,
     Dict,
@@ -30,6 +29,7 @@ import pandas as pd
 
 from .dict_utils import dict_get
 from .operators import FieldOperator, InstanceOperator
+from .utils import deepcopy
 
 
 class SerializeTable(ABC, FieldOperator):
task.py CHANGED
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Optional, Union
 from .artifact import fetch_artifact
 from .dataclass import DeprecatedField
 from .deprecation_utils import deprecation
-from .logging_utils import get_logger
+from .error_utils import Documentation, UnitxtError, UnitxtWarning
 from .operator import InstanceOperator
 from .type_utils import (
     Type,
@@ -77,12 +77,14 @@ class Task(InstanceOperator):
     def prepare(self):
         super().prepare()
         if self.input_fields is not None and self.inputs is not None:
-            raise ValueError(
-                "Conflicting attributes: 'input_fields' cannot be set simultaneously with 'inputs'. Use only 'input_fields'"
+            raise UnitxtError(
+                "Conflicting attributes: 'input_fields' cannot be set simultaneously with 'inputs'. Use only 'input_fields'",
+                Documentation.ADDING_TASK,
             )
         if self.reference_fields is not None and self.outputs is not None:
-            raise ValueError(
-                "Conflicting attributes: 'reference_fields' cannot be set simultaneously with 'output'. Use only 'reference_fields'"
+            raise UnitxtError(
+                "Conflicting attributes: 'reference_fields' cannot be set simultaneously with 'output'. Use only 'reference_fields'",
+                Documentation.ADDING_TASK,
             )
 
         self.input_fields = (
@@ -107,9 +109,15 @@ class Task(InstanceOperator):
 
     def verify(self):
         if self.input_fields is None:
-            raise ValueError("Missing attribute in task: 'input_fields' not set.")
+            raise UnitxtError(
+                "Missing attribute in task: 'input_fields' not set.",
+                Documentation.ADDING_TASK,
+            )
         if self.reference_fields is None:
-            raise ValueError("Missing attribute in task: 'reference_fields' not set.")
+            raise UnitxtError(
+                "Missing attribute in task: 'reference_fields' not set.",
+                Documentation.ADDING_TASK,
+            )
         for io_type in ["input_fields", "reference_fields"]:
             data = (
                 self.input_fields
@@ -118,11 +126,12 @@ class Task(InstanceOperator):
             )
 
             if isinstance(data, list) or not is_type_dict(data):
-                get_logger().warning(
+                UnitxtWarning(
                     f"'{io_type}' field of Task should be a dictionary of field names and their types. "
                     f"For example, {{'text': str, 'classes': List[str]}}. Instead only '{data}' was "
                     f"passed. All types will be assumed to be 'Any'. In future version of unitxt this "
-                    f"will raise an exception."
+                    f"will raise an exception.",
+                    Documentation.ADDING_TASK,
                 )
                 data = {key: Any for key in data}
             if io_type == "input_fields":
@@ -131,11 +140,12 @@ class Task(InstanceOperator):
                 self.reference_fields = data
 
         if not self.prediction_type:
-            get_logger().warning(
+            UnitxtWarning(
                 "'prediction_type' was not set in Task. It is used to check the output of "
                 "template post processors is compatible with the expected input of the metrics. "
                 "Setting `prediction_type` to 'Any' (no checking is done). In future version "
-                "of unitxt this will raise an exception."
+                "of unitxt this will raise an exception.",
+                Documentation.ADDING_TASK,
             )
             self.prediction_type = Any
 
@@ -191,18 +201,20 @@ class Task(InstanceOperator):
             ):
                 continue
 
-            raise ValueError(
+            raise UnitxtError(
                 f"The task's prediction type ({prediction_type}) and '{metric_id}' "
-                f"metric's prediction type ({metric_prediction_type}) are different."
+                f"metric's prediction type ({metric_prediction_type}) are different.",
+                Documentation.ADDING_TASK,
             )
 
     def verify_defaults(self):
         if self.defaults:
             if not isinstance(self.defaults, dict):
-                raise ValueError(
+                raise UnitxtError(
                     f"If specified, the 'defaults' must be a dictionary, "
                     f"however, '{self.defaults}' was provided instead, "
-                    f"which is of type '{to_type_string(type(self.defaults))}'."
+                    f"which is of type '{to_type_string(type(self.defaults))}'.",
                    Documentation.ADDING_TASK,
                 )
 
         for default_name, default_value in self.defaults.items():
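
task.py swaps plain ValueError/logger warnings for UnitxtError/UnitxtWarning that carry a pointer into the documentation. error_utils.py itself is not shown in this view; the sketch below is one plausible shape inferred from the call sites (a message plus a Documentation constant), not the actual implementation, and the constant values are assumptions.

class Documentation:
    URL = "https://www.unitxt.ai/en/latest/"     # assumed base URL
    ADDING_TASK = "docs/adding_task.html"        # assumed constant value


class UnitxtError(Exception):
    """Error that appends a 'see the docs' pointer to its message."""

    def __init__(self, message: str, additional_info_id: str = None):
        if additional_info_id is not None:
            message += f"\nFor more information see: {Documentation.URL}{additional_info_id}"
        super().__init__(message)


try:
    raise UnitxtError("Missing attribute in task: 'input_fields' not set.", Documentation.ADDING_TASK)
except UnitxtError as e:
    print(e)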
templates.py CHANGED
@@ -6,17 +6,20 @@ from typing import Any, Dict, List, Optional, Tuple, Union
 from .artifact import Artifact
 from .collections import ListCollection
 from .dataclass import NonPositionalField
+from .dict_utils import dict_set
+from .error_utils import Documentation, UnitxtError
 from .operator import InstanceOperator
 from .random_utils import new_random_generator
 from .type_utils import isoftype
 
 
-class TemplateFormatKeyError(KeyError):
+class TemplateFormatKeyError(UnitxtError):
     def __init__(self, template, data, data_type, format_str, format_name):
         keys = ", ".join(data.keys())
         super().__init__(
             f"Available {data_type}s are [{keys}] "
-            f"but {template.__class__.__name__}.{format_name} format requires a different ones: '{format_str}'"
+            f"but {template.__class__.__name__}.{format_name} format requires different ones: '{format_str}'",
+            Documentation.ADDING_TEMPLATE,
         )
 
 
@@ -92,6 +95,7 @@ class Template(InstanceOperator):
             "references": references,
             "instruction": instruction,
             "target_prefix": target_prefix,
+            "postprocessors": self.postprocessors,
         }
 
     @abstractmethod
@@ -108,9 +112,6 @@ class Template(InstanceOperator):
     ) -> Tuple[str, List[str]]:
         pass
 
-    def get_postprocessors(self) -> List[str]:
-        return self.postprocessors
-
     def serialize_data(self, data):
         return {
             k: ", ".join(str(t) for t in v) if isinstance(v, list) else v
@@ -123,6 +124,11 @@ class Template(InstanceOperator):
         if serialize:
             data = self.serialize_data(data)
         try:
+            if format_str is None:
+                raise UnitxtError(
+                    f"Required field 'output_format' of class {self.__class__.__name__} not set in {self.__class__.__name__}",
+                    Documentation.ADDING_TEMPLATE,
+                )
             return format_str.format(**data)
         except KeyError as e:
             raise TemplateFormatKeyError(
@@ -130,6 +136,49 @@ class Template(InstanceOperator):
             ) from e
 
 
+class ApplyTemplate(InstanceOperator):
+    demos_field: Optional[str] = None
+
+    @abstractmethod
+    def get_template(self, instance: Dict[str, Any]) -> Template:
+        pass
+
+    def apply(self, template: Template, instance: Dict[str, Any]):
+        return template.process_instance(instance)
+
+    def process(
+        self, instance: Dict[str, Any], stream_name: Optional[str] = None
+    ) -> Dict[str, Any]:
+        template = self.get_template(instance)
+
+        if self.demos_field is not None:
+            if self.demos_field not in instance:
+                raise ValueError("Demos field is missing.")
+            instance[self.demos_field] = [
+                self.apply(template, demo_instance)
+                for demo_instance in instance[self.demos_field]
+            ]
+        dict_set(instance, "recipe_metadata/template", template)
+        return self.apply(template, instance)
+
+
+class ApplySingleTemplate(ApplyTemplate):
+    template: Template
+
+    def get_template(self, instance: Dict[str, Any]) -> Template:
+        return self.template
+
+
+class ApplyRandomTemplate(ApplyTemplate):
+    templates: List[Template]
+
+    def get_template(self, instance: Dict[str, Any]) -> Template:
+        random_generator = new_random_generator(
+            {**instance["input_fields"], **instance["reference_fields"]}
+        )
+        return random_generator.choice(self.templates)
+
+
 class InputOutputTemplate(Template):
     """Generate field 'source' from fields designated as input, and fields 'target' and 'references' from fields designated as output, of the processed instance.
 
@@ -471,8 +520,9 @@ class MultipleChoiceTemplate(Template):
         try:
             return reference_fields[self.choices_field].index(target)
         except ValueError as e:
-            raise ValueError(
-                f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {reference_fields[self.choices_field]}"
+            raise UnitxtError(
+                f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {reference_fields[self.choices_field]}",
+                Documentation.ADDING_TEMPLATE,
             ) from e
         return target
 
@@ -485,8 +535,9 @@ class MultipleChoiceTemplate(Template):
         try:
             target = reference_fields[self.choices_field].index(target)
         except ValueError as e:
-            raise ValueError(
-                f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {reference_fields[self.choices_field]}"
+            raise UnitxtError(
+                f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {reference_fields[self.choices_field]}",
+                Documentation.ADDING_TEMPLATE,
             ) from e
 
         choices = self.inputs_to_choices(reference_fields, self.target_choice_format)
@@ -494,8 +545,9 @@ class MultipleChoiceTemplate(Template):
         try:
             target = choices[target]
         except IndexError as e:
-            raise IndexError(
-                f"MultipleChoiceTemplate cannot find index number {target} in choices: {choices}"
+            raise UnitxtError(
+                f"MultipleChoiceTemplate cannot find index number {target} in choices: {choices}",
+                Documentation.ADDING_TEMPLATE,
             ) from e
 
         return target, [target]
@@ -574,21 +626,21 @@ class YesNoTemplate(Template):
         try:
             gold_class_names = reference_fields[self.label_field]
         except KeyError as e:
-            raise RuntimeError(
+            raise UnitxtError(
                 f"Available reference_fields are {list(reference_fields.keys())}, missing required label field: '{self.label_field}'."
             ) from e
         if not isinstance(gold_class_names, list):
-            raise RuntimeError(
+            raise UnitxtError(
                 f"Unexpected value for gold_class_names: '{gold_class_names}'. Expecting a list."
             )
         try:
             queried_class_name = reference_fields[self.class_field]
         except KeyError as e:
-            raise RuntimeError(
+            raise UnitxtError(
                 f"Available reference_fields are {list(reference_fields.keys())}, missing required class field: '{self.class_field}'."
             ) from e
         if not queried_class_name or not isinstance(queried_class_name, str):
-            raise RuntimeError(
+            raise UnitxtError(
                 f"Unexpected value for queried_class_names: '{queried_class_name}'. Expected a string."
             )
         if queried_class_name in gold_class_names:
@@ -674,8 +726,9 @@ class MultiLabelTemplate(InputOutputTemplate):
     ) -> str:
         labels = reference_fields[self.labels_field]
         if not isinstance(labels, list):
-            raise ValueError(
-                f"MultiLabelTemplate requires labels field '{self.labels_field}' to be a list. Got {self.labels_field}<{type(labels).__name__}>: {labels}"
+            raise UnitxtError(
+                f"MultiLabelTemplate requires labels field '{self.labels_field}' to be a list. Got {self.labels_field}<{type(labels).__name__}>: {labels}",
+                Documentation.ADDING_TEMPLATE,
             )
         if len(labels) == 0:
             labels = [self.empty_label]
@@ -694,12 +747,14 @@ class MultiReferenceTemplate(InputOutputTemplate):
     ) -> List[str]:
         references = reference_fields[self.references_field]
         if not isoftype(references, List[str]):
-            raise ValueError(
-                f"MultiReferenceTemplate requires references field '{self.references_field}' to be List[str]. Got {self.references_field}<{type(references).__name__}>: {references}"
+            raise UnitxtError(
+                f"MultiReferenceTemplate requires references field '{self.references_field}' to be List[str]. Got {self.references_field}<{type(references).__name__}>: {references}",
+                Documentation.ADDING_TEMPLATE,
            )
         if len(references) == 0:
-            raise ValueError(
-                "No references found. MultiReferenceTemplate requires at least one reference."
+            raise UnitxtError(
+                "No references found. MultiReferenceTemplate requires at least one reference.",
+                Documentation.ADDING_TEMPLATE,
             )
 
         if self.random_reference:
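
ApplyRandomTemplate seeds its generator from the instance's own input and reference fields, so template assignment varies across the dataset but is reproducible for any given instance. A small sketch of that seeding idea, approximating new_random_generator with random.Random over a stable key (the real helper may derive the seed differently):

import random
from typing import Any, Dict, List


def choose_template(instance: Dict[str, Any], templates: List[str]) -> str:
    # Stable key from the instance content -> same instance, same template.
    seed = repr(sorted({**instance["input_fields"], **instance["reference_fields"]}.items()))
    return random.Random(seed).choice(templates)


instance = {"input_fields": {"text": "hello"}, "reference_fields": {"label": "pos"}}
templates = ["templates.classification.t1", "templates.classification.t2"]
assert choose_template(instance, templates) == choose_template(instance, templates)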
utils.py CHANGED
@@ -1,3 +1,4 @@
+import copy
 import importlib.util
 import json
 import os
@@ -125,3 +126,7 @@ def import_module_from_file(file_path):
     # Load the module
     spec.loader.exec_module(module)
     return module
+
+
+def deepcopy(obj):
+    return copy.deepcopy(obj)
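
Routing deepcopy through utils gives the library a single seam for copying, so it can later be instrumented or swapped for a faster copier without touching the call sites migrated above (artifact.py, stream.py, collections_operators.py, struct_data_operators.py). A trivial usage sketch, assuming the package imports as unitxt:

from unitxt.utils import deepcopy

data = {"a": [1, 2, 3]}
clone = deepcopy(data)              # currently delegates to copy.deepcopy
assert clone == data and clone is not data
assert clone["a"] is not data["a"]  # nested containers are copied too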
version.py CHANGED
@@ -1 +1 @@
-version = "1.12.2"
+version = "1.12.3"