Upload folder using huggingface_hub
- api.py +21 -13
- artifact.py +29 -21
- augmentors.py +2 -1
- dataclass.py +8 -1
- dialog_operators.py +7 -5
- dict_utils.py +20 -15
- formats.py +20 -32
- image_operators.py +10 -7
- inference.py +107 -102
- llm_as_judge.py +40 -12
- loaders.py +26 -11
- metrics.py +172 -35
- operators.py +123 -105
- span_lableing_operators.py +22 -16
- struct_data_operators.py +125 -86
- task.py +16 -12
- templates.py +17 -8
- type_utils.py +13 -12
- utils.py +2 -2
- version.py +1 -1
api.py
CHANGED
@@ -93,31 +93,39 @@ def load_dataset(
 ) -> Union[DatasetDict, IterableDatasetDict, Dataset, IterableDataset]:
     """Loads dataset.
 
-    If the 'dataset_query' argument is provided, then dataset is loaded from a card
-    catalog based on parameters specified in the query.
-
+    If the 'dataset_query' argument is provided, then dataset is loaded from a card
+    in local catalog based on parameters specified in the query.
+
+    Alternatively, dataset is loaded from a provided card based on explicitly
+    given parameters.
 
     Args:
         dataset_query (str, optional): A string query which specifies a dataset to load from local catalog or name of specific recipe or benchmark in the catalog.
-
-
+            For example: ``"card=cards.wnli,template=templates.classification.multi_class.relation.default"``.
+
         streaming (bool, False): When True yields the data as Unitxt streams dictionary
+
         split (str, optional): The split of the data to load
+
         disable_cache (str, optional): Disable caching process of the data
+
         **kwargs: Arguments used to load dataset from provided card, which is not present in local catalog.
 
     Returns:
         DatasetDict
 
-
-
-
-
-        card = TaskCard(...)
-        template = Template(...)
-        loader_limit = 10
-        dataset = load_dataset(card=card, template=template, loader_limit=loader_limit)
+    Example:
+        .. code-block:: python
+
+            dataset = load_dataset(
+                dataset_query="card=cards.stsb,template=templates.regression.two_texts.simple,max_train_instances=5"
+            )  # card must be present in local catalog
+
+            card = TaskCard(...)
+            template = Template(...)
+            loader_limit = 10
+            dataset = load_dataset(card=card, template=template, loader_limit=loader_limit)
+
     """
     recipe = load_recipe(dataset_query, **kwargs)
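Note: the documented query string composes with the other arguments above. A minimal sketch of the query form (an illustration, assuming the unitxt package is installed and the queried card exists in the local catalog):

    from unitxt.api import load_dataset

    # Query-string form: card and template are resolved from the local catalog.
    dataset = load_dataset(
        "card=cards.wnli,template=templates.classification.multi_class.relation.default",
        split="test",        # the documented 'split' argument
        disable_cache=True,  # the documented 'disable_cache' argument
    )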
artifact.py
CHANGED
@@ -89,16 +89,18 @@ class Catalogs:
         self.catalogs = []
 
 
-def
-    if
-
-
-
-
-
-
+def maybe_recover_artifacts_structure(obj):
+    if Artifact.is_possible_identifier(obj):
+        return verbosed_fetch_artifact(obj)
+    if isinstance(obj, dict):
+        for key, value in obj.items():
+            obj[key] = maybe_recover_artifact(value)
+        return obj
+    if isinstance(obj, list):
+        for i in range(len(obj)):
+            obj[i] = maybe_recover_artifact(obj[i])
+        return obj
+    return obj
 
 
 def get_closest_artifact_type(type):

@@ -150,8 +152,12 @@ class Artifact(Dataclass):
         )
 
     @classmethod
-    def is_artifact_dict(cls,
-        return isinstance(
+    def is_artifact_dict(cls, obj):
+        return isinstance(obj, dict) and "__type__" in obj
+
+    @classmethod
+    def is_possible_identifier(cls, obj):
+        return isinstance(obj, str) or cls.is_artifact_dict(obj)
 
     @classmethod
     def verify_artifact_dict(cls, d):

@@ -292,7 +298,7 @@ class Artifact(Dataclass):
             field.type, Union[Artifact, List[Artifact], Dict[str, Artifact]]
         ):
             value = getattr(self, field.name)
-            value =
+            value = maybe_recover_artifacts_structure(value)
             setattr(self, field.name, value)
 
         self.verify_data_classification_policy()

@@ -343,15 +349,18 @@ class Artifact(Dataclass):
 
         Args:
             instance (Dict[str, Any]): data which should contain its allowed data
-
+                classification policies under key 'data_classification_policy'.
+
             name (Optional[str]): name of artifact which should be used to retrieve
-
-
+                data classification from env. If not specified, then either ``__id__`` or
+                ``__class__.__name__``, are used instead, respectively.
 
         Returns:
             Dict[str, Any]: unchanged instance.
 
         Examples:
+            .. code-block:: python
+
             instance = {"x": "some_text", "data_classification_policy": ["pii"]}
 
             # Will raise an error as "pii" is not included policy

@@ -574,11 +583,10 @@ def reset_artifacts_json_cache():
     artifacts_json_cache.cache_clear()
 
 
-def maybe_recover_artifact(
-    if
-        return verbosed_fetch_artifact(
-
-    return artifact
+def maybe_recover_artifact(obj):
+    if Artifact.is_possible_identifier(obj):
+        return verbosed_fetch_artifact(obj)
+    return obj
 
 
 def register_all_artifacts(path):
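The recovery helpers above treat a string, or a dict carrying "__type__", as a possible artifact identifier; anything else passes through untouched. A minimal sketch of the intended behavior (hypothetical catalog id; assumes a populated local catalog):

    from unitxt.artifact import maybe_recover_artifact, maybe_recover_artifacts_structure

    maybe_recover_artifact(42)          # not an identifier -> returned as-is
    maybe_recover_artifacts_structure(
        {"metric": "metrics.accuracy"}  # hypothetical catalog id, fetched in place
    )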
augmentors.py
CHANGED
@@ -97,7 +97,8 @@ class AugmentPrefixSuffix(TextAugmentor):
     To prepend the input with a prefix made of 4 ``\n``-s or ``\t``-s, employ
     ``AugmentPrefixSuffix(augment_model_input=True, prefixes=['\n','\t'], prefix_len=4, suffixes = None)``.
 
-    To append the input with a suffix made of 3 ``\n``-s or ``\t``-s, with ``\n`` being preferred over ``\t``,
+    To append the input with a suffix made of 3 ``\n``-s or ``\t``-s, with ``\n`` being preferred over ``\t``,
+    at 2:1 ratio, employ
     ``AugmentPrefixSuffix(augment_model_input=True, suffixes={'\n':2,'\t':1}, suffix_len=3, prefixes = None)``
     which will append ``\n``-s twice as often as ``\t``-s.
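The 2:1 preference described above is plain weighted sampling; a minimal sketch of the idea (an illustration, not the class's actual implementation):

    import random

    suffixes = {"\n": 2, "\t": 1}  # weights taken from the docstring example
    # Three weighted draws: "\n" is expected twice as often as "\t".
    suffix = "".join(
        random.choices(list(suffixes.keys()), weights=list(suffixes.values()), k=3)
    )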
dataclass.py
CHANGED
@@ -533,6 +533,13 @@ class Dataclass(metaclass=DataclassMeta):
             if keep_empty or value is not None
         }
 
+    def get_repr_dict(self):
+        result = {}
+        for field in fields(self):
+            if not field.internal:
+                result[field.name] = getattr(self, field.name)
+        return result
+
     def __repr__(self) -> str:
         """String representation."""
-        return f"{self.__class__.__name__}({', '.join([f'{
+        return f"{self.__class__.__name__}({', '.join([f'{key}={val!r}' for key, val in self.get_repr_dict().items()])})"
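With ``get_repr_dict`` filtering out internal fields, the repr shows only user-facing fields. A hypothetical sketch of the resulting behavior (assumes the usual unitxt ``Dataclass`` declaration style):

    from unitxt.dataclass import Dataclass

    class Point(Dataclass):  # hypothetical subclass for illustration
        x: int = 0
        y: int = 0

    repr(Point(x=1, y=2))  # -> "Point(x=1, y=2)"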
dialog_operators.py
CHANGED
@@ -5,11 +5,13 @@ text that can be fed to the model.
 
 The format of the dialog is:
 
-
-
-
-
-
+.. code-block:: text
+
+    dialog = [
+        {"user": "hello", "system": "hi"},
+        {"user": "kkk", "system": ""},
+        {"user": "kkk", "system": ""},
+    ]
 """
 from typing import Any, Dict, List, Optional
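A dialog in that shape is typically flattened into one prompt string; a minimal sketch of one plausible serialization (not necessarily the operators' exact output format):

    dialog = [
        {"user": "hello", "system": "hi"},
        {"user": "kkk", "system": ""},
    ]

    # One line per side of each turn.
    text = "\n".join(f"user: {t['user']}\nagent: {t['system']}" for t in dialog)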
dict_utils.py
CHANGED
@@ -24,29 +24,32 @@ def is_wildcard(string):
 # formal definition of qpath syntax by which a query is specified:
 # qpath -> A (/A)*
 # A -> name | * | non-neg-int
-# name ->
-# *
+# name -> a string satisfying is_name above.
+# * -> ALL members (each and every) of a list or a dictionary element in the input dictionary,
 #
-#
-#
-#
-# (
-#
-#
+# A path p in dictionary dic, leading to element (aka subfield) el, is said to match query qpath
+# (alternatively said: query qpath matches path p in dic),
+# if the following recursively defined condition is satisfied:
+# (1) the prefix of length 0 of qpath (i.e., pref = "") matches the empty path in dic, the path leading to the whole of dic.
+# (2) Denoting by el the element in dic lead to by the path in dic that matches the prefix pref of qpath
+# (el must be a list or dictionary, since led to by a path matching a prefix of qpath, and not the whole of qpath),
+# and by A (as the definition above) the component, DIFFERENT from *, in qpath, that follows pref, then the element
+# lead to by the path in dic matching query pref/A is el[A]. If el[A] is missing from dic, then no path in dic matches
+# pref/A, that is either a longer prefix of qpath, or the whole of qpath,
 # and hence no path in dic matches query qpath. (E.g., when el is a list, A must match indx, and its
 # int value should be smaller than len(el) in order for the path in dic leading to element el[A] to match pref/A)
-# (3) Denoting as in (2), now with A == *
+# (3) Denoting as in (2), now with A == * , then when el is a list, each and every element in the set:
 # {el[0], el[1], .. , el[len(el)-1]} is said to be lead to by a path matching pref/*
 # and when el is a dict, each and every element in the set {el[k] for k being a key in el} is said to be lead
 # to by a path matching pref/*
 #
 # An element el lead to by path p that matches qpath as a whole is thus either a list member (when indx.match the last
-# component of p
-# of el
+# component of p) or a dictionary item (the key of which equals the last component of p). The value
+# of el is returned (dic_get) or el is popped (dic_delete) or el's value is replaced by a new value (dic_set).
 #
 # Thus, for a query with no *, dic contains at most one element the path to which matches the query.
 # If there is such one in dic - the function (either dict_get, dict_set, or dict_delete) operates on
-# that element according to its arguments, other than not_exist_ok
+# that element according to its arguments, other than not_exist_ok.
 # If there is not any such element in dic - the function throws or does not throw an exception, depending
 # on flag not_exist_ok.
 # For a query with *, there could be up to as many as there are values to match the *

@@ -54,9 +57,9 @@ def is_wildcard(string):
 # for more than one * in the query -- this effect multiplies)
 # Each of the three functions below (dict_get, dict_set, dict_delete) applies the requested
 # operation (read, set, or delete) to each and every element el in dic, the path to which matches the query in whole,
-# and reads a value from, or sets a new value to, or pops
+# and reads a value from, or sets a new value to, or pops el out from dic.
 #
-# If no path in dic matches the query, then
+# If no path in dic matches the query, then if not_exist_ok=False, the function throws an exception;
 # but if not_exist_ok=True, the function returns a default value (dict_get) or does nothing (dict_delete)
 # or generates all the needed missing suffixes (dict_set, see details below).
 #

@@ -444,7 +447,7 @@ def dict_get(
     )
     if len(components) > 1:
         try:
-            success, values = get_values(
+            success, values = get_values(
+                dic, components, -1 * len(components), allow_int_index=allow_int_index
+            )
             if success:
                 return values
         except Exception as e:
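A quick illustration of the qpath semantics spelled out above (a sketch; assumes ``dict_get(dic, query)`` as the first two arguments):

    from unitxt.dict_utils import dict_get

    dic = {"instances": [{"score": 1}, {"score": 2}, {"score": 3}]}

    # No '*': at most one element matches the query.
    dict_get(dic, "instances/0/score")  # -> 1

    # '*' matches each and every member of the list it stands for.
    dict_get(dic, "instances/*/score")  # -> [1, 2, 3]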
formats.py
CHANGED
@@ -28,26 +28,26 @@ class Format(InstanceOperator):
 def apply_capital_new_line_notation(text: str) -> str:
     r"""Transforms a given string by applying the Capital New Line Notation.
 
-    The Capital New Line Notation (\N) is designed to manage newline behavior in a string efficiently.
-    This custom notation aims to consolidate multiple newline characters (\n) into a single newline under
+    The Capital New Line Notation ``(\N)`` is designed to manage newline behavior in a string efficiently.
+    This custom notation aims to consolidate multiple newline characters ``(\n)`` into a single newline under
     specific conditions, with tailored handling based on whether there's preceding text. The function
     distinguishes between two primary scenarios:
 
-    1. If there's text (referred to as a prefix) followed by any number of
-       more
+    1. If there's text (referred to as a prefix) followed by any number of ``\n`` characters and then one or
+       more ``\N``, the entire sequence is replaced with a single ``\n``. This effectively simplifies multiple
        newlines and notation characters into a single newline when there's preceding text.
-
-
+
+    2. If the string starts with ``\n`` characters followed by ``\N`` without any text before this sequence, or if
+       ``\N`` is at the very beginning of the string, the sequence is completely removed. This case is
       applicable when the notation should not introduce any newlines due to the absence of preceding text.
 
     Args:
-        text (str): The input string to be transformed, potentially containing the Capital New Line Notation
-        (\N) mixed with actual newline characters (\n).
+        text (str): The input string to be transformed, potentially containing the Capital New Line Notation ``(\N)`` mixed with actual newline characters ``(\n)``.
 
     Returns:
-
-
-
+        The string after applying the Capital New Line Notation rules, which either consolidates multiple
+        newlines and notation characters into a single newline when text precedes them, or removes the
+        notation and any preceding newlines entirely if no text is present before the notation.
 
     Examples:
         >>> apply_capital_new_line_notation("Hello World\\n\\n\N")

@@ -131,27 +131,26 @@ class BaseFormat(Format):
 class SystemFormat(BaseFormat):
     r"""Generates the whole input to the model, from constant strings that are given as args, and from values found in specified fields of the instance.
 
-    Important: formats can use '\N' notations that means new-line if no new-line before and no empty string before.
+    Important: formats can use ``'\N'`` notations that means new-line if no new-line before and no empty string before.
 
     SystemFormat expects the input instance to contain:
     1. A field named "system_prompt" whose value is a string (potentially empty) that delivers a task-independent opening text.
     2. A field named "source" whose value is a string verbalizing the original values in the instance (as read
     from the source dataset), in the context of the underlying task.
     3. A field named "instruction" that contains a (non-None) string.
-    4. A field named with the value in arg 'demos_field'
+    4. A field named with the value in arg ``'demos_field'``, containing a list of dicts, each dict with fields "source"
     and "target", representing a single demo.
     5. A field named "target_prefix" that contains a string to prefix the target in each demo, and to end the whole generated prompt
 
-    SystemFormat formats the above fields into a single string to be
-    field "source" of the instance. Formatting is driven by two args: 'demo_format' and 'model_input_format'
+    SystemFormat formats the above fields into a single string to be input to the model. This string overwrites
+    field "source" of the instance. Formatting is driven by two args: ``'demo_format'`` and ``'model_input_format'``.
     SystemFormat also pops fields "system_prompt", "instruction", "target_prefix", and the field containing the demos out from the input instance.
 
     Args:
         demos_field (str): the name of the field that contains the demos, being a list of dicts, each with "source" and "target" keys
         demo_format (str): formatting string for a single demo, combining fields "source" and "target"
-        model_input_format (str) overall product format, combining instruction and source (as read from fields "instruction"
-
-        format_args: Dict[str,str]: additional format args to be used when formatting the different format strings
+        model_input_format (str): overall product format, combining instruction and source (as read from fields "instruction" and "source" of the input instance), together with demos (as formatted into one string)
+        format_args (Dict[str,str]): additional format args to be used when formatting the different format strings
 
     Example:
         when input instance:

@@ -423,24 +422,13 @@ class ChatAPIFormat(BaseFormat):
 class HFSystemFormat(ChatAPIFormat):
     r"""Formats the complete input for the model using the HuggingFace chat template of a given model.
 
-    HFSystemFormat
-    1. A field named "system_prompt" whose value is a string (potentially empty) that delivers a task-independent opening text.
-    2. A field named "source" whose value is a string verbalizing the original values in the instance (as read
-    from the source dataset), in the context of the underlying task.
-    3. A field named "instruction" that contains a (non-None) string.
-    4. A field named with the value in arg 'demos_field', containing a list of dicts, each dict with fields "source"
-    and "target", representing a single demo.
-    5. A field named "target_prefix" that contains a string to prefix the target in each demo, and to end the whole generated prompt.
-
-    SystemFormat formats the above fields into a single string to be inputted to the model. This string overwrites
+    HFSystemFormat formats instance fields into a single string to be inputted to the model. This string overwrites
     field "source" of the instance.
 
     Example:
-        HFSystemFormat(model_name="HuggingFaceH4/zephyr-7b-beta")
-
-        Uses the template defined the in tokenizer_config.json of the model:
+        ``HFSystemFormat(model_name="HuggingFaceH4/zephyr-7b-beta")`` Uses the template defined the in tokenizer_config.json of the model:
 
-        "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
+        ``"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"``
 
     See more details in https://huggingface.co/docs/transformers/main/en/chat_templating
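The two \N rules lend themselves to a short regex sketch (an illustration of the documented behavior, not the library's actual implementation):

    import re

    CAPITAL_N = r"\\N"  # the literal two-character notation: backslash + N

    def capital_new_line_sketch(text: str) -> str:
        # Rule 2: a \n-run followed by \N at the very start vanishes entirely.
        text = re.sub(rf"^\n*({CAPITAL_N})+", "", text)
        # Rule 1: elsewhere, a \n-run plus one or more \N collapses to one \n.
        return re.sub(rf"\n*({CAPITAL_N})+", "\n", text)

    capital_new_line_sketch("Hello World\n\n\\N")  # -> 'Hello World\n'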
image_operators.py
CHANGED
@@ -167,12 +167,14 @@ class GridLines(ImageAugmentor):
     """A class that overlays a fixed number of evenly spaced horizontal and vertical lines on an image.
 
     Attributes:
-
-
-
+        num_lines (int): The number of horizontal and vertical lines to add.
+
+        line_thickness (int): Thickness of each line in pixels.
+
+        line_color (Tuple[int, int, int]): RGB color of the grid lines.
 
     Methods:
-
+        process_image(image): Adds grid lines to the provided image and returns the modified image.
     """
 
     num_lines: int = 128

@@ -207,11 +209,12 @@ class PixelNoise(ImageAugmentor):
     """A class that overlays a mask of randomly colored nxn squares across an image based on a specified noise rate.
 
     Attributes:
-
-
+        square_size (int): Size of each square in pixels.
+
+        noise_rate (float): Proportion of the image that should be affected by noise (0 to 1).
 
     Methods:
-
+        process_image(image): Adds the random square mask to the provided image and returns the modified image.
     """
 
     square_size: int = 1
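A minimal usage sketch for the two augmentors (assumes Pillow images and a ``process_image`` method matching the docstrings above):

    from PIL import Image
    from unitxt.image_operators import GridLines, PixelNoise

    image = Image.new("RGB", (256, 256), color="white")

    gridded = GridLines(num_lines=8, line_thickness=2).process_image(image)
    noisy = PixelNoise(square_size=4, noise_rate=0.3).process_image(image)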
inference.py
CHANGED
@@ -23,7 +23,7 @@ from typing import (
|
|
23 |
Union,
|
24 |
)
|
25 |
|
26 |
-
from datasets import DatasetDict
|
27 |
from tqdm import tqdm, trange
|
28 |
from tqdm.asyncio import tqdm_asyncio
|
29 |
|
@@ -70,21 +70,26 @@ class TextGenerationInferenceOutput:
|
|
70 |
"""Contains the prediction results and metadata for the inference.
|
71 |
|
72 |
Args:
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
-
|
82 |
-
output_tokens (int) : number of output tokens to the model.
|
83 |
-
stop_reason (str): stop reason for text generation, for example "eos" (end of string).
|
84 |
-
seed (int): seed used by the model during generation.
|
85 |
-
input_text (str): input to the model.
|
86 |
-
model_name (str): the model_name as kept in the InferenceEngine.
|
87 |
-
inference_type (str): The label stating the type of the InferenceEngine.
|
88 |
"""
|
89 |
|
90 |
prediction: Union[str, List[Dict[str, Any]]]
|
@@ -103,7 +108,7 @@ class InferenceEngine(Artifact):
|
|
103 |
@abc.abstractmethod
|
104 |
def _infer(
|
105 |
self,
|
106 |
-
dataset: Union[List[Dict[str, Any]],
|
107 |
return_meta_data: bool = False,
|
108 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
109 |
"""Perform inference on the input dataset.
|
@@ -126,7 +131,7 @@ class InferenceEngine(Artifact):
|
|
126 |
|
127 |
def infer(
|
128 |
self,
|
129 |
-
dataset: Union[List[Dict[str, Any]],
|
130 |
return_meta_data: bool = False,
|
131 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
132 |
"""Verifies instances of a dataset and perform inference on the input dataset.
|
@@ -134,6 +139,10 @@ class InferenceEngine(Artifact):
|
|
134 |
If return_meta_data - returns a list of TextGenerationInferenceOutput, else returns a list of the string
|
135 |
predictions.
|
136 |
"""
|
|
|
|
|
|
|
|
|
137 |
if return_meta_data and not hasattr(self, "get_return_object"):
|
138 |
raise NotImplementedError(
|
139 |
f"Inference engine {self.__class__.__name__} does not support return_meta_data as it "
|
@@ -147,7 +156,7 @@ class InferenceEngine(Artifact):
|
|
147 |
|
148 |
def _mock_infer(
|
149 |
self,
|
150 |
-
dataset: Union[List[Dict[str, Any]],
|
151 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
152 |
return [str(instance["source"]) for instance in dataset]
|
153 |
|
@@ -198,7 +207,7 @@ class LogProbInferenceEngine(abc.ABC, Artifact):
|
|
198 |
@abc.abstractmethod
|
199 |
def _infer_log_probs(
|
200 |
self,
|
201 |
-
dataset: Union[List[Dict[str, Any]],
|
202 |
return_meta_data: bool = False,
|
203 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
204 |
"""Perform inference on the input dataset that returns log probs.
|
@@ -211,7 +220,7 @@ class LogProbInferenceEngine(abc.ABC, Artifact):
|
|
211 |
|
212 |
def infer_log_probs(
|
213 |
self,
|
214 |
-
dataset: Union[List[Dict[str, Any]],
|
215 |
return_meta_data: bool = False,
|
216 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
217 |
"""Verifies instances of a dataset and performs inference that returns log probabilities of top tokens.
|
@@ -446,7 +455,7 @@ class HFInferenceEngineBase(
|
|
446 |
|
447 |
def infer(
|
448 |
self,
|
449 |
-
dataset: Union[List[Dict[str, Any]],
|
450 |
return_meta_data: bool = False,
|
451 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
452 |
if not self._is_loaded():
|
@@ -456,14 +465,14 @@ class HFInferenceEngineBase(
|
|
456 |
@abc.abstractmethod
|
457 |
def _infer(
|
458 |
self,
|
459 |
-
dataset: Union[List[Dict[str, Any]],
|
460 |
return_meta_data: bool = False,
|
461 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
462 |
raise NotImplementedError
|
463 |
|
464 |
def infer_log_probs(
|
465 |
self,
|
466 |
-
dataset: Union[List[Dict[str, Any]],
|
467 |
return_meta_data: bool = False,
|
468 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
469 |
if not self._is_loaded():
|
@@ -473,7 +482,7 @@ class HFInferenceEngineBase(
|
|
473 |
@abc.abstractmethod
|
474 |
def _infer_log_probs(
|
475 |
self,
|
476 |
-
dataset: Union[List[Dict[str, Any]],
|
477 |
return_meta_data: bool = False,
|
478 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
479 |
raise NotImplementedError
|
@@ -524,7 +533,7 @@ class HFAutoModelInferenceEngine(HFInferenceEngineBase):
|
|
524 |
|
525 |
def _infer_fn(
|
526 |
self,
|
527 |
-
dataset: Union[List[Dict[str, Any]],
|
528 |
return_meta_data: bool,
|
529 |
return_logprobs: bool,
|
530 |
) -> Union[List[str], List[Dict], List[TextGenerationInferenceOutput]]:
|
@@ -565,7 +574,7 @@ class HFAutoModelInferenceEngine(HFInferenceEngineBase):
|
|
565 |
|
566 |
def _infer(
|
567 |
self,
|
568 |
-
dataset: Union[List[Dict[str, Any]],
|
569 |
return_meta_data: bool = False,
|
570 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
571 |
self.verify_not_chat_api(dataset)
|
@@ -573,7 +582,7 @@ class HFAutoModelInferenceEngine(HFInferenceEngineBase):
|
|
573 |
|
574 |
def _infer_log_probs(
|
575 |
self,
|
576 |
-
dataset: Union[List[Dict[str, Any]],
|
577 |
return_meta_data: bool = False,
|
578 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
579 |
self.verify_not_chat_api(dataset)
|
@@ -647,7 +656,7 @@ class HFLlavaInferenceEngine(HFInferenceEngineBase):
|
|
647 |
|
648 |
def _infer_fn(
|
649 |
self,
|
650 |
-
dataset: Union[List[Dict[str, Any]],
|
651 |
return_meta_data: bool,
|
652 |
return_logprobs: bool,
|
653 |
) -> Union[List[str], List[Dict], List[TextGenerationInferenceOutput]]:
|
@@ -681,14 +690,14 @@ class HFLlavaInferenceEngine(HFInferenceEngineBase):
|
|
681 |
|
682 |
def _infer(
|
683 |
self,
|
684 |
-
dataset: Union[List[Dict[str, Any]],
|
685 |
return_meta_data: bool = False,
|
686 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
687 |
return self._infer_fn(dataset, return_meta_data, False)
|
688 |
|
689 |
def _infer_log_probs(
|
690 |
self,
|
691 |
-
dataset: Union[List[Dict[str, Any]],
|
692 |
return_meta_data: bool = False,
|
693 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
694 |
return self._infer_fn(dataset, return_meta_data, True)
|
@@ -879,7 +888,7 @@ class HFPipelineBasedInferenceEngine(
|
|
879 |
|
880 |
def _infer(
|
881 |
self,
|
882 |
-
dataset: Union[List[Dict[str, Any]],
|
883 |
return_meta_data: bool = False,
|
884 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
885 |
if not self._is_loaded():
|
@@ -933,13 +942,13 @@ class MockInferenceEngine(InferenceEngine, LogProbInferenceEngine):
|
|
933 |
|
934 |
def _mock_infer(
|
935 |
self,
|
936 |
-
dataset: Union[List[Dict[str, Any]],
|
937 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
938 |
return [self.default_inference_value for _ in dataset]
|
939 |
|
940 |
def _infer(
|
941 |
self,
|
942 |
-
dataset: Union[List[Dict[str, Any]],
|
943 |
return_meta_data: bool = False,
|
944 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
945 |
return [
|
@@ -951,7 +960,7 @@ class MockInferenceEngine(InferenceEngine, LogProbInferenceEngine):
|
|
951 |
|
952 |
def _infer_log_probs(
|
953 |
self,
|
954 |
-
dataset: Union[List[Dict[str, Any]],
|
955 |
return_meta_data: bool = False,
|
956 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
957 |
return [
|
@@ -1047,14 +1056,14 @@ class GenericInferenceEngine(
|
|
1047 |
|
1048 |
def _infer(
|
1049 |
self,
|
1050 |
-
dataset: Union[List[Dict[str, Any]],
|
1051 |
return_meta_data: bool = False,
|
1052 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1053 |
return self.engine._infer(dataset)
|
1054 |
|
1055 |
def _infer_log_probs(
|
1056 |
self,
|
1057 |
-
dataset: Union[List[Dict[str, Any]],
|
1058 |
return_meta_data: bool = False,
|
1059 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1060 |
if not isinstance(self.engine, LogProbInferenceEngine):
|
@@ -1082,7 +1091,7 @@ class OllamaInferenceEngine(
|
|
1082 |
|
1083 |
def _infer(
|
1084 |
self,
|
1085 |
-
dataset: Union[List[Dict[str, Any]],
|
1086 |
return_meta_data: bool = False,
|
1087 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1088 |
import ollama
|
@@ -1250,7 +1259,7 @@ class IbmGenAiInferenceEngine(
|
|
1250 |
|
1251 |
def _infer(
|
1252 |
self,
|
1253 |
-
dataset: Union[List[Dict[str, Any]],
|
1254 |
return_meta_data: bool = False,
|
1255 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1256 |
from genai.schema import TextGenerationParameters, TextGenerationResult
|
@@ -1279,7 +1288,7 @@ class IbmGenAiInferenceEngine(
|
|
1279 |
|
1280 |
def _infer_log_probs(
|
1281 |
self,
|
1282 |
-
dataset: Union[List[Dict[str, Any]],
|
1283 |
return_meta_data: bool = False,
|
1284 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
1285 |
from genai.schema import TextGenerationParameters, TextGenerationResult
|
@@ -1507,7 +1516,7 @@ class OpenAiInferenceEngine(
|
|
1507 |
|
1508 |
def _infer(
|
1509 |
self,
|
1510 |
-
dataset: Union[List[Dict[str, Any]],
|
1511 |
return_meta_data: bool = False,
|
1512 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1513 |
outputs = []
|
@@ -1527,22 +1536,14 @@ class OpenAiInferenceEngine(
|
|
1527 |
|
1528 |
def _infer_log_probs(
|
1529 |
self,
|
1530 |
-
dataset: Union[List[Dict[str, Any]],
|
1531 |
return_meta_data: bool = False,
|
1532 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
1533 |
outputs = []
|
1534 |
for instance in tqdm(dataset, desc="Inferring with openAI API"):
|
|
|
1535 |
response = self.client.chat.completions.create(
|
1536 |
-
messages=
|
1537 |
-
# {
|
1538 |
-
# "role": "system",
|
1539 |
-
# "content": self.system_prompt,
|
1540 |
-
# },
|
1541 |
-
{
|
1542 |
-
"role": "user",
|
1543 |
-
"content": instance["source"],
|
1544 |
-
}
|
1545 |
-
],
|
1546 |
model=self.model_name,
|
1547 |
**self._get_completion_kwargs(),
|
1548 |
)
|
@@ -1681,7 +1682,7 @@ class TogetherAiInferenceEngine(
|
|
1681 |
|
1682 |
def _infer(
|
1683 |
self,
|
1684 |
-
dataset: Union[List[Dict[str, Any]],
|
1685 |
return_meta_data: bool = False,
|
1686 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1687 |
from together.types.models import ModelType
|
@@ -1943,7 +1944,7 @@ class WMLInferenceEngineBase(
|
|
1943 |
@abc.abstractmethod
|
1944 |
def _send_requests(
|
1945 |
self,
|
1946 |
-
dataset: Union[List[Dict[str, Any]],
|
1947 |
return_logprobs: bool,
|
1948 |
return_meta_data: bool,
|
1949 |
) -> Union[List[str], List[Dict], List[TextGenerationInferenceOutput]]:
|
@@ -1955,7 +1956,7 @@ class WMLInferenceEngineBase(
|
|
1955 |
|
1956 |
def _infer(
|
1957 |
self,
|
1958 |
-
dataset: Union[List[Dict[str, Any]],
|
1959 |
return_meta_data: bool = False,
|
1960 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1961 |
if self._model is None:
|
@@ -1969,7 +1970,7 @@ class WMLInferenceEngineBase(
|
|
1969 |
|
1970 |
def _infer_log_probs(
|
1971 |
self,
|
1972 |
-
dataset: Union[List[Dict[str, Any]],
|
1973 |
return_meta_data: bool = False,
|
1974 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
1975 |
if self._model is None:
|
@@ -2050,27 +2051,29 @@ class WMLInferenceEngineGeneration(WMLInferenceEngineBase, WMLGenerationParamsMi
|
|
2050 |
|
2051 |
Attributes:
|
2052 |
concurrency_limit (int): Number of concurrent requests sent to a model. Default is 10,
|
2053 |
-
|
2054 |
|
2055 |
Examples:
|
2056 |
-
|
2057 |
|
2058 |
-
|
2059 |
-
"url": "some_url", "project_id": "some_id", "api_key": "some_key"
|
2060 |
-
}
|
2061 |
-
model_name = "google/flan-t5-xxl"
|
2062 |
-
wml_inference = WMLInferenceEngineGeneration(
|
2063 |
-
credentials=wml_credentials,
|
2064 |
-
model_name=model_name,
|
2065 |
-
data_classification_policy=["public"],
|
2066 |
-
top_p=0.5,
|
2067 |
-
random_seed=123,
|
2068 |
-
)
|
2069 |
|
2070 |
-
|
2071 |
-
|
2072 |
-
|
2073 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2074 |
"""
|
2075 |
|
2076 |
concurrency_limit: int = 10
|
@@ -2112,7 +2115,7 @@ class WMLInferenceEngineGeneration(WMLInferenceEngineBase, WMLGenerationParamsMi
|
|
2112 |
|
2113 |
def _send_requests(
|
2114 |
self,
|
2115 |
-
dataset: Union[List[Dict[str, Any]],
|
2116 |
return_logprobs: bool,
|
2117 |
return_meta_data: bool,
|
2118 |
) -> Union[List[str], List[Dict], List[TextGenerationInferenceOutput]]:
|
@@ -2178,31 +2181,33 @@ class WMLInferenceEngineChat(WMLInferenceEngineBase, WMLChatParamsMixin):
|
|
2178 |
|
2179 |
Attributes:
|
2180 |
image_encoder (EncodeImageToString, optional): operator which encodes images in
|
2181 |
-
|
2182 |
-
|
2183 |
|
2184 |
Example:
|
2185 |
-
|
2186 |
-
from .image_operators
|
2187 |
|
2188 |
-
|
|
|
2189 |
|
2190 |
-
|
2191 |
-
"url": "some_url", "project_id": "some_id", "api_key": "some_key"
|
2192 |
-
}
|
2193 |
-
model_name = "meta-llama/llama-3-2-11b-vision-instruct"
|
2194 |
-
wml_inference = WMLInferenceEngineChat(
|
2195 |
-
credentials=wml_credentials,
|
2196 |
-
model_name=model_name,
|
2197 |
-
image_encoder=image_encoder,
|
2198 |
-
data_classification_policy=["public"],
|
2199 |
-
max_tokens=1024,
|
2200 |
-
)
|
2201 |
|
2202 |
-
|
2203 |
-
|
2204 |
-
|
2205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2206 |
"""
|
2207 |
|
2208 |
image_encoder: Optional[EncodeImageToString] = None
|
@@ -2303,7 +2308,7 @@ class WMLInferenceEngineChat(WMLInferenceEngineBase, WMLChatParamsMixin):
|
|
2303 |
|
2304 |
def _send_requests(
|
2305 |
self,
|
2306 |
-
dataset: Union[List[Dict[str, Any]],
|
2307 |
return_logprobs: bool,
|
2308 |
return_meta_data: bool,
|
2309 |
) -> Union[List[str], List[Dict], List[TextGenerationInferenceOutput]]:
|
@@ -2428,7 +2433,7 @@ class LMMSEvalInferenceEngine(LMMSEvalBaseInferenceEngine):
|
|
2428 |
|
2429 |
def _infer(
|
2430 |
self,
|
2431 |
-
dataset: Union[List[Dict[str, Any]],
|
2432 |
return_meta_data: bool = False,
|
2433 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
2434 |
if not self._is_loaded():
|
@@ -2500,7 +2505,7 @@ class LMMSEvalLoglikelihoodInferenceEngine(LMMSEvalBaseInferenceEngine):
|
|
2500 |
|
2501 |
def _infer(
|
2502 |
self,
|
2503 |
-
dataset: Union[List[Dict[str, Any]],
|
2504 |
return_meta_data: bool = False,
|
2505 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
2506 |
if not self._is_loaded():
|
@@ -2555,7 +2560,7 @@ class VLLMInferenceEngine(
|
|
2555 |
|
2556 |
def _infer(
|
2557 |
self,
|
2558 |
-
dataset: Union[List[Dict[str, Any]],
|
2559 |
return_meta_data: bool = False,
|
2560 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
2561 |
inputs = []
|
@@ -2681,7 +2686,7 @@ class LiteLLMInferenceEngine(
|
|
2681 |
|
2682 |
def _infer(
|
2683 |
self,
|
2684 |
-
dataset: Union[List[Dict[str, Any]],
|
2685 |
return_meta_data: bool = False,
|
2686 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
2687 |
"""Main inference entry point."""
|
@@ -2735,8 +2740,8 @@ class CrossProviderInferenceEngine(InferenceEngine, StandardAPIParamsMixin):
|
|
2735 |
"granite-3-8b-instruct": "ibm/granite-3-8b-instruct",
|
2736 |
},
|
2737 |
"together-ai": {
|
2738 |
-
"llama-3-8b-instruct": "together_ai/
|
2739 |
-
"llama-3-70b-instruct": "together_ai/
|
2740 |
"llama-3-2-1b-instruct": "together_ai/togethercomputer/llama-3-2-1b-instruct",
|
2741 |
},
|
2742 |
"aws": {
|
@@ -2812,7 +2817,7 @@ class CrossProviderInferenceEngine(InferenceEngine, StandardAPIParamsMixin):
|
|
2812 |
|
2813 |
def _infer(
|
2814 |
self,
|
2815 |
-
dataset: Union[List[Dict[str, Any]],
|
2816 |
return_meta_data: bool = False,
|
2817 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
2818 |
return self.engine._infer(dataset, return_meta_data)
|
@@ -2898,7 +2903,7 @@ class HFOptionSelectingInferenceEngine(InferenceEngine):
|
|
2898 |
|
2899 |
def _infer(
|
2900 |
self,
|
2901 |
-
dataset: Union[List[Dict[str, Any]],
|
2902 |
return_meta_data: bool = False,
|
2903 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
2904 |
inputs = []
|
|
|
23 |
Union,
|
24 |
)
|
25 |
|
26 |
+
from datasets import Dataset, DatasetDict
|
27 |
from tqdm import tqdm, trange
|
28 |
from tqdm.asyncio import tqdm_asyncio
|
29 |
|
|
|
70 |
"""Contains the prediction results and metadata for the inference.
|
71 |
|
72 |
Args:
|
73 |
+
prediction (Union[str, List[Dict[str, Any]]]): If this is the result of an _infer call, the string predicted by the model.
|
74 |
+
| If this is the results of an _infer_log_probs call, a list of dictionaries. The i'th dictionary represents
|
75 |
+
the i'th token in the response. The entry "top_tokens" in the dictionary holds a sorted list of the top tokens
|
76 |
+
for this position and their probabilities.
|
77 |
+
| For example: ``[ {.. "top_tokens": [ {"text": "a", 'logprob': }, {"text": "b", 'logprob': } ....]},
|
78 |
+
{.. "top_tokens": [ {"text": "c", 'logprob': }, {"text": "d", 'logprob': } ....]} ]``
|
79 |
+
|
80 |
+
input_tokens (int) : number of input tokens to the model.
|
81 |
+
|
82 |
+
output_tokens (int) : number of output tokens to the model.
|
83 |
+
|
84 |
+
stop_reason (str): stop reason for text generation, for example "eos" (end of string).
|
85 |
+
|
86 |
+
seed (int): seed used by the model during generation.
|
87 |
+
|
88 |
+
input_text (str): input to the model.
|
89 |
+
|
90 |
+
model_name (str): the model_name as kept in the InferenceEngine.
|
91 |
|
92 |
+
inference_type (str): The label stating the type of the InferenceEngine.
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
"""
|
94 |
|
95 |
prediction: Union[str, List[Dict[str, Any]]]
|
|
|
108 |
@abc.abstractmethod
|
109 |
def _infer(
|
110 |
self,
|
111 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
112 |
return_meta_data: bool = False,
|
113 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
114 |
"""Perform inference on the input dataset.
|
|
|
131 |
|
132 |
def infer(
|
133 |
self,
|
134 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
135 |
return_meta_data: bool = False,
|
136 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
137 |
"""Verifies instances of a dataset and perform inference on the input dataset.
|
|
|
139 |
If return_meta_data - returns a list of TextGenerationInferenceOutput, else returns a list of the string
|
140 |
predictions.
|
141 |
"""
|
142 |
+
if not isoftype(dataset, Union[List[Dict[str, Any]], Dataset]):
|
143 |
+
raise Exception(
|
144 |
+
"Dataset passed to infer() is not list of dictionaries or Huggingface Dataset"
|
145 |
+
)
|
146 |
if return_meta_data and not hasattr(self, "get_return_object"):
|
147 |
raise NotImplementedError(
|
148 |
f"Inference engine {self.__class__.__name__} does not support return_meta_data as it "
|
|
|
156 |
|
157 |
def _mock_infer(
|
158 |
self,
|
159 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
160 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
161 |
return [str(instance["source"]) for instance in dataset]
|
162 |
|
|
|
207 |
@abc.abstractmethod
|
208 |
def _infer_log_probs(
|
209 |
self,
|
210 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
211 |
return_meta_data: bool = False,
|
212 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
213 |
"""Perform inference on the input dataset that returns log probs.
|
|
|
220 |
|
221 |
def infer_log_probs(
|
222 |
self,
|
223 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
224 |
return_meta_data: bool = False,
|
225 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
226 |
"""Verifies instances of a dataset and performs inference that returns log probabilities of top tokens.
|
|
|
455 |
|
456 |
def infer(
|
457 |
self,
|
458 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
459 |
return_meta_data: bool = False,
|
460 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
461 |
if not self._is_loaded():
|
|
|
465 |
@abc.abstractmethod
|
466 |
def _infer(
|
467 |
self,
|
468 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
469 |
return_meta_data: bool = False,
|
470 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
471 |
raise NotImplementedError
|
472 |
|
473 |
def infer_log_probs(
|
474 |
self,
|
475 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
476 |
return_meta_data: bool = False,
|
477 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
478 |
if not self._is_loaded():
|
|
|
482 |
@abc.abstractmethod
|
483 |
def _infer_log_probs(
|
484 |
self,
|
485 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
486 |
return_meta_data: bool = False,
|
487 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
488 |
raise NotImplementedError
|
|
|
533 |
|
534 |
def _infer_fn(
|
535 |
self,
|
536 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
537 |
return_meta_data: bool,
|
538 |
return_logprobs: bool,
|
539 |
) -> Union[List[str], List[Dict], List[TextGenerationInferenceOutput]]:
|
|
|
574 |
|
575 |
def _infer(
|
576 |
self,
|
577 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
578 |
return_meta_data: bool = False,
|
579 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
580 |
self.verify_not_chat_api(dataset)
|
|
|
582 |
|
583 |
def _infer_log_probs(
|
584 |
self,
|
585 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
586 |
return_meta_data: bool = False,
|
587 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
588 |
self.verify_not_chat_api(dataset)
|
|
|
656 |
|
657 |
def _infer_fn(
|
658 |
self,
|
659 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
660 |
return_meta_data: bool,
|
661 |
return_logprobs: bool,
|
662 |
) -> Union[List[str], List[Dict], List[TextGenerationInferenceOutput]]:
|
|
|
690 |
|
691 |
def _infer(
|
692 |
self,
|
693 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
694 |
return_meta_data: bool = False,
|
695 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
696 |
return self._infer_fn(dataset, return_meta_data, False)
|
697 |
|
698 |
def _infer_log_probs(
|
699 |
self,
|
700 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
701 |
return_meta_data: bool = False,
|
702 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
703 |
return self._infer_fn(dataset, return_meta_data, True)
|
|
|
888 |
|
889 |
def _infer(
|
890 |
self,
|
891 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
892 |
return_meta_data: bool = False,
|
893 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
894 |
if not self._is_loaded():
|
|
|
942 |
|
943 |
def _mock_infer(
|
944 |
self,
|
945 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
946 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
947 |
return [self.default_inference_value for _ in dataset]
|
948 |
|
949 |
def _infer(
|
950 |
self,
|
951 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
952 |
return_meta_data: bool = False,
|
953 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
954 |
return [
|
|
|
960 |
|
961 |
def _infer_log_probs(
|
962 |
self,
|
963 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
964 |
return_meta_data: bool = False,
|
965 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
966 |
return [
|
|
|
1056 |
|
1057 |
def _infer(
|
1058 |
self,
|
1059 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
1060 |
return_meta_data: bool = False,
|
1061 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1062 |
return self.engine._infer(dataset)
|
1063 |
|
1064 |
def _infer_log_probs(
|
1065 |
self,
|
1066 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
1067 |
return_meta_data: bool = False,
|
1068 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1069 |
if not isinstance(self.engine, LogProbInferenceEngine):
|
|
|
1091 |
|
1092 |
def _infer(
|
1093 |
self,
|
1094 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
1095 |
return_meta_data: bool = False,
|
1096 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1097 |
import ollama
|
|
|
1259 |
|
1260 |
def _infer(
|
1261 |
self,
|
1262 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
1263 |
return_meta_data: bool = False,
|
1264 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1265 |
from genai.schema import TextGenerationParameters, TextGenerationResult
|
|
|
1288 |
|
1289 |
def _infer_log_probs(
|
1290 |
self,
|
1291 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
1292 |
return_meta_data: bool = False,
|
1293 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
1294 |
from genai.schema import TextGenerationParameters, TextGenerationResult
|
|
|
1516 |
|
1517 |
def _infer(
|
1518 |
self,
|
1519 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
1520 |
return_meta_data: bool = False,
|
1521 |
) -> Union[List[str], List[TextGenerationInferenceOutput]]:
|
1522 |
outputs = []
|
|
|
1536 |
|
1537 |
def _infer_log_probs(
|
1538 |
self,
|
1539 |
+
dataset: Union[List[Dict[str, Any]], Dataset],
|
1540 |
return_meta_data: bool = False,
|
1541 |
) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
|
1542 |
outputs = []
|
1543 |
for instance in tqdm(dataset, desc="Inferring with openAI API"):
|
1544 |
+
messages = self.to_messages(instance)
|
1545 |
response = self.client.chat.completions.create(
|
1546 |
+
messages=messages,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1547 |
model=self.model_name,
|
1548 |
**self._get_completion_kwargs(),
|
1549 |
)
|
|
|
1682 |
|
1683 |
def _infer(
|
1684 |
self,
|
1685 |
+
     dataset: Union[List[Dict[str, Any]], Dataset],
     return_meta_data: bool = False,
 ) -> Union[List[str], List[TextGenerationInferenceOutput]]:
     from together.types.models import ModelType

 @abc.abstractmethod
 def _send_requests(
     self,
+    dataset: Union[List[Dict[str, Any]], Dataset],
     return_logprobs: bool,
     return_meta_data: bool,
 ) -> Union[List[str], List[Dict], List[TextGenerationInferenceOutput]]:

 def _infer(
     self,
+    dataset: Union[List[Dict[str, Any]], Dataset],
     return_meta_data: bool = False,
 ) -> Union[List[str], List[TextGenerationInferenceOutput]]:
     if self._model is None:

 def _infer_log_probs(
     self,
+    dataset: Union[List[Dict[str, Any]], Dataset],
     return_meta_data: bool = False,
 ) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
     if self._model is None:

 Attributes:
     concurrency_limit (int): Number of concurrent requests sent to a model. Default is 10,
+        which is also the maximum value.

 Examples:
+    .. code-block:: python
+
+        from .api import load_dataset
+
+        wml_credentials = {
+            "url": "some_url", "project_id": "some_id", "api_key": "some_key"
+        }
+        model_name = "google/flan-t5-xxl"
+        wml_inference = WMLInferenceEngineGeneration(
+            credentials=wml_credentials,
+            model_name=model_name,
+            data_classification_policy=["public"],
+            top_p=0.5,
+            random_seed=123,
+        )
+
+        dataset = load_dataset(
+            dataset_query="card=cards.argument_topic,template_card_index=0,loader_limit=5"
+        )
+        results = wml_inference.infer(dataset["test"])
 """

 concurrency_limit: int = 10

 def _send_requests(
     self,
+    dataset: Union[List[Dict[str, Any]], Dataset],
     return_logprobs: bool,
     return_meta_data: bool,
 ) -> Union[List[str], List[Dict], List[TextGenerationInferenceOutput]]:

 Attributes:
     image_encoder (EncodeImageToString, optional): operator which encodes images in
+        given format to base64 strings required by service. You should specify it when
+        you are using images in your inputs.

 Example:
+    .. code-block:: python
+
+        from .api import load_dataset
+        from .image_operators import EncodeImageToString
+
+        image_encoder = EncodeImageToString(image_format="JPEG")
+
+        wml_credentials = {
+            "url": "some_url", "project_id": "some_id", "api_key": "some_key"
+        }
+        model_name = "meta-llama/llama-3-2-11b-vision-instruct"
+        wml_inference = WMLInferenceEngineChat(
+            credentials=wml_credentials,
+            model_name=model_name,
+            image_encoder=image_encoder,
+            data_classification_policy=["public"],
+            max_tokens=1024,
+        )
+
+        dataset = load_dataset(
+            dataset_query="card=cards.doc_vqa.en,template=templates.qa.with_context.with_type,loader_limit=30"
+        )
+        results = wml_inference.infer(dataset["test"])
 """

 image_encoder: Optional[EncodeImageToString] = None

 def _send_requests(
     self,
+    dataset: Union[List[Dict[str, Any]], Dataset],
     return_logprobs: bool,
     return_meta_data: bool,
 ) -> Union[List[str], List[Dict], List[TextGenerationInferenceOutput]]:

 def _infer(
     self,
+    dataset: Union[List[Dict[str, Any]], Dataset],
     return_meta_data: bool = False,
 ) -> Union[List[str], List[TextGenerationInferenceOutput]]:
     if not self._is_loaded():

 def _infer(
     self,
+    dataset: Union[List[Dict[str, Any]], Dataset],
     return_meta_data: bool = False,
 ) -> Union[List[str], List[TextGenerationInferenceOutput]]:
     if not self._is_loaded():

 def _infer(
     self,
+    dataset: Union[List[Dict[str, Any]], Dataset],
     return_meta_data: bool = False,
 ) -> Union[List[str], List[TextGenerationInferenceOutput]]:
     inputs = []

 def _infer(
     self,
+    dataset: Union[List[Dict[str, Any]], Dataset],
     return_meta_data: bool = False,
 ) -> Union[List[str], List[TextGenerationInferenceOutput]]:
     """Main inference entry point."""

     "granite-3-8b-instruct": "ibm/granite-3-8b-instruct",
 },
 "together-ai": {
+    "llama-3-8b-instruct": "together_ai/meta-llama/Llama-3-8b-chat-hf",
+    "llama-3-70b-instruct": "together_ai/meta-llama/Llama-3-70b-chat-hf",
     "llama-3-2-1b-instruct": "together_ai/togethercomputer/llama-3-2-1b-instruct",
 },
 "aws": {

 def _infer(
     self,
+    dataset: Union[List[Dict[str, Any]], Dataset],
     return_meta_data: bool = False,
 ) -> Union[List[str], List[TextGenerationInferenceOutput]]:
     return self.engine._infer(dataset, return_meta_data)

 def _infer(
     self,
+    dataset: Union[List[Dict[str, Any]], Dataset],
     return_meta_data: bool = False,
 ) -> Union[List[str], List[TextGenerationInferenceOutput]]:
     inputs = []
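Worth noting alongside these hunks: the widened Union[List[Dict[str, Any]], Dataset] signature works because both input shapes iterate row-wise the same way inside an engine. A minimal sketch (illustrative, not part of this commit; the "source" field name is an assumption):

from typing import Any, Dict, List, Union

from datasets import Dataset

def iter_sources(dataset: Union[List[Dict[str, Any]], Dataset]):
    # datasets.Dataset and list[dict] both yield dict rows on iteration,
    # so engines can treat the two input shapes uniformly.
    for instance in dataset:
        yield instance["source"]

rows = [{"source": "translate: hi"}, {"source": "translate: bye"}]
assert list(iter_sources(rows)) == list(iter_sources(Dataset.from_list(rows)))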
llm_as_judge.py
CHANGED
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Literal, Optional
 
 from .api import infer
 from .dataclass import Field
-from .formats import Format, SystemFormat
+from .formats import ChatAPIFormat, Format, SystemFormat
 from .inference import InferenceEngine, LogProbInferenceEngine, OpenAiInferenceEngine
 from .metrics import BulkInstanceMetric
 from .operator import SequentialOperator

@@ -65,12 +65,17 @@ class LLMAsJudgeBase(BulkInstanceMetric, ArtifactFetcherMixin):
         )
 
         if isinstance(self.inference_model, OpenAiInferenceEngine):
-            if self.format and type(self.format) is not SystemFormat:
-                raise ValueError(
-                    "Error in 'LLMAsJudge' metric. Inference model 'OpenAiInferenceEngine' does "
-                    "not support formatting. Please remove the format definition from the recipe"
-                    " (OpenAi Chat API take care of the formatting automatically)."
-                )
+            if self.format and type(self.format) is not ChatAPIFormat:
+                if not (
+                    type(self.format) is SystemFormat
+                    and self.format.__id__ == "formats.empty"
+                ):
+                    raise ValueError(
+                        "Error in 'LLMAsJudge' metric. Inference model 'OpenAiInferenceEngine' does "
+                        "not support formatting. Please remove the format definition from the recipe,"
+                        "or set the format to either 'formats.empty' or 'formats.chat_api'"
+                        " (OpenAi Chat API take care of the formatting automatically)."
+                    )
             if self.system_prompt and type(self.system_prompt) is not EmptySystemPrompt:
                 raise ValueError(
                     "Error in 'LLMAsJudge' metric. Inference model 'OpenAiInferenceEngine' does "

@@ -132,16 +137,24 @@ class LLMAsJudge(LLMAsJudgeBase):
 
     Attributes:
         main_score (str): The main score label used for evaluation.
+
         task (Literal["rating.single_turn","rating.single_turn_with_reference",
            "pairwise_comparative_rating.single_turn"]): The type of task the llm as judge runs.
+            This defines the output and input format of the judge model.
+
         template (Template): The template used when generating inputs for the judge llm.
+
         format (Format): The format used when generating inputs for judge llm.
+
         system_prompt (SystemPrompt): The system prompt used when generating inputs for judge llm.
+
         strip_system_prompt_and_format_from_inputs (bool): Whether to strip the system prompt and formatting from the
+            inputs that the model being judged received, when they are inserted into the llm-as-judge prompt.
+
         inference_model (InferenceEngine): The module that creates the inference of the judge llm.
+
         reduction_map (dict): A dictionary specifying the reduction method for the metric.
+
         batch_size (int): The size of the bulk.
     """

@@ -318,22 +331,34 @@ class TaskBasedLLMasJudge(LLMAsJudgeBase):
 
     Attributes:
         main_score (str): The main score label used for evaluation.
+
         task (str): The type of task the llm as judge runs.
             This defines the output and input format of the judge model.
+
         template (Template): The template used when generating inputs for the judge llm.
+
         format (Format): The format used when generating inputs for judge llm.
+
         system_prompt (SystemPrompt): The system prompt used when generating inputs for judge llm.
+
         strip_system_prompt_and_format_from_inputs (bool): Whether to strip the system prompt and formatting from the
+            inputs that the model being judged received, when they are inserted into the llm-as-judge prompt.
+
         inference_model (InferenceEngine): The module that creates the inference of the judge llm.
+
         reduction_map (dict): A dictionary specifying the reduction method for the metric.
+
         batch_size (int): The size of the bulk.
+
         infer_log_probs(bool): whether to perform the inference using logprobs. If true, the template's
             post-processing must support the logprobs output.
+
         judge_to_generator_fields_mapping (Dict[str, str]): optional mapping between the names of the fields in the generator task and the
             judge task. For example, if the generator task uses "reference_answers" and the judge task expects "ground_truth",
             include {"ground_truth": "reference_answers"} in this dictionary.
+
+        prediction_field (str): if indicated, and a prediction exists, copy prediction to this field name in task_data.
+
         include_meta_data (bool): whether to include the inference per-instance metadata in the returned results.
 
     """

@@ -384,7 +409,10 @@ class TaskBasedLLMasJudge(LLMAsJudgeBase):
 
     # if format is not directly set in constructor, choose according to the inference model
     def set_format_for_inference_engine(self):
        model_name = self.inference_model.get_engine_id()
-        if re.search("llama.?3.*instruct", model_name):
+        # TODO : better format resolution to support more chat_api options
+        if "rits" in model_name:
+            format_name = "formats.chat_api"
+        elif re.search("llama.?3.*instruct", model_name):
             format_name = "formats.llama3_instruct"
         else:
             format_name = "formats.empty"
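A quick check (illustrative, not from this commit) of the pattern set_format_for_inference_engine now relies on; the engine ids below are made-up examples:

import re

PATTERN = re.compile("llama.?3.*instruct")

for engine_id, expected in [
    ("meta-llama/llama-3-8b-instruct", True),   # separator "-" matches ".?"
    ("llama3.1-70b-instruct", True),            # no separator also matches
    ("mixtral-8x7b-instruct", False),           # no "llama" prefix
]:
    assert bool(PATTERN.search(engine_id)) is expected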
loaders.py
CHANGED
@@ -162,14 +162,22 @@ class LoadHF(Loader):
 
     Args:
         path: The path or identifier of the dataset on the HuggingFace Hub.
+
         name: An optional dataset name.
+
         data_dir: Optional directory to store downloaded data.
+
         split: Optional specification of which split to load.
+
         data_files: Optional specification of particular data files to load.
+
         revision: Optional. The revision of the dataset. Often the commit id. Use in case you want to set the dataset version.
+
+        streaming (bool): indicating if streaming should be used.
+
         filtering_lambda: A lambda function for filtering the data after loading.
+
+        num_proc (int): Optional integer to specify the number of processes to use for parallel dataset loading.
 
     Example:
         Loading glue's mrpc dataset

@@ -355,7 +363,9 @@ class LoadCSV(Loader):
                     file_path, nrows=self.get_limit(), sep=self.sep
                 ).to_dict("records")
             else:
-                iterables[split_name] = pd.read_csv(file_path).to_dict("records")
+                iterables[split_name] = pd.read_csv(file_path, sep=self.sep).to_dict(
+                    "records"
+                )
         return iterables

@@ -733,19 +743,24 @@ class LoadFromHFSpace(LoadHF):
 
     Args:
         space_name (str): Name of the HuggingFace Space to be accessed.
+
         data_files (str | Sequence[str] | Mapping[str, str | Sequence[str]]): Relative
+            paths to files within a given repository. If given as a mapping, paths should
+            be values, while keys should represent the type of respective files
+            (training, testing etc.).
+
         path (str, optional): Absolute path to a directory where data should be downloaded.
+
         revision (str, optional): ID of a Git branch or commit to be used. By default, it is
+            set to None, thus data is downloaded from the main branch of the accessed
+            repository.
+
         use_token (bool, optional): Whether a token is used for authentication when accessing
+            the HuggingFace Space. If necessary, the token is read from the HuggingFace
+            config folder.
+
         token_env (str, optional): Key of an env variable whose value will be used for
+            authentication when accessing the HuggingFace Space - if necessary.
 
     Example:
         Loading from a HuggingFace Space
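The LoadCSV hunk above fixes a real parsing bug: the unlimited branch dropped sep. A minimal sketch (not from this commit) of what that dropped argument does to a tab-separated file:

from io import StringIO

import pandas as pd

tsv = "a\tb\n1\t2\n"
bad = pd.read_csv(StringIO(tsv))             # old behavior: default sep="," yields one wide column
good = pd.read_csv(StringIO(tsv), sep="\t")  # behavior after the fix

assert list(bad.columns) == ["a\tb"]
assert list(good.columns) == ["a", "b"]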
metrics.py
CHANGED
@@ -1,5 +1,6 @@
 
 import ast
 import json
+import math
 import os
 import re
 import string

@@ -27,7 +28,11 @@ from .dataclass import (
 )
 from .deprecation_utils import deprecation
 from .error_utils import Documentation, UnitxtWarning
-from .inference import HFPipelineBasedInferenceEngine, InferenceEngine
+from .inference import (
+    HFPipelineBasedInferenceEngine,
+    InferenceEngine,
+    WMLInferenceEngineGeneration,
+)
 from .logging_utils import get_logger
 from .metric_utils import InstanceInput, MetricRequest, MetricResponse
 from .operator import (

@@ -960,11 +965,13 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
     """Class for metrics for which a global score can be calculated by aggregating the instance scores (possibly with additional instance inputs).
 
     InstanceMetric currently allows two reductions:
+
     1. 'mean', which calculates the mean of instance scores,
     2. 'group_mean', which first applies an aggregation function specified in the reduction_map
+        to instance scores grouped by the field grouping_field (which must not be None), and returns the mean
+        of the group scores; if grouping_field is None, grouping is disabled.
+        See _validate_group_mean_reduction for formatting instructions.
+
     """
 
     n_resamples: int = OptionalField(

@@ -1489,13 +1496,17 @@ class StringContainmentRatio(InstanceMetric):
 
     Attributes:
         field: The field from the task_data that contains the values to be checked for containment.
+
+        Example task that contains this metric:
+
+            .. code-block:: python
+
+                Task(
+                    input_fields={"question": str},
+                    reference_fields={"entities": str},
+                    prediction_type=str,
+                    metrics=["string_containment_ratio[field=entities]"],
+                )
     """
 
     reduction_map = {"mean": ["string_containment"]}

@@ -2776,8 +2787,8 @@ class BertScore(HuggingfaceBulkMetric):
 
 
 class SentenceBert(BulkInstanceMetric):
-    main_score = "score"
-    reduction_map = {"mean": ["score"]}
+    main_score = "sbert_score"
+    reduction_map = {"mean": [main_score]}
     batch_size: int = 32
 
     model_name: str

@@ -2823,12 +2834,12 @@ class SentenceBert(BulkInstanceMetric):
             refs_group_emb = refs_emb[ref_group_bounds[0] : ref_group_bounds[1]]
             scores.append(self.util.cos_sim(pred_emb, refs_group_emb).max().item())
 
-        return [{"score": score} for score in scores]
+        return [{self.main_score: score} for score in scores]
 
 
 class Reward(BulkInstanceMetric):
-    main_score = "score"
-    reduction_map = {"mean": ["score"]}
+    main_score = "reward_score"
+    reduction_map = {"mean": [main_score]}
     batch_size: int = 32
 
     model_name: str

@@ -2864,12 +2875,15 @@ class Reward(BulkInstanceMetric):
 
         # compute the metric
         # add function_to_apply="none" to disable sigmoid
-        return self.pipe(inputs, batch_size=self.batch_size)
+        results = self.pipe(inputs, batch_size=self.batch_size)
+        for result in results:
+            result[self.main_score] = result["score"]
+        return results
 
 
 class Detector(BulkInstanceMetric):
-    main_score = "score"
-    reduction_map = {"mean": ["score"]}
+    main_score = "detector_score"
+    reduction_map = {"mean": [main_score]}
     batch_size: int = 32
 
     prediction_type = str

@@ -2896,7 +2910,10 @@ class Detector(BulkInstanceMetric):
     ) -> List[Dict[str, Any]]:
         # compute the metric
         # add function_to_apply="none" to disable sigmoid
-        return self.pipe(predictions, batch_size=self.batch_size)
+        results = self.pipe(predictions, batch_size=self.batch_size)
+        for result in results:
+            result[self.main_score] = result["score"]
+        return results
 
 
 class RegardMetric(GlobalMetric):

@@ -3537,13 +3554,13 @@ class Perplexity(BulkInstanceMetric):
 
 
 class FaithfulnessHHEM(BulkInstanceMetric):
-    reduction_map = {"mean": ["score"]}
-    main_score = "score"
+    main_score = "hhem_score"
     batch_size: int = 2
     model_name: str = "vectara/hallucination_evaluation_model"
     prediction_type = str
     single_reference_per_prediction = True
     max_context_words = 4096
+    reduction_map = {"mean": [main_score]}
 
     _requirements_list: List[str] = ["transformers", "torch"]

@@ -3587,7 +3604,7 @@ class FaithfulnessHHEM(BulkInstanceMetric):
         for input_batch in tqdm(input_batches, "input batch"):
             batch_scores = self.model.predict(input_batch).cpu().tolist()
             scores.extend(batch_scores)
-        return [{"score": score} for score in scores]
+        return [{self.main_score: score} for score in scores]
 
 
 class Squad(HuggingfaceMetric):

@@ -4019,18 +4036,21 @@ def performance_drop_rate(
 def interpret_effect_size(x: float):
     """Return a string rule-of-thumb interpretation of an effect size value, as defined by Cohen/Sawilowsky.
 
-    See https://en.wikipedia.org/wiki/Effect_size
-    Cohen, Jacob (1988). Statistical Power Analysis for the Behavioral Sciences; and
-    Sawilowsky, S (2009). "New effect size rules of thumb". Journal of Modern Applied Statistical Methods. 8 (2): 467-474.
+    | See `Effect size <https://en.wikipedia.org/wiki/Effect_size>`_
+    | Cohen, Jacob (1988). Statistical Power Analysis for the Behavioral Sciences; and
+    | Sawilowsky, S (2009). "New effect size rules of thumb". Journal of Modern Applied Statistical Methods. 8 (2): 467-474.
 
     Value has interpretation of
+
+    .. code-block:: text
+
+        - essentially 0 if |x| < 0.01
+        - very small if 0.01 <= |x| < 0.2
+        - small difference if 0.2 <= |x| < 0.5
+        - a medium difference if 0.5 <= |x| < 0.8
+        - a large difference if 0.8 <= |x| < 1.2
+        - a very large difference if 1.2 <= |x| < 2.0
+        - a huge difference if 2.0 <= |x|
 
     Args:
         x: float effect size value

@@ -4066,7 +4086,7 @@ def normalized_cohens_h(
     """Cohen's h effect size between two proportions, normalized to interval [-1,1].
 
     Allows for change-type metric when the baseline is 0 (percentage change, and thus PDR, is undefined)
-    https://en.wikipedia.org/wiki/Cohen%27s_h
+    `Cohen's h <https://en.wikipedia.org/wiki/Cohen%27s_h>`_
 
     Cohen's h effect size metric between two proportions p2 and p1 is 2 * (arcsin(sqrt(p2)) - arcsin(sqrt(p1))).
     h in -pi, pi, with +/-pi representing the largest increase/decrease (p1=0, p2=1), or (p1=1, p2=0).

@@ -4077,6 +4097,9 @@ def normalized_cohens_h(
     Interpretation: the original unscaled Cohen's h can be interpreted according to function interpret_effect_size
 
     Thus, the rule of interpreting the effect of the normalized value is to use the same thresholds divided by pi
+
+    .. code-block:: text
+
         - essentially 0 if |norm h| < 0.0031831
         - very small if 0.0031831 <= |norm h| < 0.06366198
         - small difference if 0.06366198 <= |norm h| < 0.15915494

@@ -4084,12 +4107,17 @@ def normalized_cohens_h(
         - a large difference if 0.25464791 <= |norm h| < 0.38197186
         - a very large difference if 0.38197186 <= |norm h| < 0.63661977
         - a huge difference if 0.63661977 <= |norm h|
+
     Args:
         subgroup_scores_dict: dict where keys are subgroup types and values are lists of instance scores.
+
         control_subgroup_types: list of subgroup types (potential keys of subgroup_scores_dict) that are the control (baseline) group
+
         comparison_subgroup_types: list of subgroup types (potential keys of subgroup_scores_dict) that are the group
+            to be compared to the control group.
+
         interpret: boolean, whether to interpret the significance of the score or not
+
     Returns:
         float score between -1 and 1, and a string interpretation if interpret=True
     """

@@ -5118,3 +5146,112 @@ class PredictionLength(InstanceMetric):
         task_data: List[Dict],
     ) -> dict:
         return {self.main_score: [len(prediction)], "score_name": self.main_score}
+
+
+class GraniteGuardianWMLMetric(InstanceMetric):
+    """Return metric for different kinds of "risk" from the Granite-3.0 Guardian model."""
+
+    main_score = "granite_guardian"
+    reduction_map: Dict[str, List[str]] = None
+    prediction_type = float
+
+    model_name: str = "ibm/granite-guardian-3-8b"
+    hf_model_name: str = "ibm-granite/granite-guardian-3.0-8b"
+    safe_token = "No"
+    unsafe_token = "Yes"
+
+    inference_engine: WMLInferenceEngineGeneration = None
+    generation_params: Dict = None
+    risk_name: str = None
+
+    _requirements_list: List[str] = ["ibm_watsonx_ai", "torch", "transformers"]
+
+    def prepare(self):
+        self.reduction_map = {"mean": [self.main_score]}
+
+    def compute(self, references: List[Any], prediction: Any, task_data: Dict) -> dict:
+        from transformers import AutoTokenizer
+
+        if not hasattr(self, "_tokenizer") or self._tokenizer is None:
+            self._tokenizer = AutoTokenizer.from_pretrained(self.hf_model_name)
+            self.inference_engine = WMLInferenceEngineGeneration(
+                model_name=self.model_name,
+            )
+            self.inference_engine._load_model()
+            self.model = self.inference_engine._model
+            self.generation_params = self.inference_engine._set_logprobs_params({})
+
+        messages = self.process_input_fields(task_data)
+        guardian_config = {"risk_name": self.risk_name}
+        processed_input = self._tokenizer.apply_chat_template(
+            messages,
+            guardian_config=guardian_config,
+            tokenize=False,
+            add_generation_prompt=True,
+        )
+
+        result = self.model.generate(
+            prompt=[processed_input],
+            params=self.generation_params,
+        )
+        generated_tokens_list = result[0]["results"][0]["generated_tokens"]
+        label, prob_of_risk = self.parse_output(generated_tokens_list)
+        score = 1 - prob_of_risk if label is not None else np.nan
+        return {self.main_score: score}
+
+    def process_input_fields(self, task_data):
+        if self.risk_name == "groundedness":
+            messages = [
+                {"role": "context", "content": "\n".join(task_data["contexts"])},
+                {"role": "assistant", "content": task_data["answer"]},
+            ]
+        elif self.risk_name == "answer_relevance":
+            messages = [
+                {"role": "user", "content": task_data["question"]},
+                {"role": "assistant", "content": task_data["answer"]},
+            ]
+        elif self.risk_name == "context_relevance":
+            messages = [
+                {"role": "user", "content": task_data["question"]},
+                {"role": "context", "content": "\n".join(task_data["contexts"])},
+            ]
+        else:
+            raise NotImplementedError()
+
+        return messages
+
+    def parse_output(self, generated_tokens_list):
+        top_tokens_list = [
+            generated_tokens["top_tokens"] for generated_tokens in generated_tokens_list
+        ]
+        prob = self.get_probabilities(top_tokens_list)
+        prob_of_risk = prob[1]
+
+        res = next(iter(generated_tokens_list))["text"].strip()
+
+        if self.unsafe_token.lower() == res.lower():
+            label = self.unsafe_token
+        elif self.safe_token.lower() == res.lower():
+            label = self.safe_token
+        else:
+            label = None
+
+        return label, prob_of_risk
+
+    def get_probabilities(self, top_tokens_list):
+        import torch
+
+        safe_token_prob = 1e-50
+        unsafe_token_prob = 1e-50
+
+        for top_tokens in top_tokens_list:
+            for token in top_tokens:
+                if token["text"].strip().lower() == self.safe_token.lower():
+                    safe_token_prob += math.exp(token["logprob"])
+                if token["text"].strip().lower() == self.unsafe_token.lower():
+                    unsafe_token_prob += math.exp(token["logprob"])
+
+        return torch.softmax(
+            torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
+            dim=0,
+        ).numpy()
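The normalized Cohen's h described above can be recomputed independently. A minimal sketch (not from this commit) of h = 2*(arcsin(sqrt(p2)) - arcsin(sqrt(p1))), scaled by pi into [-1, 1]:

import math

def normalized_h(p1: float, p2: float) -> float:
    # raw Cohen's h lies in [-pi, pi]; dividing by pi maps it to [-1, 1]
    h = 2 * (math.asin(math.sqrt(p2)) - math.asin(math.sqrt(p1)))
    return h / math.pi

assert normalized_h(0.0, 1.0) == 1.0    # largest possible increase
assert normalized_h(1.0, 0.0) == -1.0   # largest possible decrease
assert abs(normalized_h(0.5, 0.5)) < 1e-12  # no change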
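The get_probabilities helper in the new GraniteGuardianWMLMetric boils down to normalizing exponentiated token logprobs: softmax over the two log-sums is exactly that normalization. A torch-free restatement (illustrative, not from this commit):

import math

def prob_of_risk(top_tokens_list, safe="No", unsafe="Yes"):
    # tiny floors keep log() finite when one label never appears in the top tokens
    safe_p, unsafe_p = 1e-50, 1e-50
    for top_tokens in top_tokens_list:
        for token in top_tokens:
            text = token["text"].strip().lower()
            if text == safe.lower():
                safe_p += math.exp(token["logprob"])
            elif text == unsafe.lower():
                unsafe_p += math.exp(token["logprob"])
    # softmax([log(a), log(b)]) == [a/(a+b), b/(a+b)]
    return unsafe_p / (safe_p + unsafe_p)

tokens = [[{"text": "Yes", "logprob": math.log(0.6)},
           {"text": "No", "logprob": math.log(0.2)}]]
assert abs(prob_of_risk(tokens) - 0.75) < 1e-9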
operators.py
CHANGED
@@ -137,34 +137,39 @@ class MapInstanceValues(InstanceOperator):
 
     Attributes:
         mappers (Dict[str, Dict[str, Any]]): The mappers to use for mapping instance values.
+            Keys are the names of the fields to undergo mapping, and values are dictionaries
+            that define the mapping from old values to new values.
+            Note that mapped values are defined by their string representation, so mapped values
+            are converted to strings before being looked up in the mappers.
+
         strict (bool): If True, the mapping is applied strictly. That means if a value
+            does not exist in the mapper, it will raise a KeyError. If False, values
+            that are not present in the mapper are kept as they are.
+
         process_every_value (bool): If True, all fields to be mapped should be lists, and the mapping
+            is to be applied to their individual elements. If False, mapping is only applied to a field
+            containing a single value.
 
     Examples:
+        ``MapInstanceValues(mappers={"a": {"1": "hi", "2": "bye"}})``
+        replaces ``"1"`` with ``"hi"`` and ``"2"`` with ``"bye"`` in field ``"a"`` in all instances of all streams:
+        instance ``{"a": 1, "b": 2}`` becomes ``{"a": "hi", "b": 2}``. Note that the value of ``"b"`` remained intact,
+        since field-name ``"b"`` does not participate in the mappers, and that ``1`` was cast to ``"1"`` before being looked
+        up in the mapper of ``"a"``.
+
+        ``MapInstanceValues(mappers={"a": {"1": "hi", "2": "bye"}}, process_every_value=True)``:
+        Assuming field ``"a"`` is a list of values, potentially including ``"1"``-s and ``"2"``-s, this replaces
+        each such ``"1"`` with ``"hi"`` and ``"2"`` -- with ``"bye"`` in all instances of all streams:
+        instance ``{"a": ["1", "2"], "b": 2}`` becomes ``{"a": ["hi", "bye"], "b": 2}``.
+
+        ``MapInstanceValues(mappers={"a": {"1": "hi", "2": "bye"}}, strict=True)``:
+        To ensure that all values of field ``"a"`` are mapped in every instance, use ``strict=True``.
+        Input instance ``{"a":"3", "b": 2}`` will raise an exception per the above call,
+        because ``"3"`` is not a key in the mapper of ``"a"``.
+
+        ``MapInstanceValues(mappers={"a": {str([1,2,3,4]): "All", str([]): "None"}}, strict=True)``
+        replaces a list ``[1,2,3,4]`` with the string ``"All"`` and an empty list by string ``"None"``.
 
     """
 
     mappers: Dict[str, Dict[str, str]]

@@ -234,27 +239,25 @@ class FlattenInstances(InstanceOperator):
 
 
 class Set(InstanceOperator):
+    """Sets specified fields in each instance, in a given stream or all streams (default), with specified values. If fields exist, updates them; if they do not exist -- adds them.
 
     Args:
+        fields (Dict[str, object]): The fields to add to each instance. Use '/' to access inner fields
+
         use_deepcopy (bool) : Deep copy the input value to avoid later modifications
 
     Examples:
+        # Set a value of a list consisting of "positive" and "negative" to field "classes" in each and every instance of all streams
+        ``Set(fields={"classes": ["positive","negatives"]})``
 
+        # In each and every instance of all streams, field "span" is to become a dictionary containing a field "start", in which the value 0 is to be set
+        ``Set(fields={"span/start": 0})``
 
+        # In all instances of stream "train" only, set field "classes" to have the value of a list consisting of "positive" and "negative"
+        ``Set(fields={"classes": ["positive","negatives"], apply_to_stream=["train"]})``
 
+        # Set field "classes" to have the value of a given list, preventing modification of the original list from changing the instance.
+        ``Set(fields={"classes": alist}, use_deepcopy=True)`` if alist is modified later, the instances still remain intact.
     """
 
     fields: Dict[str, object]

@@ -333,22 +336,26 @@ class InstanceFieldOperator(InstanceOperator):
 
     Args:
         field (Optional[str]): The field to process, if only a single one is passed. Defaults to None
+
         to_field (Optional[str]): Field name to save result into, if only one field is processed, if None is passed the
+            operation would happen in-place and its result would replace the value of ``field``. Defaults to None
+
         field_to_field (Optional[Union[List[List[str]], Dict[str, str]]]): Mapping from names of fields to process,
+            to names of fields to save the results into. Inner List, if used, should be of length 2.
+            | A field is processed by feeding its value into method ``process_value`` and storing the result in ``to_field`` that
            is mapped to the field.
+            | When the type of argument ``field_to_field`` is List, the order by which the fields are processed is their order
+            in the (outer) List. But when the type of argument ``field_to_field`` is Dict, there is no uniquely determined
            order. The end result might depend on that order if either (1) two different fields are mapped to the same
            to_field, or (2) a field shows both as a key and as a value in different mappings.
+            | The operator throws an AssertionError in either of these cases.
+            | field_to_field defaults to None
+
+        process_every_value (bool): Processes the values in a list instead of the list as a value, similar to python's ``*var``. Defaults to False
+
+    Note: if ``field`` and ``to_field`` (or both members of a pair in ``field_to_field`` ) are equal (or share a common
+    prefix if ``field`` and ``to_field`` contain a / ), then the result of the operation is saved within ``field`` .
     """
 
     field: Optional[str] = None

@@ -577,17 +584,18 @@ class Apply(InstanceOperator):
     Args:
         function (str): name of function.
         to_field (str): the field to store the result
+
+        any additional arguments are field names whose values will be passed directly to the function specified
 
     Examples:
+        Store in field "b" the uppercase string of the value in field "a":
+        ``Apply("a", function=str.upper, to_field="b")``
 
+        Dump the json representation of field "t" and store back in the same field:
+        ``Apply("t", function=json.dumps, to_field="t")``
 
+        Set the time in a field 'b':
+        ``Apply(function=time.time, to_field="b")``
 
     """

@@ -667,14 +675,13 @@ class ListFieldValues(InstanceOperator):
 
 
 class ZipFieldValues(InstanceOperator):
+    """Zips values of multiple fields in a given instance, similar to ``list(zip(*fields))``.
 
     The value in each of the specified 'fields' is assumed to be a list. The lists from all 'fields'
     are zipped, and stored into 'to_field'.
 
+    | If 'longest'=False, the length of the zipped result is determined by the shortest input value.
+    | If 'longest'=True, the length of the zipped result is determined by the longest input, padding shorter inputs with None-s.
 
     """

@@ -706,11 +713,11 @@ class ZipFieldValues(InstanceOperator):
 class InterleaveListsToDialogOperator(InstanceOperator):
     """Interleaves two lists, one of user dialog turns and one of assistant dialog turns, into a single list of tuples, alternating between "user" and "assistant".
 
+    The list of tuples is of format (role, turn_content), where the role label is specified by
+    the 'user_role_label' and 'assistant_role_label' fields (default to "user" and "assistant").
 
     The user turns and assistant turns field are specified in the arguments.
+    The value of each of the 'fields' is assumed to be a list.
 
     """

@@ -854,13 +861,13 @@ class Copy(FieldOperator):
 
     Examples:
         An input instance {"a": 2, "b": 3}, when processed by
+        ``Copy(field_to_field={"a": "b"})``
         would yield {"a": 2, "b": 2}, and when processed by
+        ``Copy(field_to_field={"a": "c"})`` would yield
         {"a": 2, "b": 3, "c": 2}
 
     with field names containing / , we can also copy inside the field:
+        ``Copy(field="a/0",to_field="a")``
     would process instance {"a": [1, 3]} into {"a": 1}

@@ -930,32 +937,41 @@ class CastFields(InstanceOperator):
     """Casts specified fields to specified types.
 
     Args:
-        use_nested_query (bool): Whether to cast nested fields, expressed in dpath. Defaults to False.
         fields (Dict[str, str]): A dictionary mapping field names to the names of the types to cast the fields to.
+            e.g: "int", "str", "float", "bool". Basic names of types
+
         defaults (Dict[str, object]): A dictionary mapping field names to default values for cases of casting failure.
+
         process_every_value (bool): If true, all fields involved must contain lists, and each value in the list is then cast. Defaults to False.
 
+    Example:
+        .. code-block:: python
+
+            CastFields(
+                fields={"a/d": "float", "b": "int"},
+                failure_defaults={"a/d": 0.0, "b": 0},
+                process_every_value=True,
+            )
+
+        would process the input instance: ``{"a": {"d": ["half", "0.6", 1, 12]}, "b": ["2"]}``
+        into ``{"a": {"d": [0.0, 0.6, 1.0, 12.0]}, "b": [2]}``.
 
     """
 
     fields: Dict[str, str] = field(default_factory=dict)
     failure_defaults: Dict[str, object] = field(default_factory=dict)
-    use_nested_query: bool = False
+    use_nested_query: bool = None  # deprecated field
     process_every_value: bool = False
 
     def prepare(self):
         self.types = {"int": int, "float": float, "str": str, "bool": bool}
 
+    def verify(self):
+        super().verify()
+        if self.use_nested_query is not None:
+            depr_message = "Field 'use_nested_query' is deprecated. From now on, default behavior is compatible to use_nested_query=True. Please remove this field from your code."
+            warnings.warn(depr_message, DeprecationWarning, stacklevel=2)
+
     def _cast_single(self, value, type, field):
         try:
             return self.types[type](value)

@@ -1093,18 +1109,18 @@ class FilterByCondition(StreamOperator):
 
     Args:
         values (Dict[str, Any]): Field names and respective Values that instances must match according the condition, to be included in the output.
+
         condition: the name of the desired condition operator between the specified (sub) field's value and the provided constant value. Supported conditions are ("gt", "ge", "lt", "le", "ne", "eq", "in","not in")
+
         error_on_filtered_all (bool, optional): If True, raises an error if all instances are filtered out. Defaults to True.
 
     Examples:
+        | ``FilterByCondition(values = {"a":4}, condition = "gt")`` will yield only instances where field ``"a"`` contains a value ``> 4``
+        | ``FilterByCondition(values = {"a":4}, condition = "le")`` will yield only instances where ``"a"<=4``
+        | ``FilterByCondition(values = {"a":[4,8]}, condition = "in")`` will yield only instances where ``"a"`` is ``4`` or ``8``
+        | ``FilterByCondition(values = {"a":[4,8]}, condition = "not in")`` will yield only instances where ``"a"`` is different from ``4`` or ``8``
+        | ``FilterByCondition(values = {"a/b":[4,8]}, condition = "not in")`` will yield only instances where ``"a"`` is a dict in which key ``"b"`` is mapped to a value that is neither ``4`` nor ``8``
+        | ``FilterByCondition(values = {"a[2]":4}, condition = "le")`` will yield only instances where "a" is a list whose 3-rd element is ``<= 4``
 
 
     """

@@ -1805,14 +1821,14 @@ class EncodeLabels(InstanceOperator):
     Args:
         fields (List[str]): The fields to encode together.
 
+    Example:
+        applying ``EncodeLabels(fields = ["a", "b/*"])``
+        on input stream = ``[{"a": "red", "b": ["red", "blue"], "c":"bread"},
+        {"a": "blue", "b": ["green"], "c":"water"}]`` will yield the
+        output stream = ``[{'a': 0, 'b': [0, 1], 'c': 'bread'}, {'a': 1, 'b': [2], 'c': 'water'}]``
 
+    Note: dict_utils are applied here, and hence, fields that are lists should be included in
+        input 'fields' with the appendix ``"/*"`` as in the above example.
 
     """

@@ -2132,21 +2148,23 @@ class CollateInstances(StreamOperator):
     batch_size (int)
 
     Example:
+        .. code-block:: text
 
+            CollateInstances(batch_size=2)
+
+            Given inputs = [
+                {"a": 1, "b": 2},
+                {"a": 2, "b": 2},
+                {"a": 3, "b": 2},
+                {"a": 4, "b": 2},
+                {"a": 5, "b": 2}
+            ]
+
+            Returns targets = [
+                {"a": [1,2], "b": [2,2]},
+                {"a": [3,4], "b": [2,2]},
+                {"a": [5], "b": [2]},
+            ]
 
 
     """
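The CastFields example in the hunk above can be verified by hand with plain casts plus per-field failure defaults. A minimal sketch (not from this commit):

def cast(value, to_type, default):
    # mirror the class's type table and fall back to the default on failure
    try:
        return {"int": int, "float": float, "str": str, "bool": bool}[to_type](value)
    except (ValueError, TypeError):
        return default

values = ["half", "0.6", 1, 12]
assert [cast(v, "float", 0.0) for v in values] == [0.0, 0.6, 1.0, 12.0]
assert [cast(v, "int", 0) for v in ["2"]] == [2]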
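And the dpath-style FilterByCondition example, read in plain Python (illustrative, not from this commit):

# FilterByCondition(values={"a/b": [4, 8]}, condition="not in") keeps instances
# whose nested value at "a" -> "b" is neither 4 nor 8.
instances = [{"a": {"b": 4}}, {"a": {"b": 5}}, {"a": {"b": 8}}]
kept = [ins for ins in instances if ins["a"]["b"] not in [4, 8]]
assert kept == [{"a": {"b": 5}}]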
span_lableing_operators.py
CHANGED
@@ -8,29 +8,35 @@ class IobExtractor(InstanceOperator):
 
     Attributes:
         labels (List[str]): A list of entity type labels, e.g., ["Person", "Organization", "Location"].
+
         begin_labels (List[str]): A list of labels indicating the beginning of an entity, e.g., ["B-PER", "B-ORG", "B-LOC"].
+
         inside_labels (List[str]): A list of labels indicating the continuation of an entity, e.g., ["I-PER", "I-ORG", "I-LOC"].
+
         outside_label (str): The label indicating tokens outside of any entity, typically "O".
 
     The extraction process identifies spans of text corresponding to entities and labels them according to their entity type. Each span is annotated with a start and end character offset, the entity text, and the corresponding label.
 
+
     Example of instantiation and usage:
+
+        .. code-block:: python
+
+            operator = IobExtractor(
+                labels=["Person", "Organization", "Location"],
+                begin_labels=["B-PER", "B-ORG", "B-LOC"],
+                inside_labels=["I-PER", "I-ORG", "I-LOC"],
+                outside_label="O",
+            )
+
+            instance = {
+                "labels": ["B-PER", "I-PER", "O", "B-ORG", "I-ORG"],
+                "tokens": ["John", "Doe", "works", "at", "OpenAI"]
+            }
+            processed_instance = operator.process(instance)
+            print(processed_instance["spans"])
+            # Output: [{'start': 0, 'end': 8, 'text': 'John Doe', 'label': 'Person'}, ...]
 
     For more details on the IOB tagging convention, see: https://en.wikipedia.org/wiki/Inside-outside-beginning_(tagging)
8 |
|
9 |
Attributes:
|
10 |
labels (List[str]): A list of entity type labels, e.g., ["Person", "Organization", "Location"].
|
11 |
+
|
12 |
begin_labels (List[str]): A list of labels indicating the beginning of an entity, e.g., ["B-PER", "B-ORG", "B-LOC"].
|
13 |
+
|
14 |
inside_labels (List[str]): A list of labels indicating the continuation of an entity, e.g., ["I-PER", "I-ORG", "I-LOC"].
|
15 |
+
|
16 |
outside_label (str): The label indicating tokens outside of any entity, typically "O".
|
17 |
|
18 |
The extraction process identifies spans of text corresponding to entities and labels them according to their entity type. Each span is annotated with a start and end character offset, the entity text, and the corresponding label.
|
19 |
|
20 |
+
|
21 |
+
|
22 |
Example of instantiation and usage:
|
23 |
+
|
24 |
+
.. code-block:: python
|
25 |
+
|
26 |
+
operator = IobExtractor(
|
27 |
+
labels=["Person", "Organization", "Location"],
|
28 |
+
begin_labels=["B-PER", "B-ORG", "B-LOC"],
|
29 |
+
inside_labels=["I-PER", "I-ORG", "I-LOC"],
|
30 |
+
outside_label="O",
|
31 |
+
)
|
32 |
+
|
33 |
+
instance = {
|
34 |
+
"labels": ["B-PER", "I-PER", "O", "B-ORG", "I-ORG"],
|
35 |
+
"tokens": ["John", "Doe", "works", "at", "OpenAI"]
|
36 |
+
}
|
37 |
+
processed_instance = operator.process(instance)
|
38 |
+
print(processed_instance["spans"])
|
39 |
+
# Output: [{'start': 0, 'end': 8, 'text': 'John Doe', 'label': 'Person'}, ...]
|
40 |
|
41 |
For more details on the IOB tagging convention, see: https://en.wikipedia.org/wiki/Inside-outside-beginning_(tagging)
|
42 |
|
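The core of IOB decoding with character offsets can be sketched in a few lines; this simplified version assumes single-space token joining and lets any "I-" tag extend the most recent span, which the real operator handles more carefully:

.. code-block:: python

    def extract_spans(tokens, tags, begin_to_label):
        spans, offset = [], 0
        for token, tag in zip(tokens, tags):
            start = offset
            offset += len(token) + 1  # +1 for the joining space
            if tag.startswith("B-"):
                spans.append({"start": start, "end": start + len(token),
                              "label": begin_to_label[tag]})
            elif tag.startswith("I-") and spans:
                spans[-1]["end"] = start + len(token)
        text = " ".join(tokens)
        for span in spans:
            span["text"] = text[span["start"]:span["end"]]
        return spans

    print(extract_spans(
        ["John", "Doe", "works", "at", "OpenAI"],
        ["B-PER", "I-PER", "O", "B-ORG", "I-ORG"],
        {"B-PER": "Person", "B-ORG": "Organization", "B-LOC": "Location"},
    ))
    # [{'start': 0, 'end': 8, 'label': 'Person', 'text': 'John Doe'},
    #  {'start': 18, 'end': 24, 'label': 'Organization', 'text': 'OpenAI'}]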
struct_data_operators.py
CHANGED
@@ -2,17 +2,25 @@
|
|
2 |
|
3 |
These operators are specialized in handling structured data like tables.
|
4 |
For tables, expected input format is:
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
|
10 |
For triples, expected input format is:
|
11 |
-
|
12 |
|
13 |
For key-value pairs, expected input format is:
|
14 |
-
|
15 |
-
16 |
"""
|
17 |
|
18 |
import json
|
@@ -148,11 +156,15 @@ class SerializeTableAsMarkdown(SerializeTable):
|
|
148 |
|
149 |
Markdown table format is used in GitHub code primarily.
|
150 |
Format:
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
"""
|
157 |
|
158 |
# main method that serializes a table.
|
@@ -192,11 +204,14 @@ class SerializeTableAsDFLoader(SerializeTable):
|
|
192 |
|
193 |
Pandas dataframe based code snippet format serializer.
|
194 |
Format(Sample):
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
"""
|
201 |
|
202 |
# main method that serializes a table.
|
@@ -234,11 +249,14 @@ class SerializeTableAsJson(SerializeTable):
|
|
234 |
|
235 |
Json format based serializer.
|
236 |
Format(Sample):
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
"""
|
243 |
|
244 |
# main method that serializes a table.
|
@@ -264,15 +282,18 @@ class SerializeTableAsHTML(SerializeTable):
|
|
264 |
|
265 |
HTML table format used for rendering tables in web pages.
|
266 |
Format(Sample):
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
"""
|
277 |
|
278 |
# main method that serializes a table.
|
@@ -404,7 +425,7 @@ class TruncateTableRows(FieldOperator):
|
|
404 |
"""Limits table rows to specified limit by removing excess rows via random selection.
|
405 |
|
406 |
Args:
|
407 |
-
rows_to_keep (int)
|
408 |
"""
|
409 |
|
410 |
rows_to_keep: int = 10
|
@@ -563,16 +584,19 @@ class ListToKeyValPairs(InstanceOperator):
|
|
563 |
class ConvertTableColNamesToSequential(FieldOperator):
|
564 |
"""Replaces actual table column names with static sequential names like col_0, col_1,...
|
565 |
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
"""
|
577 |
|
578 |
def process_value(self, table: Any) -> Any:
|
@@ -595,17 +619,19 @@ class ConvertTableColNamesToSequential(FieldOperator):
|
|
595 |
class ShuffleTableRows(TypeDependentAugmentor):
|
596 |
"""Shuffles the input table rows randomly.
|
597 |
|
598 |
-
|
599 |
-
{
|
600 |
-
"header": ["name", "age"],
|
601 |
-
"rows": [["Alex", 26], ["Raj", 34], ["Donald", 39]],
|
602 |
-
}
|
603 |
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
"""
|
610 |
|
611 |
augmented_type = Table
|
@@ -619,17 +645,19 @@ class ShuffleTableRows(TypeDependentAugmentor):
|
|
619 |
class ShuffleTableColumns(TypeDependentAugmentor):
|
620 |
"""Shuffles the table columns randomly.
|
621 |
|
622 |
-
|
623 |
-
{
|
624 |
-
"header": ["name", "age"],
|
625 |
-
"rows": [["Alex", 26], ["Raj", 34], ["Donald", 39]],
|
626 |
-
}
|
627 |
|
628 |
-
|
629 |
-
|
630 |
-
|
631 |
-
|
632 |
-
|
633 |
"""
|
634 |
|
635 |
augmented_type = Table
|
@@ -662,11 +690,14 @@ class DumpJson(FieldOperator):
|
|
662 |
class MapHTMLTableToJSON(FieldOperator):
|
663 |
"""Converts HTML table format to the basic one (JSON).
|
664 |
|
665 |
-
JSON format
|
666 |
-
|
667 |
-
|
668 |
-
|
669 |
-
|
670 |
"""
|
671 |
|
672 |
_requirements_list = ["bs4"]
|
@@ -701,11 +732,14 @@ class MapHTMLTableToJSON(FieldOperator):
|
|
701 |
class MapTableListsToStdTableJSON(FieldOperator):
|
702 |
"""Converts lists table format to the basic one (JSON).
|
703 |
|
704 |
-
JSON format
|
705 |
-
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
"""
|
710 |
|
711 |
def process_value(self, table: Any) -> Any:
|
@@ -755,17 +789,20 @@ class ConstructTableFromRowsCols(InstanceOperator):
|
|
755 |
class TransposeTable(TypeDependentAugmentor):
|
756 |
"""Transpose a table.
|
757 |
|
758 |
-
|
759 |
-
|
760 |
-
|
761 |
-
|
762 |
-
|
763 |
|
764 |
-
Sample Output:
|
765 |
-
{
|
766 |
-
"header": [" ", "0", "1", "2"],
|
767 |
-
"rows": [["name", "Alice", "Raj", "Donald"], ["age", 26, 34, 39], ["sex", "F", "M", "M"]],
|
768 |
-
}
|
769 |
"""
|
770 |
|
771 |
augmented_type = Table
|
@@ -791,8 +828,9 @@ class DuplicateTableRows(TypeDependentAugmentor):
|
|
791 |
"""Duplicates specific rows of a table for the given number of times.
|
792 |
|
793 |
Args:
|
794 |
-
row_indices (List[int])
|
795 |
-
|
|
|
796 |
"""
|
797 |
|
798 |
augmented_type = Table
|
@@ -823,8 +861,9 @@ class DuplicateTableColumns(TypeDependentAugmentor):
|
|
823 |
"""Duplicates specific columns of a table for the given number of times.
|
824 |
|
825 |
Args:
|
826 |
-
column_indices (List[int])
|
827 |
-
|
|
|
828 |
"""
|
829 |
|
830 |
augmented_type = Table
|
|
|
2 |
|
3 |
These operators are specialized in handling structured data like tables.
|
4 |
For tables, expected input format is:
|
5 |
+
|
6 |
+
.. code-block:: text
|
7 |
+
|
8 |
+
{
|
9 |
+
"header": ["col1", "col2"],
|
10 |
+
"rows": [["row11", "row12"], ["row21", "row22"], ["row31", "row32"]]
|
11 |
+
}
|
12 |
|
13 |
For triples, expected input format is:
|
14 |
+
|
15 |
+
.. code-block:: text
|
16 |
+
|
17 |
+
[[ "subject1", "relation1", "object1" ], [ "subject1", "relation2", "object2"]]
|
18 |
|
19 |
For key-value pairs, expected input format is:
|
20 |
+
|
21 |
+
.. code-block:: text
|
22 |
+
|
23 |
+
{"key1": "value1", "key2": value2, "key3": "value3"}
|
24 |
"""
|
25 |
|
26 |
import json
|
|
|
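When writing new operators against these formats, a quick shape check helps catch malformed tables early; ``is_std_table`` below is an illustration, not part of the library:

.. code-block:: python

    def is_std_table(obj) -> bool:
        # standard format: a dict with a "header" list and "rows" of equal width
        return (
            isinstance(obj, dict)
            and isinstance(obj.get("header"), list)
            and isinstance(obj.get("rows"), list)
            and all(len(row) == len(obj["header"]) for row in obj["rows"])
        )

    assert is_std_table({"header": ["col1", "col2"],
                         "rows": [["row11", "row12"], ["row21", "row22"]]})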
156 |
|
157 |
Markdown table format is primarily used in GitHub code.
|
158 |
Format:
|
159 |
+
|
160 |
+
.. code-block:: text
|
161 |
+
|
162 |
+
|col1|col2|col3|
|
163 |
+
|---|---|---|
|
164 |
+
|A|4|1|
|
165 |
+
|I|2|1|
|
166 |
+
...
|
167 |
+
|
168 |
"""
|
169 |
|
170 |
# main method that serializes a table.
|
|
|
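Producing this format from the standard table dict is straightforward; a hand-rolled sketch (not the class's actual implementation):

.. code-block:: python

    def to_markdown(table):
        header = table["header"]
        lines = ["|" + "|".join(str(c) for c in header) + "|",
                 "|" + "|".join("---" for _ in header) + "|"]
        lines += ["|" + "|".join(str(c) for c in row) + "|" for row in table["rows"]]
        return "\n".join(lines)

    print(to_markdown({"header": ["col1", "col2", "col3"],
                       "rows": [["A", 4, 1], ["I", 2, 1]]}))
    # |col1|col2|col3|
    # |---|---|---|
    # |A|4|1|
    # |I|2|1|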
204 |
|
205 |
Pandas dataframe based code snippet format serializer.
|
206 |
Format(Sample):
|
207 |
+
|
208 |
+
.. code-block:: python
|
209 |
+
|
210 |
+
pd.DataFrame({
|
211 |
+
"name" : ["Alex", "Diana", "Donald"],
|
212 |
+
"age" : [26, 34, 39]
|
213 |
+
},
|
214 |
+
index=[0,1,2])
|
215 |
"""
|
216 |
|
217 |
# main method that serializes a table.
|
|
|
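The sample above is a column-major dict plus an explicit index; one way to emit such a snippet from the standard table dict (illustrative only):

.. code-block:: python

    import json

    def to_df_loader(table):
        # pivot rows into a column-major mapping, as in the sample format
        columns = {name: [row[i] for row in table["rows"]]
                   for i, name in enumerate(table["header"])}
        index = list(range(len(table["rows"])))
        return "pd.DataFrame({},\nindex={})".format(json.dumps(columns), index)

    print(to_df_loader({"header": ["name", "age"],
                        "rows": [["Alex", 26], ["Diana", 34], ["Donald", 39]]}))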
249 |
|
250 |
JSON format based serializer.
|
251 |
Format(Sample):
|
252 |
+
|
253 |
+
.. code-block:: json
|
254 |
+
|
255 |
+
{
|
256 |
+
"0":{"name":"Alex","age":26},
|
257 |
+
"1":{"name":"Diana","age":34},
|
258 |
+
"2":{"name":"Donald","age":39}
|
259 |
+
}
|
260 |
"""
|
261 |
|
262 |
# main method that serializes a table.
|
|
|
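The row-indexed JSON shown above maps each row to a dict keyed by its position; a compact sketch:

.. code-block:: python

    import json

    def table_to_json(table):
        return json.dumps({str(i): dict(zip(table["header"], row))
                           for i, row in enumerate(table["rows"])})

    print(table_to_json({"header": ["name", "age"],
                         "rows": [["Alex", 26], ["Diana", 34], ["Donald", 39]]}))
    # {"0": {"name": "Alex", "age": 26}, "1": {"name": "Diana", "age": 34}, ...}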
282 |
|
283 |
HTML table format used for rendering tables in web pages.
|
284 |
Format(Sample):
|
285 |
+
|
286 |
+
.. code-block:: html
|
287 |
+
|
288 |
+
<table>
|
289 |
+
<thead>
|
290 |
+
<tr><th>name</th><th>age</th><th>sex</th></tr>
|
291 |
+
</thead>
|
292 |
+
<tbody>
|
293 |
+
<tr><td>Alice</td><td>26</td><td>F</td></tr>
|
294 |
+
<tr><td>Raj</td><td>34</td><td>M</td></tr>
|
295 |
+
</tbody>
|
296 |
+
</table>
|
297 |
"""
|
298 |
|
299 |
# main method that serializes a table.
|
|
|
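A minimal emitter for this HTML shape (illustrative, not the class's actual code):

.. code-block:: python

    def to_html(table):
        head = "".join(f"<th>{h}</th>" for h in table["header"])
        body = "".join("<tr>" + "".join(f"<td>{c}</td>" for c in row) + "</tr>"
                       for row in table["rows"])
        return (f"<table>\n<thead>\n<tr>{head}</tr>\n</thead>\n"
                f"<tbody>\n{body}\n</tbody>\n</table>")

    print(to_html({"header": ["name", "age"],
                   "rows": [["Alice", 26], ["Raj", 34]]}))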
425 |
"""Limits table rows to specified limit by removing excess rows via random selection.
|
426 |
|
427 |
Args:
|
428 |
+
rows_to_keep (int): number of rows to keep.
|
429 |
"""
|
430 |
|
431 |
rows_to_keep: int = 10
|
|
|
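One reasonable reading of "random selection" is to sample the indices to keep and re-sort them so surviving rows stay in their original order; a sketch under that assumption:

.. code-block:: python

    import random

    def truncate_rows(table, rows_to_keep=10):
        rows = table["rows"]
        if len(rows) <= rows_to_keep:
            return table
        keep = sorted(random.sample(range(len(rows)), rows_to_keep))
        return {"header": table["header"], "rows": [rows[i] for i in keep]}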
584 |
class ConvertTableColNamesToSequential(FieldOperator):
|
585 |
"""Replaces actual table column names with static sequential names like col_0, col_1,...
|
586 |
|
587 |
+
.. code-block:: text
|
588 |
+
|
589 |
+
Sample input:
|
590 |
+
{
|
591 |
+
"header": ["name", "age"],
|
592 |
+
"rows": [["Alex", 21], ["Donald", 34]]
|
593 |
+
}
|
594 |
+
|
595 |
+
Sample output:
|
596 |
+
{
|
597 |
+
"header": ["col_0", "col_1"],
|
598 |
+
"rows": [["Alex", 21], ["Donald", 34]]
|
599 |
+
}
|
600 |
"""
|
601 |
|
602 |
def process_value(self, table: Any) -> Any:
|
|
|
619 |
class ShuffleTableRows(TypeDependentAugmentor):
|
620 |
"""Shuffles the input table rows randomly.
|
621 |
|
622 |
+
.. code-block:: text
|
623 |
|
624 |
+
Sample Input:
|
625 |
+
{
|
626 |
+
"header": ["name", "age"],
|
627 |
+
"rows": [["Alex", 26], ["Raj", 34], ["Donald", 39]],
|
628 |
+
}
|
629 |
+
|
630 |
+
Sample Output:
|
631 |
+
{
|
632 |
+
"header": ["name", "age"],
|
633 |
+
"rows": [["Donald", 39], ["Raj", 34], ["Alex", 26]],
|
634 |
+
}
|
635 |
"""
|
636 |
|
637 |
augmented_type = Table
|
|
|
645 |
class ShuffleTableColumns(TypeDependentAugmentor):
|
646 |
"""Shuffles the table columns randomly.
|
647 |
|
648 |
+
.. code-block:: text
|
649 |
|
650 |
+
Sample Input:
|
651 |
+
{
|
652 |
+
"header": ["name", "age"],
|
653 |
+
"rows": [["Alex", 26], ["Raj", 34], ["Donald", 39]],
|
654 |
+
}
|
655 |
+
|
656 |
+
Sample Output:
|
657 |
+
{
|
658 |
+
"header": ["age", "name"],
|
659 |
+
"rows": [[26, "Alex"], [34, "Raj"], [39, "Donald"]],
|
660 |
+
}
|
661 |
"""
|
662 |
|
663 |
augmented_type = Table
|
|
|
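Column shuffling amounts to drawing one permutation and applying it to the header and to every row; a standalone sketch:

.. code-block:: python

    import random

    def shuffle_columns(table):
        perm = list(range(len(table["header"])))
        random.shuffle(perm)
        return {"header": [table["header"][i] for i in perm],
                "rows": [[row[i] for i in perm] for row in table["rows"]]}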
690 |
class MapHTMLTableToJSON(FieldOperator):
|
691 |
"""Converts HTML table format to the basic one (JSON).
|
692 |
|
693 |
+
JSON format:
|
694 |
+
|
695 |
+
.. code-block:: json
|
696 |
+
|
697 |
+
{
|
698 |
+
"header": ["col1", "col2"],
|
699 |
+
"rows": [["row11", "row12"], ["row21", "row22"], ["row31", "row32"]]
|
700 |
+
}
|
701 |
"""
|
702 |
|
703 |
_requirements_list = ["bs4"]
|
|
|
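Since the class requires ``bs4``, the mapping can be sketched with BeautifulSoup; the parsing details below are assumptions, and note that cell values come back as strings:

.. code-block:: python

    from bs4 import BeautifulSoup

    def html_table_to_json(html):
        soup = BeautifulSoup(html, "html.parser")
        header = [th.get_text() for th in soup.find_all("th")]
        rows = [[td.get_text() for td in tr.find_all("td")]
                for tr in soup.find_all("tr") if tr.find("td")]
        return {"header": header, "rows": rows}

    html = ("<table><thead><tr><th>name</th><th>age</th></tr></thead>"
            "<tbody><tr><td>Alice</td><td>26</td></tr></tbody></table>")
    print(html_table_to_json(html))
    # {'header': ['name', 'age'], 'rows': [['Alice', '26']]}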
732 |
class MapTableListsToStdTableJSON(FieldOperator):
|
733 |
"""Converts lists table format to the basic one (JSON).
|
734 |
|
735 |
+
JSON format:
|
736 |
+
|
737 |
+
.. code-block:: json
|
738 |
+
|
739 |
+
{
|
740 |
+
"header": ["col1", "col2"],
|
741 |
+
"rows": [["row11", "row12"], ["row21", "row22"], ["row31", "row32"]]
|
742 |
+
}
|
743 |
"""
|
744 |
|
745 |
def process_value(self, table: Any) -> Any:
|
|
|
789 |
class TransposeTable(TypeDependentAugmentor):
|
790 |
"""Transpose a table.
|
791 |
|
792 |
+
.. code-block:: text
|
793 |
+
|
794 |
+
Sample Input:
|
795 |
+
{
|
796 |
+
"header": ["name", "age", "sex"],
|
797 |
+
"rows": [["Alice", 26, "F"], ["Raj", 34, "M"], ["Donald", 39, "M"]],
|
798 |
+
}
|
799 |
+
|
800 |
+
Sample Output:
|
801 |
+
{
|
802 |
+
"header": [" ", "0", "1", "2"],
|
803 |
+
"rows": [["name", "Alice", "Raj", "Donald"], ["age", 26, 34, 39], ["sex", "F", "M", "M"]],
|
804 |
+
}
|
805 |
|
806 |
"""
|
807 |
|
808 |
augmented_type = Table
|
|
|
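The transpose shown above turns the old header into the first column and names the new columns " ", "0", "1", ...; a sketch that reproduces the sample:

.. code-block:: python

    def transpose(table):
        n_rows = len(table["rows"])
        return {"header": [" "] + [str(i) for i in range(n_rows)],
                "rows": [[name] + [row[i] for row in table["rows"]]
                         for i, name in enumerate(table["header"])]}

    print(transpose({"header": ["name", "age", "sex"],
                     "rows": [["Alice", 26, "F"], ["Raj", 34, "M"], ["Donald", 39, "M"]]}))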
828 |
"""Duplicates specific rows of a table for the given number of times.
|
829 |
|
830 |
Args:
|
831 |
+
row_indices (List[int]): indices of the rows to be duplicated
|
832 |
+
|
833 |
+
times (int): the number of times each selected row appears in the output
|
834 |
"""
|
835 |
|
836 |
augmented_type = Table
|
|
|
861 |
"""Duplicates specific columns of a table for the given number of times.
|
862 |
|
863 |
Args:
|
864 |
+
column_indices (List[int]): indices of the columns to be duplicated
|
865 |
+
|
866 |
+
times (int): the number of times each selected column appears in the output
|
867 |
"""
|
868 |
|
869 |
augmented_type = Table
|
task.py
CHANGED
@@ -41,24 +41,28 @@ class Task(InstanceOperator, ArtifactFetcherMixin):
|
|
41 |
|
42 |
Attributes:
|
43 |
input_fields (Union[Dict[str, str], List[str]]):
|
44 |
-
|
45 |
-
|
|
|
46 |
reference_fields (Union[Dict[str, str], List[str]]):
|
47 |
-
|
48 |
-
|
|
|
49 |
metrics (List[str]): List of names of metrics to be used in the task.
|
|
|
50 |
prediction_type (Optional[str]):
|
51 |
-
|
52 |
-
|
|
|
53 |
defaults (Optional[Dict[str, Any]]):
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
|
58 |
The output instance contains three fields:
|
59 |
-
"input_fields" whose value is a sub-dictionary of the input instance, consisting of all the fields listed in Arg 'input_fields'.
|
60 |
-
"reference_fields" -- for the fields listed in Arg "reference_fields".
|
61 |
-
"metrics" -- to contain the value of Arg 'metrics'
|
62 |
"""
|
63 |
|
64 |
input_fields: Optional[Union[Dict[str, Type], Dict[str, str], List[str]]] = None
|
|
|
41 |
|
42 |
Attributes:
|
43 |
input_fields (Union[Dict[str, str], List[str]]):
|
44 |
+
Dictionary with string names of instance input fields and types of respective values.
|
45 |
+
In case a list is passed, each type will be assumed to be Any.
|
46 |
+
|
47 |
reference_fields (Union[Dict[str, str], List[str]]):
|
48 |
+
Dictionary with string names of instance output fields and types of respective values.
|
49 |
+
In case a list is passed, each type will be assumed to be Any.
|
50 |
+
|
51 |
metrics (List[str]): List of names of metrics to be used in the task.
|
52 |
+
|
53 |
prediction_type (Optional[str]):
|
54 |
+
Needs to be consistent with all used metrics. Defaults to None, which means that it will
|
55 |
+
be set to Any.
|
56 |
+
|
57 |
defaults (Optional[Dict[str, Any]]):
|
58 |
+
An optional dictionary with default values for chosen input/output keys. Needs to be
|
59 |
+
consistent with the names and types provided in the 'input_fields' and/or 'reference_fields' arguments.
|
60 |
+
Will not overwrite values if already provided in a given instance.
|
61 |
|
62 |
The output instance contains three fields:
|
63 |
+
1. "input_fields" whose value is a sub-dictionary of the input instance, consisting of all the fields listed in Arg 'input_fields'.
|
64 |
+
2. "reference_fields" -- for the fields listed in Arg "reference_fields".
|
65 |
+
3. "metrics" -- to contain the value of Arg 'metrics'
|
66 |
"""
|
67 |
|
68 |
input_fields: Optional[Union[Dict[str, Type], Dict[str, str], List[str]]] = None
|
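A hedged usage sketch of the attributes documented above (assuming ``Task`` is imported from this module); the field names and the metric catalog entry are illustrative:

.. code-block:: python

    task = Task(
        input_fields={"text": "str", "options": "List[str]"},
        reference_fields={"label": "str"},
        prediction_type="str",
        metrics=["metrics.accuracy"],
        defaults={"options": ["yes", "no"]},
    )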
templates.py
CHANGED
@@ -308,19 +308,25 @@ class PairwiseChoiceTemplate(InputOutputTemplate):
|
|
308 |
|
309 |
Args:
|
310 |
choice_a_field (str): The field which contains choice_a value
|
|
|
311 |
choice_b_field (str): The field which contains choice_b value
|
|
|
312 |
answer_field (str): The field which contains the answer value.
|
313 |
-
|
|
|
314 |
choice_a_label (str): The label of choice A answer as it is verbalized in the template.
|
|
|
315 |
choice_b_label (str): The label of choice B answer as it is verbalized in the template.
|
|
|
316 |
choice_tie_label (str): The label of a tie answer as it should be verbalized in the template.
|
|
|
317 |
shuffle (bool): whether to shuffle the choices or not. This is done to take into account position bias.
|
318 |
|
319 |
shuffle: 50% of the time:
|
320 |
-
1
|
321 |
-
2
|
322 |
-
|
323 |
-
|
324 |
|
325 |
"""
|
326 |
|
@@ -433,14 +439,17 @@ class PairwiseComparativeRatingTemplate(InputOutputTemplate):
|
|
433 |
|
434 |
Args:
|
435 |
choice_a_field (str): The field which contains choice_a value
|
|
|
436 |
choice_b_field (str): The field which contains choice_b value
|
|
|
437 |
answer_field (str): The field which contains the answer value. The value should be an int.
|
438 |
-
|
|
|
439 |
shuffle (bool): whether to shuffle the choices or not. This is done to take into account position bias.
|
440 |
|
441 |
shuffle: 50% of the time:
|
442 |
-
|
443 |
-
|
444 |
|
445 |
"""
|
446 |
|
|
|
308 |
|
309 |
Args:
|
310 |
choice_a_field (str): The field which contains choice_a value
|
311 |
+
|
312 |
choice_b_field (str): The field which contains choice_b value
|
313 |
+
|
314 |
answer_field (str): The field which contains the answer value.
|
315 |
+
Should be of type Literal["choice_1", "choice_2", "tie"]
|
316 |
+
|
317 |
choice_a_label (str): The label of choice A answer as it is verbalized in the template.
|
318 |
+
|
319 |
choice_b_label (str): The label of choice B answer as it is verbalized in the template.
|
320 |
+
|
321 |
choice_tie_label (str): The label of a tie answer as it should be verbalized in the template.
|
322 |
+
|
323 |
shuffle (bool): whether to shuffle the choices or not. This is done to take into account position bias.
|
324 |
|
325 |
shuffle: 50% of the time:
|
326 |
+
1. The values of choice_a_field and choice_b_field will be swapped.
|
327 |
+
2. If the value of answer_field is choice_a_label, set it to choice_b_label.
|
328 |
+
| Else, if the value of answer_field is choice_b_label, set it to choice_a_label.
|
329 |
+
| Else if the value of answer_field is choice_tie_label, do nothing.
|
330 |
|
331 |
"""
|
332 |
|
|
|
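The documented 50% swap can be read as plain Python; field names follow the Args above, and the helper itself is illustrative:

.. code-block:: python

    import random

    def maybe_swap(instance, a_field, b_field, answer_field, a_label, b_label):
        if random.random() < 0.5:
            instance[a_field], instance[b_field] = instance[b_field], instance[a_field]
            if instance[answer_field] == a_label:
                instance[answer_field] = b_label
            elif instance[answer_field] == b_label:
                instance[answer_field] = a_label
            # a tie answer is left unchanged
        return instance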
439 |
|
440 |
Args:
|
441 |
choice_a_field (str): The field which contains choice_a value
|
442 |
+
|
443 |
choice_b_field (str): The field which contains choice_b value
|
444 |
+
|
445 |
answer_field (str): The field which contains the answer value. The value should be an int.
|
446 |
+
Positive for preferring choice_a, and negative for preferring choice_b
|
447 |
+
|
448 |
shuffle (bool): whether to shuffle the choices or not. This is done to take into account position bias.
|
449 |
|
450 |
shuffle: 50% of the time:
|
451 |
+
| 1) The values of choice_a_field and choice_b_field will be swapped.
|
452 |
+
| 2) Replace the values of answer_field with its mapped value according to the reverse_preference_map Dict.
|
453 |
|
454 |
"""
|
455 |
|
type_utils.py
CHANGED
@@ -307,21 +307,22 @@ def infer_type_string(obj: typing.Any) -> str:
|
|
307 |
obj:Any
|
308 |
|
309 |
Returns:
|
310 |
-
|
311 |
|
312 |
-
formal definition of the returned string:
|
313 |
-
Type -> basic | List[Type] | Dict[Type, Type] | Union[Type (, Type)* | Tuple[Type (,Type)*]
|
314 |
-
basic -> bool,str,int,float,Any
|
315 |
-
no spaces at all.
|
316 |
|
317 |
Examples:
|
318 |
-
infer_type_string({"how_much": 7}) returns "Dict[str,int]"
|
319 |
-
infer_type_string([1, 2]) returns "List[int]"
|
320 |
-
infer_type_string([]) returns "List[Any]") no contents to list to indicate any type
|
321 |
-
infer_type_string([[], [7]]) returns "List[List[int]]" type of parent list indicated
|
322 |
-
|
323 |
-
|
324 |
-
infer_type_string([[], 7, True]) returns "List[Union[List[Any],int]]"
|
|
|
325 |
|
326 |
"""
|
327 |
|
|
|
307 |
obj:Any
|
308 |
|
309 |
Returns:
|
310 |
+
A string representation of the type of the object, e.g. ``"str"``, ``"List[int]"``, ``"Dict[str, Any]"``.
|
311 |
+
|
312 |
+
| formal definition of the returned string:
|
313 |
+
| Type -> basic | List[Type] | Dict[Type, Type] | Union[Type(, Type)*] | Tuple[Type(, Type)*]
|
314 |
+
| basic -> ``bool`` | ``str`` | ``int`` | ``float`` | ``Any``
|
315 |
|
316 |
|
317 |
Examples:
|
318 |
+
| ``infer_type_string({"how_much": 7})`` returns ``"Dict[str,int]"``
|
319 |
+
| ``infer_type_string([1, 2])`` returns ``"List[int]"``
|
320 |
+
| ``infer_type_string([])`` returns ``"List[Any]"`` since an empty list has no contents from which to infer an element type
|
321 |
+
| ``infer_type_string([[], [7]])`` returns ``"List[List[int]]"``; the type of the parent list is indicated
|
322 |
+
by the type of the non-empty child list; the empty child list is, by default, assumed to be of
|
323 |
+
the same type as the non-empty child.
|
324 |
+
| ``infer_type_string([[], 7, True])`` returns ``"List[Union[List[Any],int]]"``
|
325 |
+
because ``bool`` is also an ``int``
|
326 |
|
327 |
"""
|
328 |
|
utils.py
CHANGED
@@ -32,8 +32,8 @@ class LRUCache:
|
|
32 |
|
33 |
Attributes:
|
34 |
max_size (int): The maximum number of items to store in the cache.
|
35 |
-
|
36 |
-
|
37 |
"""
|
38 |
|
39 |
def __init__(self, max_size=10):
|
|
|
32 |
|
33 |
Attributes:
|
34 |
max_size (int): The maximum number of items to store in the cache.
|
35 |
+
Items exceeding this limit are automatically removed based on least
|
36 |
+
recent usage.
|
37 |
"""
|
38 |
|
39 |
def __init__(self, max_size=10):
|
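The documented eviction policy matches the classic OrderedDict recipe; a minimal sketch (illustrative, not the class's actual code):

.. code-block:: python

    from collections import OrderedDict

    class SimpleLRU:
        def __init__(self, max_size=10):
            self.max_size = max_size
            self._data = OrderedDict()

        def get(self, key, default=None):
            if key in self._data:
                self._data.move_to_end(key)  # mark as most recently used
                return self._data[key]
            return default

        def set(self, key, value):
            self._data[key] = value
            self._data.move_to_end(key)
            while len(self._data) > self.max_size:
                self._data.popitem(last=False)  # evict least recently used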
version.py
CHANGED
@@ -1 +1 @@
|
|
1 |
-
version = "1.15.
|
|
|
1 |
+
version = "1.15.10"
|