Spaces:

unitxt
/

metric

Running

App Files Files Community

Elron committed on Mar 5, 2024

Commit

3157b84

verified ·

1 Parent(s): 50b5364

Upload metric_utils.py with huggingface_hub

Browse files

Files changed (1) hide show

metric_utils.py +97 -9

metric_utils.py CHANGED Viewed

@@ -1,7 +1,9 @@
-from typing import Dict, Iterable, List
 from datasets import Features, Value
 from .operator import (
     MultiStreamOperator,
     SequentialOperatorInitilizer,
@@ -17,6 +19,7 @@ from .operators import (
 )
 from .register import _reset_env_local_catalogs, register_all_artifacts
 from .schema import UNITXT_DATASET_SCHEMA
 from .stream import MultiStream, Stream
@@ -83,16 +86,12 @@ class FromPredictionsAndOriginalData(StreamInitializerOperator):
         )
-# The additional_inputs field in the schema is defined as
 # Sequence({"key": Value(dtype="string"), "value": Value("string")})
 # When receiving instances from this schema, the keys and values are returned as two separate
 # lists, and are converted to a dictionary.
-def _from_key_value_pairs(key_value_list: Dict[str, list]) -> Dict[str, str]:
-    return dict(zip(key_value_list["key"], key_value_list["value"]))
 class MetricRecipe(SequentialOperatorInitilizer):
     calc_confidence_intervals: bool = True
@@ -101,9 +100,9 @@ class MetricRecipe(SequentialOperatorInitilizer):
         self.steps = [
             FromPredictionsAndOriginalData(),
             Apply(
-                "additional_inputs",
-                function=_from_key_value_pairs,
-                to_field="additional_inputs",
             ),
             ApplyOperatorsField(
                 operators_field="postprocessors",
@@ -144,3 +143,92 @@ def _compute(
     stream = multi_stream[split_name]
     return list(stream)

+import json
+from typing import Any, Dict, Iterable, List, Optional
 from datasets import Features, Value
+from .dataclass import Dataclass
 from .operator import (
     MultiStreamOperator,
     SequentialOperatorInitilizer,
 )
 from .register import _reset_env_local_catalogs, register_all_artifacts
 from .schema import UNITXT_DATASET_SCHEMA
+from .settings_utils import get_settings
 from .stream import MultiStream, Stream
         )
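+# The task_data field in the schema is defined as
 # Sequence({"key": Value(dtype="string"), "value": Value("string")})
 # When receiving instances from this schema, the keys and values are returned as two separate
 # lists, and are converted to a dictionary.
+# NOTE(review): MetricRecipe below decodes task_data with json.loads, which implies a JSON
+# string rather than key/value lists - this comment looks stale; confirm against the schema.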
 class MetricRecipe(SequentialOperatorInitilizer):
     calc_confidence_intervals: bool = True
         self.steps = [
             FromPredictionsAndOriginalData(),
             Apply(
+                "task_data",
+                function="json.loads",
+                to_field="task_data",
             ),
             ApplyOperatorsField(
                 operators_field="postprocessors",
     stream = multi_stream[split_name]
     return list(stream)
+"""
+The API of a metric service:
+- MetricRequest: A single input request to the metrics service.
+- MetricResponse: A response returned from a metrics service.
+"""
+class InstanceInput(Dataclass):
+    """A single instance inputted to a metric service."""
+    prediction: Any
+    references: List[Any]
+    additional_inputs: Optional[Dict] = None
+class MetricRequest(Dataclass):
+    """A request to a metrics service, includes a list of input instances."""
+    instance_inputs: List[InstanceInput]
+class MetricResponse(Dataclass):
+    """A response produced by a metrics service, includes the computed scores."""
+    # A list of instance score dictionaries. Each dictionary contains the
+    # score names and score values for a single instance.
+    instances_scores: List[Dict[str, Any]]
+    # The global scores dictionary, containing global score names and values.
+    # These are scores computed over the entire set of input instances, e.g.
+    # an average over a score computed per instance.
+    global_score: Dict[str, Any]
+"""
+Functionality for loading the remote metrics configuration from local environment variables.
+"""
+# A list of metrics to be executed remotely.
+# For example: '["metrics.rag.context_relevance","metrics.rag.bert_k_precision"]'
+# This value should be a valid JSON list.
+UNITXT_REMOTE_METRICS = "UNITXT_REMOTE_METRICS"
+# The remote endpoint on which the remote metrics are available.
+# For example, 'http://127.0.0.1:8000/compute'
+UNITXT_REMOTE_METRICS_ENDPOINT = "UNITXT_REMOTE_METRICS_ENDPOINT"
+def get_remote_metrics_names() -> List[str]:
+    """Load the remote metrics names from an environment variable.
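+
+    The value is read from the `remote_metrics` setting and, when set to a
+    non-empty value, parsed as JSON.
+
+    Returns:
+        List[str] - names of metrics to be executed remotely.
+
+    Raises:
+        RuntimeError: if the resulting value is not a list, or if any of its
+            items is not a string.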
+    """
+    settings = get_settings()
+    remote_metrics = settings.remote_metrics
+    if remote_metrics:
+        remote_metrics = json.loads(remote_metrics)
+    if not isinstance(remote_metrics, list):
+        raise RuntimeError(
+            f"Unexpected value {remote_metrics} for the '{UNITXT_REMOTE_METRICS}' environment variable. "
+            f"The value is expected to be a list of metric names in json format."
+        )
+    for remote_metric in remote_metrics:
+        if not isinstance(remote_metric, str):
+            raise RuntimeError(
+                f"Unexpected value {remote_metric} within the '{UNITXT_REMOTE_METRICS}' environment variable. "
+                f"The value is expected to be a string but its type is {type(remote_metric)}."
+            )
+    return remote_metrics
+def get_remote_metrics_endpoint() -> str:
+    """Load the remote metrics endpoint from an environment variable.
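+
+    The value is read from the `remote_metrics_endpoint` setting.
+
+    Returns:
+        str - The remote endpoint on which the remote metrics are available.
+
+    Raises:
+        RuntimeError: if reading the setting raises AttributeError.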
+    """
+    settings = get_settings()
+    try:
+        remote_metrics_endpoint = settings.remote_metrics_endpoint
+    except AttributeError as e:
+        raise RuntimeError(
+            f"Unexpected None value for '{UNITXT_REMOTE_METRICS_ENDPOINT}'. "
+            f"Running remote metrics requires defining an "
+            f"endpoint in the environment variable '{UNITXT_REMOTE_METRICS_ENDPOINT}'."
+        ) from e
+    return remote_metrics_endpoint