nbansal committed
Commit 57111be
1 Parent(s): 0377c9d

Handled the None and empty string cases

Files changed (3)
  1. semf1.py +26 -17
  2. tests.py +104 -12
  3. utils.py +78 -34
semf1.py CHANGED
@@ -27,7 +27,7 @@ from sklearn.metrics.pairwise import cosine_similarity
 
 from .encoder_models import get_encoder
 from .type_aliases import DEVICE_TYPE, PREDICTION_TYPE, REFERENCE_TYPE
-from .utils import is_nested_list_of_type, Scores, slice_embeddings, flatten_list, get_gpu
+from .utils import is_nested_list_of_type, Scores, slice_embeddings, flatten_list, get_gpu, sent_tokenize
 
 _CITATION = """\
 @inproceedings{bansal-etal-2022-sem,
@@ -223,22 +223,33 @@ def _validate_input_format(
     """
 
     if len(predictions) != len(references):
-        raise ValueError("Predictions and references must have the same length.")
+        raise ValueError(f"Predictions and references must have the same length. "
+                         f"Got {len(predictions)} predictions and {len(references)} references.")
 
     def is_list_of_strings_at_depth(lst_obj, depth: int):
         return is_nested_list_of_type(lst_obj, element_type=str, depth=depth)
 
-    if tokenize_sentences and multi_references:
-        condition = is_list_of_strings_at_depth(predictions, 1) and is_list_of_strings_at_depth(references, 2)
-    elif not tokenize_sentences and multi_references:
-        condition = is_list_of_strings_at_depth(predictions, 2) and is_list_of_strings_at_depth(references, 3)
-    elif tokenize_sentences and not multi_references:
-        condition = is_list_of_strings_at_depth(predictions, 1) and is_list_of_strings_at_depth(references, 1)
-    else:
-        condition = is_list_of_strings_at_depth(predictions, 2) and is_list_of_strings_at_depth(references, 2)
-
-    if not condition:
-        raise ValueError("Predictions are references are not valid input format. Refer to documentation.")
+    def check_format(lst_obj, expected_depth: int, name: str):
+        is_valid, error_message = is_list_of_strings_at_depth(lst_obj, expected_depth)
+        if not is_valid:
+            raise ValueError(f"{name} are not in the expected format.\n"
+                             f"Error: {error_message}.")
+
+    try:
+        if tokenize_sentences and multi_references:
+            check_format(predictions, 1, "Predictions")
+            check_format(references, 2, "References")
+        elif not tokenize_sentences and multi_references:
+            check_format(predictions, 2, "Predictions")
+            check_format(references, 3, "References")
+        elif tokenize_sentences and not multi_references:
+            check_format(predictions, 1, "Predictions")
+            check_format(references, 1, "References")
+        else:
+            check_format(predictions, 2, "Predictions")
+            check_format(references, 2, "References")
+    except ValueError as ve:
+        raise ValueError(f"Input validation error: {ve}")
 
 
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
@@ -317,8 +328,6 @@ class SemF1(evaluate.Metric):
         """Optional: download external resources useful to compute the scores"""
         import nltk
         nltk.download("punkt", quiet=True)
-        # if not nltk.data.find("tokenizers/punkt"):  # TODO: check why it is not working
-        #     pass
 
     def _compute(
             self,
@@ -377,8 +386,8 @@ class SemF1(evaluate.Metric):
 
         # Tokenize sentences if required
         if tokenize_sentences:
-            predictions = [nltk.tokenize.sent_tokenize(pred) for pred in predictions]
-            references = [[nltk.tokenize.sent_tokenize(ref) for ref in refs] for refs in references]
+            predictions = [sent_tokenize(pred) for pred in predictions]
+            references = [[sent_tokenize(ref) for ref in refs] for refs in references]
 
         # Flatten the data for batch processing
         all_sentences = flatten_list(predictions) + flatten_list(references)

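For orientation, the depths checked in `_validate_input_format` above correspond to concrete input shapes per flag combination. The snippet below is a minimal sketch of what callers now see; the `evaluate.load` path and the example texts are assumptions for illustration, not part of this commit.

```python
import evaluate

# Hypothetical load path, for illustration only.
semf1 = evaluate.load("nbansal/semf1")

# Expected shapes checked by the validator:
# tokenize_sentences=True,  multi_references=True  -> predictions: List[str],       references: List[List[str]]
# tokenize_sentences=False, multi_references=True  -> predictions: List[List[str]], references: List[List[List[str]]]
# tokenize_sentences=True,  multi_references=False -> predictions: List[str],       references: List[str]
# tokenize_sentences=False, multi_references=False -> predictions: List[List[str]], references: List[List[str]]

# A length mismatch now surfaces the more descriptive error added above, e.g.:
# ValueError: Predictions and references must have the same length. Got 2 predictions and 1 references.
semf1.compute(
    predictions=["The cat sat.", "Dogs bark."],
    references=[["A cat was sitting."]],
    tokenize_sentences=True,
    multi_references=True,
)
```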
tests.py CHANGED
@@ -1,6 +1,5 @@
 import statistics
 import unittest
-from unittest.mock import patch, MagicMock
 
 import numpy as np
 import torch
@@ -73,29 +72,36 @@ class TestUtils(unittest.TestCase):
 
     def test_is_nested_list_of_type(self):
         # Test case: Depth 0, single element matching element_type
-        self.assertTrue(is_nested_list_of_type("test", str, 0))
+        self.assertEqual(is_nested_list_of_type("test", str, 0), (True, ""))
 
         # Test case: Depth 0, single element not matching element_type
-        self.assertFalse(is_nested_list_of_type("test", int, 0))
+        is_valid, err_msg = is_nested_list_of_type("test", int, 0)
+        self.assertEqual(is_valid, False)
 
         # Test case: Depth 1, list of elements matching element_type
-        self.assertTrue(is_nested_list_of_type(["apple", "banana"], str, 1))
+        self.assertEqual(is_nested_list_of_type(["apple", "banana"], str, 1), (True, ""))
 
         # Test case: Depth 1, list of elements not matching element_type
-        self.assertFalse(is_nested_list_of_type([1, 2, 3], str, 1))
+        is_valid, err_msg = is_nested_list_of_type([1, 2, 3], str, 1)
+        self.assertEqual(is_valid, False)
 
         # Test case: Depth 0 (Wrong), list of elements matching element_type
-        self.assertFalse(is_nested_list_of_type([1, 2, 3], str, 0))
+        is_valid, err_msg = is_nested_list_of_type([1, 2, 3], str, 0)
+        self.assertEqual(is_valid, False)
 
         # Depth 2
-        self.assertTrue(is_nested_list_of_type([[1, 2], [3, 4]], int, 2))
-        self.assertTrue(is_nested_list_of_type([['1', '2'], ['3', '4']], str, 2))
-        self.assertFalse(is_nested_list_of_type([[1, 2], ["a", "b"]], int, 2))
+        self.assertEqual(is_nested_list_of_type([[1, 2], [3, 4]], int, 2), (True, ""))
+        self.assertEqual(is_nested_list_of_type([['1', '2'], ['3', '4']], str, 2), (True, ""))
+        is_valid, err_msg = is_nested_list_of_type([[1, 2], ["a", "b"]], int, 2)
+        self.assertEqual(is_valid, False)
 
         # Depth 3
-        self.assertFalse(is_nested_list_of_type([[[1], [2]], [[3], [4]]], list, 3))
-        self.assertTrue(is_nested_list_of_type([[[1], [2]], [[3], [4]]], int, 3))
+        is_valid, err_msg = is_nested_list_of_type([[[1], [2]], [[3], [4]]], list, 3)
+        self.assertEqual(is_valid, False)
+        self.assertEqual(is_nested_list_of_type([[[1], [2]], [[3], [4]]], int, 3), (True, ""))
 
+        # Test case: Depth is negative, expecting ValueError
         with self.assertRaises(ValueError):
             is_nested_list_of_type([1, 2], int, -1)
 
@@ -335,6 +341,93 @@ class TestSemF1(unittest.TestCase):
         self.assertAlmostEqual(score.precision, 0.73, places=2)
         self.assertAlmostEqual(score.recall[0], 0.63, places=2)
 
+    def test_none_input(self):
+        def _call_metric(preds, refs, tok, mul_ref):
+            with self.assertRaises(ValueError) as ctx:
+                _ = self.semf1_metric.compute(
+                    predictions=preds,
+                    references=refs,
+                    tokenize_sentences=tok,
+                    multi_references=mul_ref,
+                    gpu=False,
+                    batch_size=32,
+                    verbose=False,
+                    model_type="use",
+                )
+            print(f"Raised ValueError with message: {ctx.exception}")
+            return ""
+
+        # Case 1: tokenize_sentences = True, multi_references = True
+        tokenize_sentences = True
+        multi_references = True
+        predictions = [
+            "I go to School. You are stupid.",
+            "I go to School. You are stupid.",
+        ]
+        references = [
+            ["I am", "I am"],
+            [None, "I am"],
+        ]
+        print(f"Case I\n{_call_metric(predictions, references, tokenize_sentences, multi_references)}\n")
+
+        # Case 2: tokenize_sentences = False, multi_references = True
+        tokenize_sentences = False
+        multi_references = True
+        predictions = [
+            ["I go to School.", "You are stupid."],
+            ["I go to School.", "You are stupid."],
+        ]
+        references = [
+            [["I am", "I am"], [None, "I am"]],
+            [[None, "I am"]],
+        ]
+        print(f"Case II\n{_call_metric(predictions, references, tokenize_sentences, multi_references)}\n")
+
+        # Case 3: tokenize_sentences = True, multi_references = False
+        tokenize_sentences = True
+        multi_references = False
+        predictions = [
+            None,
+            "I go to School. You are stupid.",
+        ]
+        references = [
+            "I am. I am.",
+            "I am. I am.",
+        ]
+        print(f"Case III\n{_call_metric(predictions, references, tokenize_sentences, multi_references)}\n")
+
+        # Case 4: tokenize_sentences = False, multi_references = False
+        # This is taken care of by the library itself
+        tokenize_sentences = False
+        multi_references = False
+        predictions = [
+            ["I go to School.", None],
+            ["I go to School.", "You are stupid."],
+        ]
+        references = [
+            ["I am.", "I am."],
+            ["I am.", "I am."],
+        ]
+        print(f"Case IV\n{_call_metric(predictions, references, tokenize_sentences, multi_references)}\n")
+
+    def test_empty_input(self):
+        predictions = [""]
+        references = ["I go to School. You are stupid."]
+        scores = self.semf1_metric.compute(
+            predictions=predictions,
+            references=references,
+        )
+        print(scores)
+
+        # # Test with Gibberish Cases
+        # predictions = ["lth cgezawrxretxdr", "dsfgsdfhsdfh"]
+        # references = ["dzfgzeWfnAfse", "dtjsrtzerZJSEWr"]
+        # scores = self.semf1_metric.compute(
+        #     predictions=predictions,
+        #     references=references,
+        # )
+        # print(scores)
+
 
 class TestCosineSimilarity(unittest.TestCase):
 
@@ -509,4 +602,3 @@ class TestValidateInputFormat(unittest.TestCase):
 
 if __name__ == '__main__':
     unittest.main(verbosity=2)
-
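To run just the two cases added in this commit locally, a sketch like the following works (assuming the metric's dependencies, including NLTK's punkt data and the encoder models, are available in your environment):

```python
import unittest

# Hypothetical selective run of the new tests; the full suite runs via `unittest.main` as above.
suite = unittest.TestSuite()
suite.addTest(unittest.defaultTestLoader.loadTestsFromNames(
    ["tests.TestSemF1.test_none_input", "tests.TestSemF1.test_empty_input"]
))
unittest.TextTestRunner(verbosity=2).run(suite)
```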
 
utils.py CHANGED
@@ -1,8 +1,10 @@
 import statistics
+import string
 import sys
 from dataclasses import dataclass, field
-from typing import List, Union
+from typing import List, Union, Tuple
 
+import nltk
 import torch
 from numpy.typing import NDArray
 
@@ -149,44 +151,65 @@ def slice_embeddings(embeddings: NDArray, num_sentences: NumSentencesType) -> Em
         raise TypeError(f"Incorrect Type for {num_sentences=}")
 
 
-def is_nested_list_of_type(lst_obj, element_type, depth: int) -> bool:
+def is_nested_list_of_type(lst_obj, element_type, depth: int) -> Tuple[bool, str]:
     """
-    Check if the given object is a nested list of a specific type up to a specified depth.
-
-    Args:
-    - lst_obj: The object to check, expected to be a list or a single element.
-    - element_type: The type that each element in the nested list should match.
-    - depth (int): The depth of nesting to check. Must be non-negative.
-
-    Returns:
-    - bool: True if lst_obj is a nested list of the specified type up to the given depth, False otherwise.
-
-    Raises:
-    - ValueError: If depth is negative.
-
-    Example:
-    ```python
-    # Test cases
-    is_nested_list_of_type("test", str, 0)  # Returns True
-    is_nested_list_of_type([1, 2, 3], str, 0)  # Returns False
-    is_nested_list_of_type(["apple", "banana"], str, 1)  # Returns True
-    is_nested_list_of_type([[1, 2], [3, 4]], int, 2)  # Returns True
-    is_nested_list_of_type([[1, 2], ["a", "b"]], int, 2)  # Returns False
-    is_nested_list_of_type([[[1], [2]], [[3], [4]]], int, 3)  # Returns True
-    ```
-
-    Explanation:
-    - The function checks if `lst_obj` is a nested list of elements of type `element_type` up to `depth` levels deep.
-    - If `depth` is 0, it checks if `lst_obj` itself is of type `element_type`.
-    - If `depth` is greater than 0, it recursively checks each level of nesting to ensure all elements match `element_type`.
-    - Raises a `ValueError` if `depth` is negative, as depth must be a non-negative integer.
-    """
-    if depth == 0:
-        return isinstance(lst_obj, element_type)
-    elif depth > 0:
-        return isinstance(lst_obj, list) and all(is_nested_list_of_type(item, element_type, depth - 1) for item in lst_obj)
-    else:
-        raise ValueError("Depth can't be negative")
+    Check if the given object is a nested list of a specific type up to a specified depth.
+
+    Args:
+    - lst_obj: The object to check, expected to be a list or a single element.
+    - element_type: The type that each element in the nested list should match.
+    - depth (int): The depth of nesting to check. Must be non-negative.
+
+    Returns:
+    - Tuple[bool, str]: A tuple containing:
+        - A boolean indicating if lst_obj is a nested list of the specified type up to the given depth.
+        - A string containing an error message if the check fails, or an empty string if the check passes.
+
+    Raises:
+    - ValueError: If depth is negative.
+
+    Example:
+    ```python
+    # Test cases
+    is_nested_list_of_type("test", str, 0)  # Returns (True, "")
+    is_nested_list_of_type([1, 2, 3], str, 0)  # Returns (False, "Element is of type int, expected type str.")
+    is_nested_list_of_type(["apple", "banana"], str, 1)  # Returns (True, "")
+    is_nested_list_of_type([[1, 2], [3, 4]], int, 2)  # Returns (True, "")
+    is_nested_list_of_type([[1, 2], ["a", "b"]], int, 2)  # Returns (False, "Element at index 1 is of incorrect type.")
+    is_nested_list_of_type([[[1], [2]], [[3], [4]]], int, 3)  # Returns (True, "")
+    ```
+
+    Explanation:
+    - The function checks if `lst_obj` is a nested list of elements of type `element_type` up to `depth` levels deep.
+    - If `depth` is 0, it checks if `lst_obj` itself is of type `element_type`.
+    - If `depth` is greater than 0, it recursively checks each level of nesting to ensure all elements match
+      `element_type`.
+    - Returns a tuple containing a boolean and an error message. The boolean is `True` if `lst_obj` matches the
+      criteria, `False` otherwise. The error message provides details if the check fails.
+    - Raises a `ValueError` if `depth` is negative, as depth must be a non-negative integer.
+    """
+    orig_depth = depth
+
+    def _is_nested_list_of_type(lst_o, e_type, d) -> Tuple[bool, str]:
+        if d == 0:
+            if isinstance(lst_o, e_type):
+                return True, ""
+            else:
+                return False, f"Element is of type {type(lst_o).__name__}, expected type {e_type.__name__}."
+        elif d > 0:
+            if isinstance(lst_o, list):
+                for i, item in enumerate(lst_o):
+                    is_valid, err = _is_nested_list_of_type(item, e_type, d - 1)
+                    if not is_valid:
+                        msg = f"Element at index {i} has incorrect type.\n{err}" if d == orig_depth else err
+                        return False, msg
+                return True, ""
+            else:
+                return False, f"Object is not a list but {type(lst_o)}."
+        else:
+            raise ValueError("Depth can't be negative")
+
+    return _is_nested_list_of_type(lst_obj, element_type, depth)
 
 
 def flatten_list(nested_list: list) -> list:
@@ -220,6 +243,27 @@ def compute_f1(p: float, r: float, eps=sys.float_info.epsilon) -> float:
     return f1
 
 
+def sent_tokenize(text: str) -> List[str]:
+    """
+    Tokenizes the input text into a list of sentences.
+
+    This function uses the NLTK library's sentence tokenizer to split the input
+    text into individual sentences. Leading and trailing whitespace is removed
+    from the input text before tokenization. If the input text is empty or consists
+    only of whitespace, a list containing an empty string is returned.
+
+    Args:
+        text (str): The input text to be tokenized into sentences.
+
+    Returns:
+        List[str]: A list of sentences tokenized from the input text.
+    """
+    text = text.strip()
+    if text == "":
+        return [""]
+    return [sent.strip() for sent in nltk.tokenize.sent_tokenize(text)]
+
+
 @dataclass
 class Scores:
     """