Spaces:

BridgeAI-Lab
/

SemF1

Running

nbansal committed on Jul 8

Commit

6deb98d

•

1 Parent(s): 6e1f1ed

Minor

Files changed (5) hide show

README.md CHANGED Viewed

@@ -57,14 +57,10 @@ for score in results:
 Sem-F1 also accepts multiple optional arguments:
-- `model_type (str)`: Model to use for encoding sentences. Options: ['pv1', 'stsb', 'use']
-    - `pv1` - [paraphrase-distilroberta-base-v1](https://huggingface.co/sentence-transformers/paraphrase-distilroberta-base-v1)
-    - `stsb` - [stsb-roberta-large](https://huggingface.co/sentence-transformers/stsb-roberta-large)
-    - `use` - [Universal Sentence Encoder](https://huggingface.co/sentence-transformers/use-cmlm-multilingual) (Default)
-      Furthermore, you can use any model on Huggingface/SentenceTransformer that is supported by SentenceTransformer
-       such as `all-mpnet-base-v2` or `roberta-base`
 - `tokenize_sentences (bool)`: Flag to indicate whether to tokenize the sentences in the input documents. Default: True.
 - `multi_references (bool)`: Flag to indicate whether multiple references are provided. Default: False.

 Sem-F1 also accepts multiple optional arguments:
+- `model_type (str)`: Model to use for encoding sentences. Options: ['pv1' ([paraphrase-distilroberta-base-v1](https://huggingface.co/sentence-transformers/paraphrase-distilroberta-base-v1)), 'stsb' ([stsb-roberta-large](https://huggingface.co/sentence-transformers/stsb-roberta-large)), 'use' ([Universal Sentence Encoder](https://huggingface.co/sentence-transformers/use-cmlm-multilingual)) (Default)]
+  Furthermore, you can use any other Hugging Face model that is supported by SentenceTransformer,
+  such as `all-mpnet-base-v2` or `roberta-base`.
 - `tokenize_sentences (bool)`: Flag to indicate whether to tokenize the sentences in the input documents. Default: True.
 - `multi_references (bool)`: Flag to indicate whether multiple references are provided. Default: False.

__init__.py ADDED Viewed

File without changes

encoder_models.py CHANGED Viewed

@@ -72,28 +72,28 @@ class SBertEncoder(Encoder):
 def get_encoder(model_name: str, device: ENCODER_DEVICE_TYPE, batch_size: int, verbose: bool) -> Encoder:
     """
-        Get the encoder instance based on the specified model name.
-        Args:
-            model_name (str): Name of the model to instantiate
-                Options:
-                    paraphrase-distilroberta-base-v1,
-                    stsb-roberta-large,
-                    sentence-transformers/use-cmlm-multilingual
-                Furthermore, you can use any model on Huggingface/SentenceTransformer that is supported by
-                SentenceTransformer.
-            device (Union[str, int, List[Union[str, int]]): Device specification for the encoder
-                (e.g., "cuda", 0 for GPU, "cpu").
-            batch_size (int): Batch size for encoding.
-            verbose (bool): Whether to print verbose information during encoder initialization.
-        Returns:
-            Encoder: Instance of the selected encoder based on the model_name.
-        Raises:
-            EnvironmentError/RuntimeError: If an unsupported model_name is provided.
-        """
     try:
         encoder = SBertEncoder(model_name, device, batch_size, verbose)

 def get_encoder(model_name: str, device: ENCODER_DEVICE_TYPE, batch_size: int, verbose: bool) -> Encoder:
     """
+    Get the encoder instance based on the specified model name.
+    Args:
+        model_name (str): Name of the model to instantiate
+            Options:
+                paraphrase-distilroberta-base-v1,
+                stsb-roberta-large,
+                sentence-transformers/use-cmlm-multilingual
+            Furthermore, you can use any model on Huggingface/SentenceTransformer that is supported by
+            SentenceTransformer.
+        device (Union[str, int, List[Union[str, int]]]): Device specification for the encoder
+            (e.g., "cuda", 0 for GPU, "cpu").
+        batch_size (int): Batch size for encoding.
+        verbose (bool): Whether to print verbose information during encoder initialization.
+    Returns:
+        Encoder: Instance of the selected encoder based on the model_name.
+    Raises:
+        EnvironmentError/RuntimeError: If an unsupported model_name is provided.
+    """
     try:
         encoder = SBertEncoder(model_name, device, batch_size, verbose)

semf1.py CHANGED Viewed

@@ -11,8 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# TODO: Add test cases, Remove tokenize_sentences flag since it can be determined from the input itself.
-"""Sem-F1 metric"""
 from typing import List, Optional, Tuple
@@ -141,7 +143,7 @@ Examples:
         ["I go to School. You are stupid."],
         ["I love outdoor sports."],
     ]
-    >>> metric = evaluate.load("semf1")
     >>> results = metric.compute(predictions=predictions, references=references)
     >>> for score in results:
     >>>     print(f"Precision: {score.precision}, Recall: {score.recall}, F1: {score.f1}")

 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Sem-F1 metric
+Author: Naman Bansal
+"""
 from typing import List, Optional, Tuple
         ["I go to School. You are stupid."],
         ["I love outdoor sports."],
     ]
+    >>> metric = evaluate.load("nbansal/semf1")
     >>> results = metric.compute(predictions=predictions, references=references)
     >>> for score in results:
     ...     print(f"Precision: {score.precision}, Recall: {score.recall}, F1: {score.f1}")

tests.py CHANGED Viewed

@@ -8,9 +8,9 @@ from numpy.testing import assert_almost_equal
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
-from encoder_models import SBertEncoder, get_encoder
-from semf1 import SemF1, _compute_cosine_similarity, _validate_input_format
-from utils import get_gpu, slice_embeddings, is_nested_list_of_type, flatten_list, compute_f1, Scores
 class TestUtils(unittest.TestCase):
@@ -509,4 +509,4 @@ class TestValidateInputFormat(unittest.TestCase):
 if __name__ == '__main__':
     unittest.main(verbosity=2)
-    # unittest.main()

 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
+from .encoder_models import SBertEncoder, get_encoder
+from .semf1 import SemF1, _compute_cosine_similarity, _validate_input_format
+from .utils import get_gpu, slice_embeddings, is_nested_list_of_type, flatten_list, compute_f1, Scores
 class TestUtils(unittest.TestCase):
 if __name__ == '__main__':
     unittest.main(verbosity=2)