nbansal committed on
Commit
6deb98d
·
1 Parent(s): 6e1f1ed
Files changed (5) hide show
  1. README.md +3 -7
  2. __init__.py +0 -0
  3. encoder_models.py +22 -22
  4. semf1.py +5 -3
  5. tests.py +4 -4
README.md CHANGED
@@ -57,14 +57,10 @@ for score in results:
57
  Sem-F1 also accepts multiple optional arguments:
58
 
59
 
60
- - `model_type (str)`: Model to use for encoding sentences. Options: ['pv1', 'stsb', 'use']
61
 
62
- - `pv1` - [paraphrase-distilroberta-base-v1](https://huggingface.co/sentence-transformers/paraphrase-distilroberta-base-v1)
63
- - `stsb` - [stsb-roberta-large](https://huggingface.co/sentence-transformers/stsb-roberta-large)
64
- - `use` - [Universal Sentence Encoder](https://huggingface.co/sentence-transformers/use-cmlm-multilingual) (Default)
65
-
66
- Furthermore, you can use any model on Huggingface/SentenceTransformer that is supported by SentenceTransformer
67
- such as `all-mpnet-base-v2` or `roberta-base`
68
 
69
  - `tokenize_sentences (bool)`: Flag to indicate whether to tokenize the sentences in the input documents. Default: True.
70
  - `multi_references (bool)`: Flag to indicate whether multiple references are provided. Default: False.
 
57
  Sem-F1 also accepts multiple optional arguments:
58
 
59
 
60
+ - `model_type (str)`: Model to use for encoding sentences. Options: ['pv1' ([paraphrase-distilroberta-base-v1](https://huggingface.co/sentence-transformers/paraphrase-distilroberta-base-v1)), 'stsb' ([stsb-roberta-large](https://huggingface.co/sentence-transformers/stsb-roberta-large)), 'use' ([Universal Sentence Encoder](https://huggingface.co/sentence-transformers/use-cmlm-multilingual)) (Default)]
61
 
62
+ Furthermore, you can use any other model from the Hugging Face Hub that SentenceTransformer supports,
63
+ such as `all-mpnet-base-v2` or `roberta-base`
 
 
 
 
64
 
65
  - `tokenize_sentences (bool)`: Flag to indicate whether to tokenize the sentences in the input documents. Default: True.
66
  - `multi_references (bool)`: Flag to indicate whether multiple references are provided. Default: False.
__init__.py ADDED
File without changes
encoder_models.py CHANGED
@@ -72,28 +72,28 @@ class SBertEncoder(Encoder):
72
 
73
  def get_encoder(model_name: str, device: ENCODER_DEVICE_TYPE, batch_size: int, verbose: bool) -> Encoder:
74
  """
75
- Get the encoder instance based on the specified model name.
76
-
77
- Args:
78
- model_name (str): Name of the model to instantiate
79
- Options:
80
- paraphrase-distilroberta-base-v1,
81
- stsb-roberta-large,
82
- sentence-transformers/use-cmlm-multilingual
83
- Furthermore, you can use any model on Huggingface/SentenceTransformer that is supported by
84
- SentenceTransformer.
85
-
86
- device (Union[str, int, List[Union[str, int]]): Device specification for the encoder
87
- (e.g., "cuda", 0 for GPU, "cpu").
88
- batch_size (int): Batch size for encoding.
89
- verbose (bool): Whether to print verbose information during encoder initialization.
90
-
91
- Returns:
92
- Encoder: Instance of the selected encoder based on the model_name.
93
-
94
- Raises:
95
- EnvironmentError/RuntimeError: If an unsupported model_name is provided.
96
- """
97
 
98
  try:
99
  encoder = SBertEncoder(model_name, device, batch_size, verbose)
 
72
 
73
  def get_encoder(model_name: str, device: ENCODER_DEVICE_TYPE, batch_size: int, verbose: bool) -> Encoder:
74
  """
75
+ Get the encoder instance based on the specified model name.
76
+
77
+ Args:
78
+ model_name (str): Name of the model to instantiate
79
+ Options:
80
+ paraphrase-distilroberta-base-v1,
81
+ stsb-roberta-large,
82
+ sentence-transformers/use-cmlm-multilingual
83
+ Furthermore, you can use any model on Huggingface/SentenceTransformer that is supported by
84
+ SentenceTransformer.
85
+
86
+ device (Union[str, int, List[Union[str, int]]]): Device specification for the encoder
87
+ (e.g., "cuda", 0 for GPU, "cpu").
88
+ batch_size (int): Batch size for encoding.
89
+ verbose (bool): Whether to print verbose information during encoder initialization.
90
+
91
+ Returns:
92
+ Encoder: Instance of the selected encoder based on the model_name.
93
+
94
+ Raises:
95
+ EnvironmentError/RuntimeError: If an unsupported model_name is provided.
96
+ """
97
 
98
  try:
99
  encoder = SBertEncoder(model_name, device, batch_size, verbose)
semf1.py CHANGED
@@ -11,8 +11,10 @@
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
- # TODO: Add test cases, Remove tokenize_sentences flag since it can be determined from the input itself.
15
- """Sem-F1 metric"""
 
 
16
 
17
  from typing import List, Optional, Tuple
18
 
@@ -141,7 +143,7 @@ Examples:
141
  ["I go to School. You are stupid."],
142
  ["I love outdoor sports."],
143
  ]
144
- >>> metric = evaluate.load("semf1")
145
  >>> results = metric.compute(predictions=predictions, references=references)
146
  >>> for score in results:
147
  >>> print(f"Precision: {score.precision}, Recall: {score.recall}, F1: {score.f1}")
 
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
+ """
15
+ Sem-F1 metric
16
+ Author: Naman Bansal
17
+ """
18
 
19
  from typing import List, Optional, Tuple
20
 
 
143
  ["I go to School. You are stupid."],
144
  ["I love outdoor sports."],
145
  ]
146
+ >>> metric = evaluate.load("nbansal/semf1")
147
  >>> results = metric.compute(predictions=predictions, references=references)
148
  >>> for score in results:
149
  >>> print(f"Precision: {score.precision}, Recall: {score.recall}, F1: {score.f1}")
tests.py CHANGED
@@ -8,9 +8,9 @@ from numpy.testing import assert_almost_equal
8
  from sentence_transformers import SentenceTransformer
9
  from sklearn.metrics.pairwise import cosine_similarity
10
 
11
- from encoder_models import SBertEncoder, get_encoder
12
- from semf1 import SemF1, _compute_cosine_similarity, _validate_input_format
13
- from utils import get_gpu, slice_embeddings, is_nested_list_of_type, flatten_list, compute_f1, Scores
14
 
15
 
16
  class TestUtils(unittest.TestCase):
@@ -509,4 +509,4 @@ class TestValidateInputFormat(unittest.TestCase):
509
 
510
  if __name__ == '__main__':
511
  unittest.main(verbosity=2)
512
- # unittest.main()
 
8
  from sentence_transformers import SentenceTransformer
9
  from sklearn.metrics.pairwise import cosine_similarity
10
 
11
+ from .encoder_models import SBertEncoder, get_encoder
12
+ from .semf1 import SemF1, _compute_cosine_similarity, _validate_input_format
13
+ from .utils import get_gpu, slice_embeddings, is_nested_list_of_type, flatten_list, compute_f1, Scores
14
 
15
 
16
  class TestUtils(unittest.TestCase):
 
509
 
510
  if __name__ == '__main__':
511
  unittest.main(verbosity=2)
512
+