Spaces:
Sleeping
Sleeping
Minor
Browse files- README.md +3 -7
- __init__.py +0 -0
- encoder_models.py +22 -22
- semf1.py +5 -3
- tests.py +4 -4
README.md
CHANGED
@@ -57,14 +57,10 @@ for score in results:
|
|
57 |
Sem-F1 also accepts multiple optional arguments:
|
58 |
|
59 |
|
60 |
-
- `model_type (str)`: Model to use for encoding sentences. Options: ['pv1', 'stsb', 'use']
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
- `use` - [Universal Sentence Encoder](https://huggingface.co/sentence-transformers/use-cmlm-multilingual) (Default)
|
65 |
-
|
66 |
-
Furthermore, you can use any model on Huggingface/SentenceTransformer that is supported by SentenceTransformer
|
67 |
-
such as `all-mpnet-base-v2` or `roberta-base`
|
68 |
|
69 |
- `tokenize_sentences (bool)`: Flag to indicate whether to tokenize the sentences in the input documents. Default: True.
|
70 |
- `multi_references (bool)`: Flag to indicate whether multiple references are provided. Default: False.
|
|
|
57 |
Sem-F1 also accepts multiple optional arguments:
|
58 |
|
59 |
|
60 |
+
- `model_type (str)`: Model to use for encoding sentences. Options: ['pv1' ([paraphrase-distilroberta-base-v1](https://huggingface.co/sentence-transformers/paraphrase-distilroberta-base-v1)), 'stsb' ([stsb-roberta-large](https://huggingface.co/sentence-transformers/stsb-roberta-large)), 'use' ([Universal Sentence Encoder](https://huggingface.co/sentence-transformers/use-cmlm-multilingual)) (Default)]
|
61 |
|
62 |
+
Furthermore, you can use any model on Huggingface/SentenceTransformer that is supported by SentenceTransformer
|
63 |
+
such as `all-mpnet-base-v2` or `roberta-base`
|
|
|
|
|
|
|
|
|
64 |
|
65 |
- `tokenize_sentences (bool)`: Flag to indicate whether to tokenize the sentences in the input documents. Default: True.
|
66 |
- `multi_references (bool)`: Flag to indicate whether multiple references are provided. Default: False.
|
__init__.py
ADDED
File without changes
|
encoder_models.py
CHANGED
@@ -72,28 +72,28 @@ class SBertEncoder(Encoder):
|
|
72 |
|
73 |
def get_encoder(model_name: str, device: ENCODER_DEVICE_TYPE, batch_size: int, verbose: bool) -> Encoder:
|
74 |
"""
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
|
98 |
try:
|
99 |
encoder = SBertEncoder(model_name, device, batch_size, verbose)
|
|
|
72 |
|
73 |
def get_encoder(model_name: str, device: ENCODER_DEVICE_TYPE, batch_size: int, verbose: bool) -> Encoder:
|
74 |
"""
|
75 |
+
Get the encoder instance based on the specified model name.
|
76 |
+
|
77 |
+
Args:
|
78 |
+
model_name (str): Name of the model to instantiate
|
79 |
+
Options:
|
80 |
+
paraphrase-distilroberta-base-v1,
|
81 |
+
stsb-roberta-large,
|
82 |
+
sentence-transformers/use-cmlm-multilingual
|
83 |
+
Furthermore, you can use any model on Huggingface/SentenceTransformer that is supported by
|
84 |
+
SentenceTransformer.
|
85 |
+
|
86 |
+
device (Union[str, int, List[Union[str, int]]]): Device specification for the encoder
|
87 |
+
(e.g., "cuda", 0 for GPU, "cpu").
|
88 |
+
batch_size (int): Batch size for encoding.
|
89 |
+
verbose (bool): Whether to print verbose information during encoder initialization.
|
90 |
+
|
91 |
+
Returns:
|
92 |
+
Encoder: Instance of the selected encoder based on the model_name.
|
93 |
+
|
94 |
+
Raises:
|
95 |
+
EnvironmentError/RuntimeError: If an unsupported model_name is provided.
|
96 |
+
"""
|
97 |
|
98 |
try:
|
99 |
encoder = SBertEncoder(model_name, device, batch_size, verbose)
|
semf1.py
CHANGED
@@ -11,8 +11,10 @@
|
|
11 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
-
|
15 |
-
|
|
|
|
|
16 |
|
17 |
from typing import List, Optional, Tuple
|
18 |
|
@@ -141,7 +143,7 @@ Examples:
|
|
141 |
["I go to School. You are stupid."],
|
142 |
["I love outdoor sports."],
|
143 |
]
|
144 |
-
>>> metric = evaluate.load("semf1")
|
145 |
>>> results = metric.compute(predictions=predictions, references=references)
|
146 |
>>> for score in results:
|
147 |
>>> print(f"Precision: {score.precision}, Recall: {score.recall}, F1: {score.f1}")
|
|
|
11 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
+
"""
|
15 |
+
Sem-F1 metric
|
16 |
+
Author: Naman Bansal
|
17 |
+
"""
|
18 |
|
19 |
from typing import List, Optional, Tuple
|
20 |
|
|
|
143 |
["I go to School. You are stupid."],
|
144 |
["I love outdoor sports."],
|
145 |
]
|
146 |
+
>>> metric = evaluate.load("nbansal/semf1")
|
147 |
>>> results = metric.compute(predictions=predictions, references=references)
|
148 |
>>> for score in results:
|
149 |
>>> print(f"Precision: {score.precision}, Recall: {score.recall}, F1: {score.f1}")
|
tests.py
CHANGED
@@ -8,9 +8,9 @@ from numpy.testing import assert_almost_equal
|
|
8 |
from sentence_transformers import SentenceTransformer
|
9 |
from sklearn.metrics.pairwise import cosine_similarity
|
10 |
|
11 |
-
from encoder_models import SBertEncoder, get_encoder
|
12 |
-
from semf1 import SemF1, _compute_cosine_similarity, _validate_input_format
|
13 |
-
from utils import get_gpu, slice_embeddings, is_nested_list_of_type, flatten_list, compute_f1, Scores
|
14 |
|
15 |
|
16 |
class TestUtils(unittest.TestCase):
|
@@ -509,4 +509,4 @@ class TestValidateInputFormat(unittest.TestCase):
|
|
509 |
|
510 |
if __name__ == '__main__':
|
511 |
unittest.main(verbosity=2)
|
512 |
-
|
|
|
8 |
from sentence_transformers import SentenceTransformer
|
9 |
from sklearn.metrics.pairwise import cosine_similarity
|
10 |
|
11 |
+
from .encoder_models import SBertEncoder, get_encoder
|
12 |
+
from .semf1 import SemF1, _compute_cosine_similarity, _validate_input_format
|
13 |
+
from .utils import get_gpu, slice_embeddings, is_nested_list_of_type, flatten_list, compute_f1, Scores
|
14 |
|
15 |
|
16 |
class TestUtils(unittest.TestCase):
|
|
|
509 |
|
510 |
if __name__ == '__main__':
|
511 |
unittest.main(verbosity=2)
|
512 |
+
|