codebyzeb committed on
Commit
313d779
1 Parent(s): 35ec54c

Update segmentation_scores.py

Browse files
Files changed (1) hide show
  1. segmentation_scores.py +7 -7
segmentation_scores.py CHANGED
@@ -38,10 +38,10 @@ Calculates how good are predicted segmentations, using boundary, token and type
38
  Args:
39
  predictions: list of segmented utterances to score. Each prediction
40
  should be a string with phonemes separated by spaces and estimated word boundaries
41
- denoted by the token ';eword'.
42
  references: list of segmented utterances to score. Each reference
43
  should be a string with phonemes separated by spaces and gold word boundaries
44
- denoted by the token ';eword'.
45
  Returns:
46
  type_fscore: lexicon f1 score
47
  type_precision: lexicon precision
@@ -57,7 +57,7 @@ Returns:
57
  boundary_noedge_recall: boundary recall, excluding utterance boundaries
58
  Examples:
59
  >>> segmentation_scores = evaluate.load("transformersegmentation/segmentation_scores")
60
- >>> results = segmentation_scores.compute(references=["w ɛ ɹ ;eword ɪ z ;eword ð ɪ s ;eword", "l ɪ ɾ əl ;eword aɪ z ;eword"], predictions=["w ɛ ɹ ;eword ɪ z ;eword ð ɪ s ;eword", "l ɪ ɾ əl ;eword aɪ z ;eword"])
61
  >>> print(results)
62
  {'type_fscore': 1.0, 'type_precision': 1.0, 'type_recall': 1.0, 'token_fscore': 1.0, 'token_precision': 1.0, 'token_recall': 1.0, 'boundary_all_fscore': 1.0, 'boundary_all_precision': 1.0, 'boundary_all_recall': 1.0, 'boundary_noedge_fscore': 1.0, 'boundary_noedge_precision': 1.0, 'boundary_noedge_recall': 1.0}
63
  """
@@ -227,11 +227,11 @@ class segmentation_scores(evaluate.Metric):
227
  for utt in (utt for utt in text if utt.strip()):
228
  # list of phones in the utterance with word separator removed
229
  phone_in_utterance = [
230
- phone for phone in utt.split(" ") if phone != ";eword"
231
  ]
232
  words_in_utterance = (
233
  "".join(
234
- " " if phone == ";eword" else phone for phone in utt.split(" ")
235
  )
236
  .strip()
237
  .split(" ")
@@ -252,9 +252,9 @@ class segmentation_scores(evaluate.Metric):
252
  Parameters
253
  ----------
254
  predictions : sequence of str
255
- A suite of word utterances, each string using ';eword' as the word separator.
256
  references : sequence of str
257
- A suite of word utterances, each string using ';eword' as the word separator.
258
 
259
  Returns
260
  -------
 
38
  Args:
39
  predictions: list of segmented utterances to score. Each prediction
40
  should be a string with phonemes separated by spaces and estimated word boundaries
41
+ denoted by the token 'WORD_BOUNDARY'.
42
  references: list of segmented utterances to score. Each reference
43
  should be a string with phonemes separated by spaces and gold word boundaries
44
+ denoted by the token 'WORD_BOUNDARY'.
45
  Returns:
46
  type_fscore: lexicon f1 score
47
  type_precision: lexicon precision
 
57
  boundary_noedge_recall: boundary recall, excluding utterance boundaries
58
  Examples:
59
  >>> segmentation_scores = evaluate.load("transformersegmentation/segmentation_scores")
60
+ >>> results = segmentation_scores.compute(references=["w ɛ ɹ WORD_BOUNDARY ɪ z WORD_BOUNDARY ð ɪ s WORD_BOUNDARY", "l ɪ ɾ əl WORD_BOUNDARY aɪ z WORD_BOUNDARY"], predictions=["w ɛ ɹ WORD_BOUNDARY ɪ z WORD_BOUNDARY ð ɪ s WORD_BOUNDARY", "l ɪ ɾ əl WORD_BOUNDARY aɪ z WORD_BOUNDARY"])
61
  >>> print(results)
62
  {'type_fscore': 1.0, 'type_precision': 1.0, 'type_recall': 1.0, 'token_fscore': 1.0, 'token_precision': 1.0, 'token_recall': 1.0, 'boundary_all_fscore': 1.0, 'boundary_all_precision': 1.0, 'boundary_all_recall': 1.0, 'boundary_noedge_fscore': 1.0, 'boundary_noedge_precision': 1.0, 'boundary_noedge_recall': 1.0}
63
  """
 
227
  for utt in (utt for utt in text if utt.strip()):
228
  # list of phones in the utterance with word separator removed
229
  phone_in_utterance = [
230
+ phone for phone in utt.split(" ") if phone != "WORD_BOUNDARY"
231
  ]
232
  words_in_utterance = (
233
  "".join(
234
+ " " if phone == "WORD_BOUNDARY" else phone for phone in utt.split(" ")
235
  )
236
  .strip()
237
  .split(" ")
 
252
  Parameters
253
  ----------
254
  predictions : sequence of str
255
+ A suite of word utterances, each string using 'WORD_BOUNDARY' as the word separator.
256
  references : sequence of str
257
+ A suite of word utterances, each string using 'WORD_BOUNDARY' as the word separator.
258
 
259
  Returns
260
  -------