lsy641 committed on
Commit
2c7cea9
1 Parent(s): 196af8b
Files changed (1) hide show
  1. distinct.py +1 -7
distinct.py CHANGED
@@ -117,7 +117,6 @@ class distinct(evaluate.Measurement):
117
 
118
  def _compute(self, predictions, dataForVocabCal=None, vocab_size=None, tokenizer=Tokenizer13a(), mode="Expectation-Adjusted-Distinct"):
119
  from nltk.util import ngrams
120
- from nltk.tokenize import WhitespaceTokenizer
121
 
122
 
123
 
@@ -128,9 +127,7 @@ class distinct(evaluate.Measurement):
128
  raise Warning("We've detected that both vocab_size and dataForVocabCal are specified. We will use dataForVocabCal.")
129
  elif mode == "Distinct":
130
  pass
131
-
132
- if tokenizer == "white_space":
133
- tokenizer = WhitespaceTokenizer()
134
 
135
  if mode == "Expectation-Adjusted-Distinct" and dataForVocabCal is not None:
136
  if isinstance(dataForVocabCal, list) and len(dataForVocabCal) > 0 and isinstance(dataForVocabCal[0], str):
@@ -152,10 +149,7 @@ class distinct(evaluate.Measurement):
152
 
153
  for prediction in predictions:
154
  try:
155
- print(prediction)
156
- print(tokenizer.tokenize(prediction))
157
  tokens = list(tokenizer.tokenize(prediction))
158
- print(tokens)
159
  tokens_2grams = list(ngrams(list(tokenizer.tokenize(prediction)), 2, pad_left=True, left_pad_symbol='<s>'))
160
  tokens_3grams = list(ngrams(list(tokenizer.tokenize(prediction)), 3, pad_left=True, left_pad_symbol='<s>'))
161
  except Exception as e:
 
117
 
118
  def _compute(self, predictions, dataForVocabCal=None, vocab_size=None, tokenizer=Tokenizer13a(), mode="Expectation-Adjusted-Distinct"):
119
  from nltk.util import ngrams
 
120
 
121
 
122
 
 
127
  raise Warning("We've detected that both vocab_size and dataForVocabCal are specified. We will use dataForVocabCal.")
128
  elif mode == "Distinct":
129
  pass
130
+
 
 
131
 
132
  if mode == "Expectation-Adjusted-Distinct" and dataForVocabCal is not None:
133
  if isinstance(dataForVocabCal, list) and len(dataForVocabCal) > 0 and isinstance(dataForVocabCal[0], str):
 
149
 
150
  for prediction in predictions:
151
  try:
 
 
152
  tokens = list(tokenizer.tokenize(prediction))
 
153
  tokens_2grams = list(ngrams(list(tokenizer.tokenize(prediction)), 2, pad_left=True, left_pad_symbol='<s>'))
154
  tokens_3grams = list(ngrams(list(tokenizer.tokenize(prediction)), 3, pad_left=True, left_pad_symbol='<s>'))
155
  except Exception as e: