bowdbeg committed
Commit 8eea7aa · 1 Parent(s): d2b22fa

change dtype to float32

Files changed (2)
  1. __main__.py +13 -5
  2. matching_series.py +12 -6
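
The core of the change is visible in the file list: both the arrays loaded in __main__.py and the preallocated distance tensor in matching_series.py now use float32. That tensor has shape (num_generation, num_reference, num_features), so its dtype dominates peak memory. A minimal sketch of the saving, using made-up sizes rather than anything from this commit:

```python
import numpy as np

# Hypothetical sizes, for illustration only; the real shapes depend on the
# predictions.npy / references.npy files passed to __main__.py.
num_generation, num_reference, num_features = 10_000, 10_000, 10
elements = num_generation * num_reference * num_features

print(f"float64 distance tensor: {elements * np.dtype(np.float64).itemsize / 1e9:.1f} GB")  # 8.0 GB
print(f"float32 distance tensor: {elements * np.dtype(np.float32).itemsize / 1e9:.1f} GB")  # 4.0 GB
```

np.load keeps whatever dtype the .npy files were saved with, so the explicit .astype(np.float32) in __main__.py guarantees the smaller footprint regardless of how the inputs were serialized.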
__main__.py CHANGED
@@ -16,16 +16,19 @@ parser.add_argument("references", type=str, help="Path to the numpy array contai
 parser.add_argument("--output", type=str, help="Path to the output file")
 parser.add_argument("--batch_size", type=int, help="Batch size to use for the computation")
 parser.add_argument("--num_process", type=int, help="Batch size to use for the computation", default=1)
+parser.add_argument("--debug", action="store_true", help="Debug mode")
 args = parser.parse_args()
 
 if not args.predictions or not args.references:
     raise ValueError("You must provide the path to the predictions and references numpy arrays")
 
-predictions = np.load(args.predictions)
-references = np.load(args.references)
 
-predictions = predictions[:1000]
-references = references[:1000]
+predictions = np.load(args.predictions).astype(np.float32)
+references = np.load(args.references).astype(np.float32)
+
+if args.debug:
+    predictions = predictions[:1000]
+    references = references[:1000]
 
 logger.info(f"predictions shape: {predictions.shape}")
 logger.info(f"references shape: {references.shape}")
@@ -36,7 +39,12 @@ s = time.time()
 metric = matching_series.matching_series()
 # metric = evaluate.load("matching_series.py")
 results = metric.compute(
-    predictions=predictions, references=references, batch_size=args.batch_size, num_process=args.num_process
+    predictions=predictions,
+    references=references,
+    batch_size=args.batch_size,
+    num_process=args.num_process,
+    return_each_features=True,
+    return_coverages=True,
 )
 logger.info(f"Time taken: {time.time() - s}")
 
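The matching_series.py hunks below also add mean_distance and macro_mean_distance to the returned scores: the arithmetic mean of the precision and recall distances, next to the existing f1_distance, which is their harmonic mean. A toy comparison (values made up, not taken from the commit):

```python
# Toy values chosen to show how the two aggregates diverge when the
# precision and recall distances are imbalanced.
precision_distance = 0.2
recall_distance = 0.8

f1_distance = 2 / (1 / precision_distance + 1 / recall_distance)  # harmonic mean -> 0.32
mean_distance = (precision_distance + recall_distance) / 2        # arithmetic mean -> 0.5
print(f1_distance, mean_distance)
```

The new dtype keyword defaults to np.float32 and is applied both to the input casts and to the preallocated distance tensor, so callers who need the old precision can presumably pass dtype=np.float64.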
matching_series.py CHANGED
@@ -141,6 +141,7 @@ class matching_series(evaluate.Metric):
         return_each_features: bool = False,
         return_coverages: bool = False,
         return_all: bool = False,
+        dtype=np.float32,
     ):
         """
         Compute the scores of the module given the predictions and references
@@ -159,8 +160,8 @@
             return_matching = True
             return_each_features = True
             return_coverages = True
-        predictions = np.array(predictions)
-        references = np.array(references)
+        predictions = np.array(predictions).astype(dtype)
+        references = np.array(references).astype(dtype)
         if predictions.shape[1:] != references.shape[1:]:
             raise ValueError(
                 "The number of features in the predictions and references should be the same. predictions: {}, references: {}".format(
@@ -173,10 +174,8 @@
         # distance between predictions and references for all example combinations for each features
         # shape: (num_generation, num_reference, num_features)
         if batch_size is not None:
-
             if num_process > 1:
-                distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
-
+                distance = np.zeros((len(predictions), len(references), predictions.shape[-1]), dtype=dtype)
                 idxs = [
                     (i, j)
                     for i in range(0, len(predictions) + batch_size, batch_size)
@@ -195,7 +194,7 @@
                     distance[i : i + batch_size, j : j + batch_size] = d
 
             else:
-                distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
+                distance = np.zeros((len(predictions), len(references), predictions.shape[-1]), dtype=dtype)
                 # iterate over the predictions and references in batches
                 for i in range(0, len(predictions) + batch_size, batch_size):
                     for j in range(0, len(references) + batch_size, batch_size):
@@ -227,6 +226,7 @@
         recall_distance = distance_mean[best_match_inv, np.arange(len(best_match_inv))].mean()
 
         f1_distance = 2 / (1 / precision_distance + 1 / recall_distance)
+        mean_distance = (precision_distance + recall_distance) / 2
 
         # matching precision, recall and f1
         matching_recall = np.unique(best_match).size / len(best_match_inv)
@@ -237,6 +237,7 @@
         precision_distance_features = []
         recall_distance_features = []
         f1_distance_features = []
+        mean_distance_features = []
         matching_precision_features = []
         matching_recall_features = []
         matching_f1_features = []
@@ -251,10 +252,12 @@
             best_match_inv_f = np.argmin(distance_f, axis=0)
             recall_distance_f = distance_f[best_match_inv_f, np.arange(len(best_match_inv_f))].mean()
             f1_distance_f = 2 / (1 / precision_distance_f + 1 / recall_distance_f)
+            mean_distance_f = (precision_distance_f + recall_distance_f) / 2
             precision_distance_features.append(precision_distance_f)
             recall_distance_features.append(recall_distance_f)
             f1_distance_features.append(f1_distance_f)
             index_distance_features.append(index_distance_f)
+            mean_distance_features.append(mean_distance_f)
 
             matching_recall_f = np.unique(best_match_f).size / len(best_match_f)
             matching_precision_f = np.unique(best_match_inv_f).size / len(best_match_inv_f)
@@ -270,6 +273,7 @@
         macro_precision_distance = statistics.mean(precision_distance_features)
         macro_recall_distance = statistics.mean(recall_distance_features)
         macro_f1_distance = statistics.mean(f1_distance_features)
+        macro_mean_distance = statistics.mean(mean_distance_features)
         macro_index_distance = statistics.mean(index_distance_features)
 
         macro_matching_precision = statistics.mean(matching_precision_features)
@@ -285,10 +289,12 @@
             "precision_distance": precision_distance,
             "f1_distance": f1_distance,
             "recall_distance": recall_distance,
+            "mean_distance": mean_distance,
             "index_distance": index_distance,
             "macro_precision_distance": macro_precision_distance,
             "macro_recall_distance": macro_recall_distance,
             "macro_f1_distance": macro_f1_distance,
+            "macro_mean_distance": macro_mean_distance,
             "macro_index_distance": macro_index_distance,
             "matching_precision": matching_precision,
             "matching_recall": matching_recall,