bowdbeg committed
Commit 8eea7aa · 1 Parent(s): d2b22fa

change dtype to float32

Files changed (2)
  1. __main__.py +13 -5
  2. matching_series.py +12 -6
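
The core of the change is visible in the file list: both the arrays loaded in __main__.py and the preallocated distance tensor in matching_series.py now use float32. That tensor has shape (num_generation, num_reference, num_features), so its dtype dominates peak memory. A minimal sketch of the saving, using made-up sizes rather than anything from this commit:

```python
import numpy as np

# Hypothetical sizes, for illustration only; the real shapes depend on the
# predictions.npy / references.npy files passed to __main__.py.
num_generation, num_reference, num_features = 10_000, 10_000, 10
elements = num_generation * num_reference * num_features

print(f"float64 distance tensor: {elements * np.dtype(np.float64).itemsize / 1e9:.1f} GB")  # 8.0 GB
print(f"float32 distance tensor: {elements * np.dtype(np.float32).itemsize / 1e9:.1f} GB")  # 4.0 GB
```

np.load keeps whatever dtype the .npy files were saved with, so the explicit .astype(np.float32) in __main__.py guarantees the smaller footprint regardless of how the inputs were serialized.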
__main__.py CHANGED
@@ -16,16 +16,19 @@ parser.add_argument("references", type=str, help="Path to the numpy array contai
 parser.add_argument("--output", type=str, help="Path to the output file")
 parser.add_argument("--batch_size", type=int, help="Batch size to use for the computation")
 parser.add_argument("--num_process", type=int, help="Batch size to use for the computation", default=1)
+parser.add_argument("--debug", action="store_true", help="Debug mode")
 args = parser.parse_args()
 
 if not args.predictions or not args.references:
     raise ValueError("You must provide the path to the predictions and references numpy arrays")
 
-predictions = np.load(args.predictions)
-references = np.load(args.references)
 
-predictions = predictions[:1000]
-references = references[:1000]
+predictions = np.load(args.predictions).astype(np.float32)
+references = np.load(args.references).astype(np.float32)
+
+if args.debug:
+    predictions = predictions[:1000]
+    references = references[:1000]
 
 logger.info(f"predictions shape: {predictions.shape}")
 logger.info(f"references shape: {references.shape}")
@@ -36,7 +39,12 @@ s = time.time()
 metric = matching_series.matching_series()
 # metric = evaluate.load("matching_series.py")
 results = metric.compute(
-    predictions=predictions, references=references, batch_size=args.batch_size, num_process=args.num_process
+    predictions=predictions,
+    references=references,
+    batch_size=args.batch_size,
+    num_process=args.num_process,
+    return_each_features=True,
+    return_coverages=True,
 )
 logger.info(f"Time taken: {time.time() - s}")
 
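The matching_series.py hunks below also add mean_distance and macro_mean_distance to the returned scores: the arithmetic mean of the precision and recall distances, next to the existing f1_distance, which is their harmonic mean. A toy comparison (values made up, not taken from the commit):

```python
# Toy values chosen to show how the two aggregates diverge when the
# precision and recall distances are imbalanced.
precision_distance = 0.2
recall_distance = 0.8

f1_distance = 2 / (1 / precision_distance + 1 / recall_distance)  # harmonic mean -> 0.32
mean_distance = (precision_distance + recall_distance) / 2        # arithmetic mean -> 0.5
print(f1_distance, mean_distance)
```

The new dtype keyword defaults to np.float32 and is applied both to the input casts and to the preallocated distance tensor, so callers who need the old precision can presumably pass dtype=np.float64.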
matching_series.py CHANGED
@@ -141,6 +141,7 @@ class matching_series(evaluate.Metric):
         return_each_features: bool = False,
         return_coverages: bool = False,
         return_all: bool = False,
+        dtype=np.float32,
     ):
         """
         Compute the scores of the module given the predictions and references
@@ -159,8 +160,8 @@
             return_matching = True
             return_each_features = True
             return_coverages = True
-        predictions = np.array(predictions)
-        references = np.array(references)
+        predictions = np.array(predictions).astype(dtype)
+        references = np.array(references).astype(dtype)
         if predictions.shape[1:] != references.shape[1:]:
             raise ValueError(
                 "The number of features in the predictions and references should be the same. predictions: {}, references: {}".format(
@@ -173,10 +174,8 @@
         # distance between predictions and references for all example combinations for each features
         # shape: (num_generation, num_reference, num_features)
         if batch_size is not None:
-
             if num_process > 1:
-                distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
-
+                distance = np.zeros((len(predictions), len(references), predictions.shape[-1]), dtype=dtype)
                 idxs = [
                     (i, j)
                     for i in range(0, len(predictions) + batch_size, batch_size)
@@ -195,7 +194,7 @@
                     distance[i : i + batch_size, j : j + batch_size] = d
 
             else:
-                distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
+                distance = np.zeros((len(predictions), len(references), predictions.shape[-1]), dtype=dtype)
                 # iterate over the predictions and references in batches
                 for i in range(0, len(predictions) + batch_size, batch_size):
                     for j in range(0, len(references) + batch_size, batch_size):
@@ -227,6 +226,7 @@
         recall_distance = distance_mean[best_match_inv, np.arange(len(best_match_inv))].mean()
 
         f1_distance = 2 / (1 / precision_distance + 1 / recall_distance)
+        mean_distance = (precision_distance + recall_distance) / 2
 
         # matching precision, recall and f1
         matching_recall = np.unique(best_match).size / len(best_match_inv)
@@ -237,6 +237,7 @@
         precision_distance_features = []
         recall_distance_features = []
         f1_distance_features = []
+        mean_distance_features = []
         matching_precision_features = []
         matching_recall_features = []
         matching_f1_features = []
@@ -251,10 +252,12 @@
             best_match_inv_f = np.argmin(distance_f, axis=0)
             recall_distance_f = distance_f[best_match_inv_f, np.arange(len(best_match_inv_f))].mean()
             f1_distance_f = 2 / (1 / precision_distance_f + 1 / recall_distance_f)
+            mean_distance_f = (precision_distance_f + recall_distance_f) / 2
             precision_distance_features.append(precision_distance_f)
             recall_distance_features.append(recall_distance_f)
             f1_distance_features.append(f1_distance_f)
             index_distance_features.append(index_distance_f)
+            mean_distance_features.append(mean_distance_f)
 
             matching_recall_f = np.unique(best_match_f).size / len(best_match_f)
             matching_precision_f = np.unique(best_match_inv_f).size / len(best_match_inv_f)
@@ -270,6 +273,7 @@
         macro_precision_distance = statistics.mean(precision_distance_features)
         macro_recall_distance = statistics.mean(recall_distance_features)
         macro_f1_distance = statistics.mean(f1_distance_features)
+        macro_mean_distance = statistics.mean(mean_distance_features)
         macro_index_distance = statistics.mean(index_distance_features)
 
         macro_matching_precision = statistics.mean(matching_precision_features)
@@ -285,10 +289,12 @@
             "precision_distance": precision_distance,
             "f1_distance": f1_distance,
             "recall_distance": recall_distance,
+            "mean_distance": mean_distance,
             "index_distance": index_distance,
             "macro_precision_distance": macro_precision_distance,
             "macro_recall_distance": macro_recall_distance,
             "macro_f1_distance": macro_f1_distance,
+            "macro_mean_distance": macro_mean_distance,
             "macro_index_distance": macro_index_distance,
             "matching_precision": matching_precision,
             "matching_recall": matching_recall,