Spaces:
Sleeping
Sleeping
Change dtype to float32
Browse files
- __main__.py +13 -5
- matching_series.py +12 -6
__main__.py
CHANGED
@@ -16,16 +16,19 @@ parser.add_argument("references", type=str, help="Path to the numpy array contai
|
|
16 |
parser.add_argument("--output", type=str, help="Path to the output file")
|
17 |
parser.add_argument("--batch_size", type=int, help="Batch size to use for the computation")
|
18 |
parser.add_argument("--num_process", type=int, help="Batch size to use for the computation", default=1)
|
|
|
19 |
args = parser.parse_args()
|
20 |
|
21 |
if not args.predictions or not args.references:
|
22 |
raise ValueError("You must provide the path to the predictions and references numpy arrays")
|
23 |
|
24 |
-
predictions = np.load(args.predictions)
|
25 |
-
references = np.load(args.references)
|
26 |
|
27 |
-
predictions = predictions
|
28 |
-
references = references
|
|
|
|
|
|
|
|
|
29 |
|
30 |
logger.info(f"predictions shape: {predictions.shape}")
|
31 |
logger.info(f"references shape: {references.shape}")
|
@@ -36,7 +39,12 @@ s = time.time()
|
|
36 |
metric = matching_series.matching_series()
|
37 |
# metric = evaluate.load("matching_series.py")
|
38 |
results = metric.compute(
|
39 |
-
predictions=predictions,
|
|
|
|
|
|
|
|
|
|
|
40 |
)
|
41 |
logger.info(f"Time taken: {time.time() - s}")
|
42 |
|
|
|
16 |
parser.add_argument("--output", type=str, help="Path to the output file")
|
17 |
parser.add_argument("--batch_size", type=int, help="Batch size to use for the computation")
|
18 |
parser.add_argument("--num_process", type=int, help="Batch size to use for the computation", default=1)
|
19 |
+
parser.add_argument("--debug", action="store_true", help="Debug mode")
|
20 |
args = parser.parse_args()
|
21 |
|
22 |
if not args.predictions or not args.references:
|
23 |
raise ValueError("You must provide the path to the predictions and references numpy arrays")
|
24 |
|
|
|
|
|
25 |
|
26 |
+
predictions = np.load(args.predictions).astype(np.float32)
|
27 |
+
references = np.load(args.references).astype(np.float32)
|
28 |
+
|
29 |
+
if args.debug:
|
30 |
+
predictions = predictions[:1000]
|
31 |
+
references = references[:1000]
|
32 |
|
33 |
logger.info(f"predictions shape: {predictions.shape}")
|
34 |
logger.info(f"references shape: {references.shape}")
|
|
|
39 |
metric = matching_series.matching_series()
|
40 |
# metric = evaluate.load("matching_series.py")
|
41 |
results = metric.compute(
|
42 |
+
predictions=predictions,
|
43 |
+
references=references,
|
44 |
+
batch_size=args.batch_size,
|
45 |
+
num_process=args.num_process,
|
46 |
+
return_each_features=True,
|
47 |
+
return_coverages=True,
|
48 |
)
|
49 |
logger.info(f"Time taken: {time.time() - s}")
|
50 |
|
matching_series.py
CHANGED
@@ -141,6 +141,7 @@ class matching_series(evaluate.Metric):
|
|
141 |
return_each_features: bool = False,
|
142 |
return_coverages: bool = False,
|
143 |
return_all: bool = False,
|
|
|
144 |
):
|
145 |
"""
|
146 |
Compute the scores of the module given the predictions and references
|
@@ -159,8 +160,8 @@ class matching_series(evaluate.Metric):
|
|
159 |
return_matching = True
|
160 |
return_each_features = True
|
161 |
return_coverages = True
|
162 |
-
predictions = np.array(predictions)
|
163 |
-
references = np.array(references)
|
164 |
if predictions.shape[1:] != references.shape[1:]:
|
165 |
raise ValueError(
|
166 |
"The number of features in the predictions and references should be the same. predictions: {}, references: {}".format(
|
@@ -173,10 +174,8 @@ class matching_series(evaluate.Metric):
|
|
173 |
# distance between predictions and references for all example combinations for each features
|
174 |
# shape: (num_generation, num_reference, num_features)
|
175 |
if batch_size is not None:
|
176 |
-
|
177 |
if num_process > 1:
|
178 |
-
distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
|
179 |
-
|
180 |
idxs = [
|
181 |
(i, j)
|
182 |
for i in range(0, len(predictions) + batch_size, batch_size)
|
@@ -195,7 +194,7 @@ class matching_series(evaluate.Metric):
|
|
195 |
distance[i : i + batch_size, j : j + batch_size] = d
|
196 |
|
197 |
else:
|
198 |
-
distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
|
199 |
# iterate over the predictions and references in batches
|
200 |
for i in range(0, len(predictions) + batch_size, batch_size):
|
201 |
for j in range(0, len(references) + batch_size, batch_size):
|
@@ -227,6 +226,7 @@ class matching_series(evaluate.Metric):
|
|
227 |
recall_distance = distance_mean[best_match_inv, np.arange(len(best_match_inv))].mean()
|
228 |
|
229 |
f1_distance = 2 / (1 / precision_distance + 1 / recall_distance)
|
|
|
230 |
|
231 |
# matching precision, recall and f1
|
232 |
matching_recall = np.unique(best_match).size / len(best_match_inv)
|
@@ -237,6 +237,7 @@ class matching_series(evaluate.Metric):
|
|
237 |
precision_distance_features = []
|
238 |
recall_distance_features = []
|
239 |
f1_distance_features = []
|
|
|
240 |
matching_precision_features = []
|
241 |
matching_recall_features = []
|
242 |
matching_f1_features = []
|
@@ -251,10 +252,12 @@ class matching_series(evaluate.Metric):
|
|
251 |
best_match_inv_f = np.argmin(distance_f, axis=0)
|
252 |
recall_distance_f = distance_f[best_match_inv_f, np.arange(len(best_match_inv_f))].mean()
|
253 |
f1_distance_f = 2 / (1 / precision_distance_f + 1 / recall_distance_f)
|
|
|
254 |
precision_distance_features.append(precision_distance_f)
|
255 |
recall_distance_features.append(recall_distance_f)
|
256 |
f1_distance_features.append(f1_distance_f)
|
257 |
index_distance_features.append(index_distance_f)
|
|
|
258 |
|
259 |
matching_recall_f = np.unique(best_match_f).size / len(best_match_f)
|
260 |
matching_precision_f = np.unique(best_match_inv_f).size / len(best_match_inv_f)
|
@@ -270,6 +273,7 @@ class matching_series(evaluate.Metric):
|
|
270 |
macro_precision_distance = statistics.mean(precision_distance_features)
|
271 |
macro_recall_distance = statistics.mean(recall_distance_features)
|
272 |
macro_f1_distance = statistics.mean(f1_distance_features)
|
|
|
273 |
macro_index_distance = statistics.mean(index_distance_features)
|
274 |
|
275 |
macro_matching_precision = statistics.mean(matching_precision_features)
|
@@ -285,10 +289,12 @@ class matching_series(evaluate.Metric):
|
|
285 |
"precision_distance": precision_distance,
|
286 |
"f1_distance": f1_distance,
|
287 |
"recall_distance": recall_distance,
|
|
|
288 |
"index_distance": index_distance,
|
289 |
"macro_precision_distance": macro_precision_distance,
|
290 |
"macro_recall_distance": macro_recall_distance,
|
291 |
"macro_f1_distance": macro_f1_distance,
|
|
|
292 |
"macro_index_distance": macro_index_distance,
|
293 |
"matching_precision": matching_precision,
|
294 |
"matching_recall": matching_recall,
|
|
|
141 |
return_each_features: bool = False,
|
142 |
return_coverages: bool = False,
|
143 |
return_all: bool = False,
|
144 |
+
dtype=np.float32,
|
145 |
):
|
146 |
"""
|
147 |
Compute the scores of the module given the predictions and references
|
|
|
160 |
return_matching = True
|
161 |
return_each_features = True
|
162 |
return_coverages = True
|
163 |
+
predictions = np.array(predictions).astype(dtype)
|
164 |
+
references = np.array(references).astype(dtype)
|
165 |
if predictions.shape[1:] != references.shape[1:]:
|
166 |
raise ValueError(
|
167 |
"The number of features in the predictions and references should be the same. predictions: {}, references: {}".format(
|
|
|
174 |
# distance between predictions and references for all example combinations for each features
|
175 |
# shape: (num_generation, num_reference, num_features)
|
176 |
if batch_size is not None:
|
|
|
177 |
if num_process > 1:
|
178 |
+
distance = np.zeros((len(predictions), len(references), predictions.shape[-1]), dtype=dtype)
|
|
|
179 |
idxs = [
|
180 |
(i, j)
|
181 |
for i in range(0, len(predictions) + batch_size, batch_size)
|
|
|
194 |
distance[i : i + batch_size, j : j + batch_size] = d
|
195 |
|
196 |
else:
|
197 |
+
distance = np.zeros((len(predictions), len(references), predictions.shape[-1]), dtype=dtype)
|
198 |
# iterate over the predictions and references in batches
|
199 |
for i in range(0, len(predictions) + batch_size, batch_size):
|
200 |
for j in range(0, len(references) + batch_size, batch_size):
|
|
|
226 |
recall_distance = distance_mean[best_match_inv, np.arange(len(best_match_inv))].mean()
|
227 |
|
228 |
f1_distance = 2 / (1 / precision_distance + 1 / recall_distance)
|
229 |
+
mean_distance = (precision_distance + recall_distance) / 2
|
230 |
|
231 |
# matching precision, recall and f1
|
232 |
matching_recall = np.unique(best_match).size / len(best_match_inv)
|
|
|
237 |
precision_distance_features = []
|
238 |
recall_distance_features = []
|
239 |
f1_distance_features = []
|
240 |
+
mean_distance_features = []
|
241 |
matching_precision_features = []
|
242 |
matching_recall_features = []
|
243 |
matching_f1_features = []
|
|
|
252 |
best_match_inv_f = np.argmin(distance_f, axis=0)
|
253 |
recall_distance_f = distance_f[best_match_inv_f, np.arange(len(best_match_inv_f))].mean()
|
254 |
f1_distance_f = 2 / (1 / precision_distance_f + 1 / recall_distance_f)
|
255 |
+
mean_distance_f = (precision_distance_f + recall_distance_f) / 2
|
256 |
precision_distance_features.append(precision_distance_f)
|
257 |
recall_distance_features.append(recall_distance_f)
|
258 |
f1_distance_features.append(f1_distance_f)
|
259 |
index_distance_features.append(index_distance_f)
|
260 |
+
mean_distance_features.append(mean_distance_f)
|
261 |
|
262 |
matching_recall_f = np.unique(best_match_f).size / len(best_match_f)
|
263 |
matching_precision_f = np.unique(best_match_inv_f).size / len(best_match_inv_f)
|
|
|
273 |
macro_precision_distance = statistics.mean(precision_distance_features)
|
274 |
macro_recall_distance = statistics.mean(recall_distance_features)
|
275 |
macro_f1_distance = statistics.mean(f1_distance_features)
|
276 |
+
macro_mean_distance = statistics.mean(mean_distance_features)
|
277 |
macro_index_distance = statistics.mean(index_distance_features)
|
278 |
|
279 |
macro_matching_precision = statistics.mean(matching_precision_features)
|
|
|
289 |
"precision_distance": precision_distance,
|
290 |
"f1_distance": f1_distance,
|
291 |
"recall_distance": recall_distance,
|
292 |
+
"mean_distance": mean_distance,
|
293 |
"index_distance": index_distance,
|
294 |
"macro_precision_distance": macro_precision_distance,
|
295 |
"macro_recall_distance": macro_recall_distance,
|
296 |
"macro_f1_distance": macro_f1_distance,
|
297 |
+
"macro_mean_distance": macro_mean_distance,
|
298 |
"macro_index_distance": macro_index_distance,
|
299 |
"matching_precision": matching_precision,
|
300 |
"matching_recall": matching_recall,
|