Elron committed on
Commit
43978ec
·
verified ·
1 Parent(s): 785b9b9

Upload metrics.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. metrics.py +26 -10
metrics.py CHANGED
@@ -4,6 +4,7 @@ import uuid
4
  from abc import ABC, abstractmethod
5
  from collections import Counter
6
  from dataclasses import field
 
7
  from typing import Any, Dict, Generator, List, Optional, Tuple
8
 
9
  import evaluate
@@ -1329,14 +1330,13 @@ class Perplexity(BulkInstanceMetric):
1329
 
1330
  :return: the likelihood of generating text Y_i after text X_i = P(Y_i|X_i) for every i.
1331
  """
1332
- # make sure all references are singletons
1333
- assert all(len(ref) == 1 for ref in references)
 
 
 
 
1334
 
1335
- # add the instruction as prefix
1336
- predictions = [f"{self.perplexity_prompt} {x}" for x in predictions]
1337
- references = [y[0] for y in references]
1338
-
1339
- # check if the model is enc-dec or dec-only to use the right perplexity computation
1340
  from transformers import AutoConfig
1341
 
1342
  config = AutoConfig.from_pretrained(self.model_name, trust_remote_code=True)
@@ -1348,10 +1348,24 @@ class Perplexity(BulkInstanceMetric):
1348
 
1349
  # compute P(Q|P) and store in queue
1350
  scores = lm.compute_lm(
1351
- source=predictions, target=references, batch_size=self.batch_size
1352
  )
1353
 
1354
- return [{self.main_score: score} for score in scores]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1355
 
1356
  class AbstractLM(ABC):
1357
  def __init__(self, model_name):
@@ -1363,7 +1377,9 @@ class Perplexity(BulkInstanceMetric):
1363
  self.model = self.model_class().from_pretrained(self.model_name)
1364
  self.is_cuda = torch.cuda.is_available()
1365
 
1366
- def compute_lm(self, source, target, batch_size: int) -> List[float]:
 
 
1367
  import torch
1368
 
1369
  scores = []
 
4
  from abc import ABC, abstractmethod
5
  from collections import Counter
6
  from dataclasses import field
7
+ from statistics import mean
8
  from typing import Any, Dict, Generator, List, Optional, Tuple
9
 
10
  import evaluate
 
1330
 
1331
  :return: the likelihood of generating text Y_i after text X_i = P(Y_i|X_i) for every i.
1332
  """
1333
+ sources = []
1334
+ targets = []
1335
+ for prediction, instance_references in zip(predictions, references):
1336
+ for instance_reference in instance_references:
1337
+ sources.append(f"{self.perplexity_prompt} {prediction}")
1338
+ targets.append(instance_reference)
1339
 
 
 
 
 
 
1340
  from transformers import AutoConfig
1341
 
1342
  config = AutoConfig.from_pretrained(self.model_name, trust_remote_code=True)
 
1348
 
1349
  # compute P(Q|P) and store in queue
1350
  scores = lm.compute_lm(
1351
+ source=sources, target=targets, batch_size=self.batch_size
1352
  )
1353
 
1354
+ index = 0
1355
+ all_instances_scores = []
1356
+ for instance_references in references:
1357
+ instance_scores = {}
1358
+ instance_scores_list = []
1359
+ for _ in range(len(instance_references)):
1360
+ instance_scores_list.append(scores[index])
1361
+ index += 1
1362
+ instance_scores["reference_scores"] = instance_scores_list
1363
+ instance_scores[self.main_score] = mean(instance_scores_list)
1364
+
1365
+ instance_scores[self.main_score] = mean(instance_scores_list)
1366
+ all_instances_scores.append(instance_scores)
1367
+
1368
+ return all_instances_scores
1369
 
1370
  class AbstractLM(ABC):
1371
  def __init__(self, model_name):
 
1377
  self.model = self.model_class().from_pretrained(self.model_name)
1378
  self.is_cuda = torch.cuda.is_available()
1379
 
1380
+ def compute_lm(
1381
+ self, source: List[str], target: List[str], batch_size: int
1382
+ ) -> List[float]:
1383
  import torch
1384
 
1385
  scores = []