ronald committed on
Commit
89d4922
1 Parent(s): 68677f3
Files changed (1) hide show
  1. ccl_win.py +6 -6
ccl_win.py CHANGED
@@ -113,7 +113,7 @@ class ccl_win(evaluate.Measurement):
113
 
114
 
115
 
116
- def _compute(self, predictions, dataset, device=None):
117
  """Returns the scores"""
118
  MODEL_CACHE_DIR = "/home/rcardena/.cache/huggingface/"
119
  if getpass.getuser() == "s1987051":
@@ -132,17 +132,17 @@ class ccl_win(evaluate.Measurement):
132
  model = transformers.AutoModelForSequenceClassification.from_pretrained(f"./{dataset}/", num_labels=2,cache_dir=MODEL_CACHE_DIR)
133
  model.to(device)
134
 
135
- pred_list,len_by_sample = preprocess_adjacent_window(preds)
136
 
137
  scores = []
138
- for text in pred_list:
139
- sents = text.lower().split("\n")
140
- strides = ["\n".join(sents[i:i+WINDOW_SIZE]) for i in range(0,len(sents),WINDOW_SIZE)]
141
  tinput = tokenizer(strides,padding=True,truncation=True,max_length=512,return_tensors="pt")
142
  tinput = {k:v.to(device) for k,v in tinput.items()}
143
  output = model(**tinput)
144
  probs = torch.softmax(output.logits,dim=-1).detach().cpu().numpy()
145
- scores.append(probs[:,0].mean())
146
  #
147
  results = []
148
  offset = 0
 
113
 
114
 
115
 
116
+ def _compute(self, predictions, dataset, batch_size: int = 16, device=None):
117
  """Returns the scores"""
118
  MODEL_CACHE_DIR = "/home/rcardena/.cache/huggingface/"
119
  if getpass.getuser() == "s1987051":
 
132
  model = transformers.AutoModelForSequenceClassification.from_pretrained(f"./{dataset}/", num_labels=2,cache_dir=MODEL_CACHE_DIR)
133
  model.to(device)
134
 
135
+ pred_list,len_by_sample = self.preprocess_adjacent_window(preds)
136
 
137
  scores = []
138
+ n_preds = len(pred_list)
139
+ for b in range(0,n_preds,batch_size):
140
+ strides = [x.lower() for x in pred_list[b:b+batch_size]]
141
  tinput = tokenizer(strides,padding=True,truncation=True,max_length=512,return_tensors="pt")
142
  tinput = {k:v.to(device) for k,v in tinput.items()}
143
  output = model(**tinput)
144
  probs = torch.softmax(output.logits,dim=-1).detach().cpu().numpy()
145
+ scores.extend(probs[:,0].tolist())
146
  #
147
  results = []
148
  offset = 0