ronald committed on
Commit
89d4922
1 Parent(s): 68677f3
Files changed (1) hide show
  1. ccl_win.py +6 -6
ccl_win.py CHANGED
@@ -113,7 +113,7 @@ class ccl_win(evaluate.Measurement):
113
 
114
 
115
 
116
- def _compute(self, predictions, dataset, device=None):
117
  """Returns the scores"""
118
  MODEL_CACHE_DIR = "/home/rcardena/.cache/huggingface/"
119
  if getpass.getuser() == "s1987051":
@@ -132,17 +132,17 @@ class ccl_win(evaluate.Measurement):
132
  model = transformers.AutoModelForSequenceClassification.from_pretrained(f"./{dataset}/", num_labels=2,cache_dir=MODEL_CACHE_DIR)
133
  model.to(device)
134
 
135
- pred_list,len_by_sample = preprocess_adjacent_window(preds)
136
 
137
  scores = []
138
- for text in pred_list:
139
- sents = text.lower().split("\n")
140
- strides = ["\n".join(sents[i:i+WINDOW_SIZE]) for i in range(0,len(sents),WINDOW_SIZE)]
141
  tinput = tokenizer(strides,padding=True,truncation=True,max_length=512,return_tensors="pt")
142
  tinput = {k:v.to(device) for k,v in tinput.items()}
143
  output = model(**tinput)
144
  probs = torch.softmax(output.logits,dim=-1).detach().cpu().numpy()
145
- scores.append(probs[:,0].mean())
146
  #
147
  results = []
148
  offset = 0
 
113
 
114
 
115
 
116
+ def _compute(self, predictions, dataset, batch_size: int = 16, device=None):
117
  """Returns the scores"""
118
  MODEL_CACHE_DIR = "/home/rcardena/.cache/huggingface/"
119
  if getpass.getuser() == "s1987051":
 
132
  model = transformers.AutoModelForSequenceClassification.from_pretrained(f"./{dataset}/", num_labels=2,cache_dir=MODEL_CACHE_DIR)
133
  model.to(device)
134
 
135
+ pred_list,len_by_sample = self.preprocess_adjacent_window(preds)
136
 
137
  scores = []
138
+ n_preds = len(pred_list)
139
+ for b in range(0,n_preds,batch_size):
140
+ strides = [x.lower() for x in pred_list[b:b+batch_size]]
141
  tinput = tokenizer(strides,padding=True,truncation=True,max_length=512,return_tensors="pt")
142
  tinput = {k:v.to(device) for k,v in tinput.items()}
143
  output = model(**tinput)
144
  probs = torch.softmax(output.logits,dim=-1).detach().cpu().numpy()
145
+ scores.extend(probs[:,0].tolist())
146
  #
147
  results = []
148
  offset = 0