ronald committed on
Commit d7885fe
1 Parent(s): 9609d1c
Files changed (1)
  1. ccl_win.py +23 -6
ccl_win.py CHANGED
@@ -21,6 +21,8 @@ import getpass
 import pdb
 import os
 import torch
+from rouge_score import scoring
+
 
 # TODO: Add BibTeX citation
 _CITATION = """\
@@ -113,7 +115,7 @@ class ccl_win(evaluate.Measurement):
 
 
 
-    def _compute(self, predictions, dataset, batch_size: int = 16, device=None):
+    def _compute(self, predictions, dataset, batch_size: int = 16, device=None, use_aggregator=True):
         """Returns the scores"""
         MODEL_CACHE_DIR = "/home/rcardena/.cache/huggingface/"
         BASEDIR = "/gfs/team/nlp/users/rcardena/tools/new_evals/ccl_win"
@@ -129,6 +131,12 @@ class ccl_win(evaluate.Measurement):
         else:
             device = "cuda" if torch.cuda.is_available() else "cpu"
 
+        results = []
+        aggregator = None
+        if use_aggregator:
+            np.random.seed(42)
+            aggregator = scoring.BootstrapAggregator()
+
         tokenizer = AutoTokenizer.from_pretrained("roberta-large")
 
         model = AutoModelForSequenceClassification.from_pretrained(os.path.join(BASEDIR,dataset))
@@ -148,12 +156,21 @@ class ccl_win(evaluate.Measurement):
             probs = torch.softmax(output.logits,dim=-1).detach().cpu().numpy()
             scores.extend(probs[:,0].tolist())
         #
-        results = []
+
         offset = 0
         for _len in len_by_sample:
-            results.append( float(np.mean(scores[offset:offset+_len])) )
+            score = float(np.mean(scores[offset:offset+_len]))
+            if use_aggregator:
+                aggregator.add_scores({"loc_coh_ccl": score})
+            else:
+                results.append(score)
             offset += _len
         #
-        return {
-            "loc_coh_ccl": results,
-        }
+        outres = {}
+        if use_aggregator:
+            res = aggregator.aggregate()
+            for k in res: outres[k] = res[k].mid
+        else:
+            outres = {"loc_coh_ccl": results}
+
+        return outres
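For context: with use_aggregator=True (the new default), _compute no longer returns one score per input. It feeds each per-sample mean into a bootstrap resampler and reports only the interval midpoint per key. Below is a minimal sketch of that aggregation step on its own, assuming rouge_score is installed and that plain floats can be passed to the aggregator the way the commit passes them; the per-sample values are invented for illustration.

# Sketch only, not part of ccl_win.py: replays the commit's new aggregation path.
# Assumes rouge_score is installed; the per-sample scores here are made up.
import numpy as np
from rouge_score import scoring

per_sample = [0.81, 0.74, 0.90, 0.66]  # hypothetical per-input "loc_coh_ccl" means

np.random.seed(42)  # same fixed seed the commit sets; the aggregator resamples
                    # via np.random, so this keeps the output reproducible
aggregator = scoring.BootstrapAggregator()
for s in per_sample:
    aggregator.add_scores({"loc_coh_ccl": s})

res = aggregator.aggregate()
# aggregate() yields, per key, an AggregateScore with low/mid/high
# confidence-interval bounds; the commit keeps only the midpoint (.mid).
print({k: v.mid for k, v in res.items()})

With use_aggregator=False, the previous behavior is preserved: the raw per-sample list comes back under the loc_coh_ccl key.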