File size: 7,407 Bytes
52dffaa 96815eb 62a5638 96815eb 62a5638 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
---
license: mit
datasets:
- ZachW/GPT-BookSum
language:
- en
metrics:
- accuracy
base_model:
- FacebookAI/roberta-base
pipeline_tag: zero-shot-classification
tags:
- pacing
- concreteness
- text-evaluation
---
# Pacing-Judge
[\[project page\]](https://github.com/YichenZW/Pacing)
## Overview
This is the **concreteness evaluator** developed in the paper [Improving Pacing in Long-Form Story Planning](https://arxiv.org/abs/2311.04459) (EMNLP 2023).
## Quick Start
Simple usage: join a pair of texts (`text_ex_1`, `text_ex_2`) with \<sep\> as the separator and pass the result to the model. The output indicates whether the first or the second text is more concrete.
```python
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Load the fine-tuned classifier and its tokenizer from the Hugging Face Hub.
model_name = "ZachW/pacing-judge"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

text_ex_1 = "The Duke then focused on securing his power and looking to future threats. The Duke eventually turned his attention to acquiring Tuscany but struggled."
text_ex_2 = "Lord Bacon mentioned his book \"The History of Henry VII,\" in the conversation noting that King Charles had conquered Naples without resistance, implying that the conquest was like a dream."

# Both texts go into a single sequence, joined by " <sep> ".
# truncation=True matches the Ranker's tokenizer calls and keeps an
# over-long pair within the model's maximum input length.
inputs = tokenizer(text_ex_1 + " <sep> " + text_ex_2, truncation=True, return_tensors="pt")
outputs = model(**inputs)

# Softmax probability of class 0 for this ordering of the pair; a value
# above 0.5 is reported as "the second text is more concrete".
output = int(F.softmax(outputs.logits, dim=1)[:, 0].squeeze(-1).detach().cpu().numpy() > 0.5)
print(f"Output Binary = {output}")
if output:
    print("The second text is more concrete.")
else:
    print("The first text is more concrete.")
```
## Usage
We provide a `Ranker` class that enables fair pairwise comparison (independent of the order in which the two texts are given) and ranking among candidates. We **recommend** using our model via the Ranker.
```python
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
class Ranker:
    """Order-independent pairwise concreteness comparison and ranking.

    Every comparison feeds the classifier both orderings of the pair
    ("t1 <sep> t2" and "t2 <sep> t1") and averages the two results, so the
    verdict does not depend on which text is passed first.

    Score convention used throughout the class: a score below 0.5 means the
    first text is judged more concrete, otherwise the second one is.
    """

    def __init__(self):
        """Load the model and tokenizer from the Hugging Face Hub."""
        print(f"*** Loading Model from Huggingface ***")
        model_name = "ZachW/pacing-judge"
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def compare(self, t1: str, t2: str) -> int:
        """Return 0 if ``t1`` is judged more concrete, 1 if ``t2`` is."""
        score = self.compare_logits(t1, t2)
        if score < 0.5:
            return 0  # first is more concrete
        return 1  # second is more concrete

    def compare_logits(self, t1: str, t2: str):
        """Return the order-averaged score for the pair ``(t1, t2)``.

        Despite the name, the value is an averaged softmax probability
        (see ``run_model``), not a raw logit. Below 0.5 means ``t1`` is
        judged more concrete.
        """
        text_pair = [t1 + ' <sep> ' + t2, t2 + ' <sep> ' + t1]
        pair_dataset = self.tokenizer(text_pair, padding=True, truncation=True, return_tensors="pt")
        return self.run_model(pair_dataset)

    def run_model(self, dataset):
        """Score a tokenized batch holding the two orderings of one pair.

        ``dataset`` is unpacked as keyword arguments into the model. Row 0 is
        expected to be "t1 <sep> t2" and row 1 "t2 <sep> t1". The class-0
        probability of row 0 is averaged with the complement of the class-0
        probability of row 1, which cancels any bias towards a position.
        """
        # Function-scope import: the surrounding snippet only imports
        # torch.nn.functional, and this keeps the class self-contained.
        import torch

        # Inference only, so skip building the autograd graph.
        with torch.no_grad():
            outputs = self.model(**dataset)
            scores = F.softmax(outputs.logits, dim=1)[:, 0].squeeze(-1).detach().cpu().numpy()
        aver_score = (scores[0] + (1 - scores[1])) / 2
        return aver_score

    def _quicksort(self, items, text_of):
        """Quicksort ``items`` from most to least concrete.

        ``text_of`` maps an item to the text that is handed to ``compare``;
        the first element of each partition is used as the pivot.
        """
        if len(items) <= 1:
            return items
        pivot = items[0]
        more_concrete = []
        less_concrete = []
        for item in items[1:]:
            if self.compare(text_of(pivot), text_of(item)) == 0:
                # The pivot wins, so the item belongs after it.
                less_concrete.append(item)
            else:
                more_concrete.append(item)
        return (
            self._quicksort(more_concrete, text_of)
            + [pivot]
            + self._quicksort(less_concrete, text_of)
        )

    def rank(self, texts_list):
        """Return the texts ordered from most concrete to least concrete."""
        return self._quicksort(texts_list, lambda text: text)

    def rank_idx(self, texts_list):
        """Return indices into ``texts_list``, most concrete first."""
        return self._quicksort(
            list(range(len(texts_list))), lambda idx: texts_list[idx]
        )

    def _round_robin_scores(self, texts_list):
        """Compare every text with every other one and return per-text scores.

        The score of a text is its mean ``compare_logits`` value against all
        texts (itself included), shifted by ``0.5 - self_score``. A smaller
        score means more concrete.
        """
        n = len(texts_list)
        scores = [[0] * n for _ in range(n)]
        self_score = [0] * n
        for i in range(n):
            # A text compared with itself yields two identical orderings, so
            # this is expected to be ~0.5; it is kept as the calibration term.
            scores[i][i] = self.compare_logits(texts_list[i], texts_list[i])
            self_score[i] = scores[i][i]
            for j in range(i + 1, n):
                scores[i][j] = self.compare_logits(texts_list[i], texts_list[j])
                # Reuse the result for the mirrored cell instead of calling
                # the model again.
                scores[j][i] = 1 - scores[i][j]
        average_score = [sum(row) / len(row) for row in scores]
        return [a + 0.5 - s for a, s in zip(average_score, self_score)]

    def rank_idx_conpletely(self, texts_list):
        """Rank by exhaustive pairwise comparison; return indices only.

        Indices are ordered from most concrete to least concrete. The
        misspelt method name is kept for backward compatibility.
        """
        output_score = self._round_robin_scores(texts_list)
        return sorted(range(len(output_score)), key=lambda x: output_score[x])

    def rank_idx_conpletely_wlogits(self, texts_list, logger=None):
        """Like ``rank_idx_conpletely`` but also return the per-text scores.

        Returns ``(sorted_indices, output_score)`` where ``output_score[i]``
        belongs to ``texts_list[i]``. ``logger`` is accepted for
        compatibility and is not used.
        """
        output_score = self._round_robin_scores(texts_list)
        sorted_indices = sorted(range(len(output_score)), key=lambda x: output_score[x])
        return sorted_indices, output_score

    def compare_w_neighbors(self, t: str, cand):
        """Return the mean ``compare_logits`` score of ``t`` against ``cand``.

        Raises:
            ValueError: if ``cand`` is empty, since the mean is undefined.
        """
        if len(cand) == 0:
            raise ValueError("cand must contain at least one text to compare against")
        score = 0.0
        for c in cand:
            score += self.compare_logits(t, c)
        score /= len(cand)
        return score
```
```python
text_ex_1 = "The Duke then focused on securing his power and looking to future threats. The Duke eventually turned his attention to acquiring Tuscany but struggled."
text_ex_2 = "Lord Bacon mentioned his book \"The History of Henry VII,\" in the conversation noting that King Charles had conquered Naples without resistance, implying that the conquest was like a dream."

ranker = Ranker()

# Binary verdict: 0 -> the first text is more concrete, 1 -> the second.
# The judgement is order-independent: calling compare(text_ex_2, text_ex_1)
# picks the same text, with the returned index flipped accordingly.
output = ranker.compare(text_ex_1, text_ex_2)
print(f"Output Binary = {output}")
verdict = "The second text is more concrete." if output else "The first text is more concrete."
print(verdict)

# Underlying order-averaged score; below 0.5 favours the first text.
output_logits = ranker.compare_logits(text_ex_1, text_ex_2)
print(f"Output Logits = {output_logits:.4f}")
```
**For more details on the evaluator usage (e.g., pacing planning and control in generation) and training process, please refer to our [paper](https://arxiv.org/abs/2311.04459)!**
|