Update README.md
Browse files
README.md
CHANGED
@@ -16,5 +16,170 @@ tags:
|
|
16 |
---
|
17 |
# Pacing-Judge
|
18 |
|
|
|
|
|
19 |
## Overview
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
---
|
17 |
# Pacing-Judge
|
18 |
|
19 |
+
[\[project page\]](https://github.com/YichenZW/Pacing)
|
20 |
+
|
21 |
## Overview
|
22 |
+
|
23 |
+
This is the **concreteness evaluator** developed in the paper [Improving Pacing in Long-Form Story Planning](https://arxiv.org/abs/2311.04459) (EMNLP 2023).
|
24 |
+
|
25 |
+
## Quick Start
|
26 |
+
|
27 |
+
Simple usage: pass the model a pair of texts (text_ex_1, text_ex_2) joined into a single input with \<sep\> as the separator. The output indicates whether the first or the second text is more concrete.
|
28 |
+
|
29 |
+
```python
|
30 |
+
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "ZachW/pacing-judge"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
text_ex_1 = "The Duke then focused on securing his power and looking to future threats. The Duke eventually turned his attention to acquiring Tuscany but struggled."
text_ex_2 = "Lord Bacon mentioned his book \"The History of Henry VII,\" in the conversation noting that King Charles had conquered Naples without resistance, implying that the conquest was like a dream."

# The judge reads both texts as ONE sequence joined by the literal " <sep> "
# marker. truncation=True keeps over-long pairs within the model's maximum
# input length (same tokenizer settings as the Ranker class uses).
inputs = tokenizer(text_ex_1 + " <sep> " + text_ex_2, truncation=True, return_tensors="pt")

# Inference only: skip building the autograd graph.
with torch.no_grad():
    outputs = model(**inputs)

# Probability assigned to class 0 for the single input row; a value above 0.5
# is reported below as "the second text is more concrete".
prob_class_0 = F.softmax(outputs.logits, dim=1)[0, 0].item()
output = int(prob_class_0 > 0.5)
print(f"Output Binary = {output}")
if output:
    print("The second text is more concrete.")
else:
    print("The first text is more concrete.")
|
46 |
+
```
|
47 |
+
|
48 |
+
## Usage
|
49 |
+
|
50 |
+
We provide the `Ranker` class below, which enables fair pairwise comparison (independent of the order in which the two texts are given) and ranking among multiple candidates. We **recommend** using our model via the Ranker.
|
51 |
+
|
52 |
+
```python
|
53 |
+
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
55 |
+
|
56 |
+
class Ranker:
    """Order-independent pairwise concreteness comparison and ranking.

    Every comparison scores both orderings of a pair (``t1 <sep> t2`` and
    ``t2 <sep> t1``) and averages them, so the verdict does not depend on
    which text is passed first.

    Score convention used throughout: a score below 0.5 means the FIRST text
    is more concrete; 0.5 or above means the SECOND text is more concrete.
    """

    def __init__(self):
        """Load the ``ZachW/pacing-judge`` model and tokenizer."""
        print("*** Loading Model from Huggingface ***")
        model_name = "ZachW/pacing-judge"
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def compare(self, t1, t2):
        """Return 0 if ``t1`` is more concrete, 1 if ``t2`` is.

        An exact tie (score == 0.5) resolves to 1.
        """
        return 0 if self.compare_logits(t1, t2) < 0.5 else 1

    def compare_logits(self, t1, t2):
        """Return the averaged pairwise score for ``(t1, t2)``.

        Despite the name, the value is an averaged softmax probability (see
        ``run_model``), not a raw logit.
        """
        # Both orderings are scored in one batch so run_model can average them.
        text_pair = [t1 + ' <sep> ' + t2, t2 + ' <sep> ' + t1]
        pair_dataset = self.tokenizer(text_pair, padding=True, truncation=True, return_tensors="pt")
        return self.run_model(pair_dataset)

    def run_model(self, dataset):
        """Score a two-row batch holding both orderings of one pair.

        Row 0 is ``t1 <sep> t2`` and row 1 is ``t2 <sep> t1``. The class-0
        probability of row 1 is flipped (``1 - p``) so both rows vote on the
        same question before being averaged.
        """
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            outputs = self.model(**dataset)
        scores = F.softmax(outputs.logits, dim=1)[:, 0].squeeze(-1).detach().cpu().numpy()
        return (scores[0] + (1 - scores[1])) / 2

    def rank(self, texts_list):
        """Return the texts ordered from most concrete to least concrete.

        ``texts_list`` is a list of texts. Uses the same comparison sequence
        as ``rank_idx`` and maps the resulting indices back to texts.
        """
        return [texts_list[i] for i in self.rank_idx(texts_list)]

    def rank_idx(self, texts_list):
        """Return indices into ``texts_list``, most concrete first.

        Quicksort driven by ``compare``; the first element of each partition
        is the pivot.
        """
        def quicksort(arr):
            if len(arr) <= 1:
                return arr
            pivot = arr[0]
            less = []      # items less concrete than the pivot
            greater = []   # items more concrete than the pivot
            for t in arr[1:]:
                if self.compare(texts_list[pivot], texts_list[t]) == 0:
                    less.append(t)
                else:
                    greater.append(t)
            # most concrete -> least concrete
            return quicksort(greater) + [pivot] + quicksort(less)

        return quicksort(list(range(len(texts_list))))

    def rank_idx_conpletely(self, texts_list):
        """Return indices ranked by exhaustive pairwise comparison.

        Same ranking as ``rank_idx_conpletely_wlogits`` without the scores.
        """
        return self.rank_idx_conpletely_wlogits(texts_list)[0]

    def rank_idx_conpletely_wlogits(self, texts_list, logger=None):
        """Rank by comparing every pair; return ``(sorted_indices, output_score)``.

        ``sorted_indices`` lists indices from most to least concrete (ascending
        ``output_score``); ``output_score[i]`` is the score of ``texts_list[i]``.
        ``logger`` is accepted for interface compatibility and is not used.
        """
        n = len(texts_list)
        scores = [[0] * n for _ in range(n)]
        self_score = [0] * n
        for i in range(n):
            # Score of a text against itself (diagonal entry).
            scores[i][i] = self.compare_logits(texts_list[i], texts_list[i])
            self_score[i] = scores[i][i]
            for j in range(n):
                if j < i:
                    # Already computed with the roles swapped; reuse it.
                    scores[i][j] = 1 - scores[j][i]
                elif j > i:
                    scores[i][j] = self.compare_logits(texts_list[i], texts_list[j])
        # A smaller average score means the text is more concrete.
        average_score = [sum(s) / len(s) for s in scores]
        # Shift each average by how far the self-comparison deviates from 0.5.
        output_score = [a + 0.5 - s for a, s in zip(average_score, self_score)]
        sorted_indices = sorted(range(n), key=lambda x: output_score[x])
        return sorted_indices, output_score

    def compare_w_neighbors(self, t, cand):
        """Return the mean ``compare_logits(t, c)`` over all ``c`` in ``cand``.

        Raises ``ZeroDivisionError`` if ``cand`` is empty.
        """
        score = 0.0
        for c in cand:
            score += self.compare_logits(t, c)
        score /= len(cand)
        return score
|
167 |
+
```
|
168 |
+
|
169 |
+
```python
|
170 |
+
text_ex_1 = "The Duke then focused on securing his power and looking to future threats. The Duke eventually turned his attention to acquiring Tuscany but struggled."
text_ex_2 = "Lord Bacon mentioned his book \"The History of Henry VII,\" in the conversation noting that King Charles had conquered Naples without resistance, implying that the conquest was like a dream."

ranker = Ranker()

# The verdict does not depend on argument order: calling
# compare(text_ex_2, text_ex_1) picks the same text, with the returned
# index (0 = first argument, 1 = second argument) flipped accordingly.
output = ranker.compare(text_ex_1, text_ex_2)
print(f"Output Binary = {output}")

# compare() returns 0 or 1, so it can index the message directly.
verdicts = (
    "The first text is more concrete.",
    "The second text is more concrete.",
)
print(verdicts[output])

# Averaged pairwise score behind the binary verdict (below 0.5 -> first text).
output_logits = ranker.compare_logits(text_ex_1, text_ex_2)
print(f"Output Logits = {output_logits:.4f}")
|
183 |
+
```
|
184 |
+
|
185 |
+
**For more details on the evaluator usage (e.g., pacing planning and control in generation) and training process, please refer to our [paper](https://arxiv.org/abs/2311.04459)!**
|