ZachW
/

pacing-judge

@@ -16,5 +16,170 @@ tags:
 ---
 # Pacing-Judge
 ## Overview
-This is the concreteness evaluator in the paper Improving Pacing in Long-Form Story Planning.

 ---
 # Pacing-Judge
+[\[project page\]](https://github.com/YichenZW/Pacing)
 ## Overview
+This is the **concreteness evaluator** developed in the paper [Improving Pacing in Long-Form Story Planning](https://arxiv.org/abs/2311.04459) (EMNLP 2023).
+## Quick Start
+Basic usage: pass the model a pair of texts (`text_ex_1`, `text_ex_2`) joined by the separator \<sep\>. The output is binary and indicates which text is more concrete: 1 means the second text, 0 means the first.
+```python
+import torch.nn.functional as F
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Fetch the released evaluator and its tokenizer from the Hugging Face Hub.
model_name = "ZachW/pacing-judge"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Example passages to compare.
text_ex_1 = "The Duke then focused on securing his power and looking to future threats. The Duke eventually turned his attention to acquiring Tuscany but struggled."
text_ex_2 = "Lord Bacon mentioned his book \"The History of Henry VII,\" in the conversation noting that King Charles had conquered Naples without resistance, implying that the conquest was like a dream."

# The model reads both texts as one sequence joined by the <sep> marker.
inputs = tokenizer(f"{text_ex_1} <sep> {text_ex_2}", return_tensors="pt")
outputs = model(**inputs)

# Probability assigned to class index 0 for the single input pair.
class_probs = F.softmax(outputs.logits, dim=1)
first_class_prob = class_probs[:, 0].squeeze(-1).detach().cpu().numpy()

# A class-0 probability above 0.5 is reported as 1 (second text more concrete).
output = int(first_class_prob > 0.5)
print(f"Output Binary = {output}")
print("The second text is more concrete." if output else "The first text is more concrete.")
+```
+## Usage
+We provide a `Ranker` class that enables fair pairwise comparison (independent of the order in which the two texts are given) and ranking among multiple candidates. We **recommend** using our model via the Ranker.
+```python
+import torch.nn.functional as F
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
class Ranker:
    """Order-independent pairwise comparator and ranker for text concreteness.

    Every comparison feeds the model both orderings of a pair
    ("t1 <sep> t2" and "t2 <sep> t1") and averages the two predictions.
    The resulting score is below 0.5 when the first text is judged more
    concrete, and 0.5 or above when the second one is.

    The historical method names ``rank_idx_conpletely`` and
    ``rank_idx_conpletely_wlogits`` are kept for backward compatibility;
    correctly spelled aliases are defined at the end of the class.
    """

    def __init__(self, model_name="ZachW/pacing-judge"):
        """Load the sequence-classification model and its tokenizer.

        Args:
            model_name: Model identifier handed to ``from_pretrained``.
                Defaults to the released pacing-judge checkpoint.
        """
        print("*** Loading Model from Huggingface ***")
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def compare(self, t1, t2):
        """Return 0 if ``t1`` is more concrete, 1 if ``t2`` is more concrete.

        A score of exactly 0.5 is reported as 1.
        """
        return 0 if self.compare_logits(t1, t2) < 0.5 else 1

    def compare_logits(self, t1, t2):
        """Return the order-averaged score for the pair ``(t1, t2)``.

        Smaller values mean ``t1`` is more concrete. Both orderings are
        tokenized as one padded batch and scored by ``run_model``.
        """
        text_pair = [t1 + ' <sep> ' + t2, t2 + ' <sep> ' + t1]
        pair_dataset = self.tokenizer(
            text_pair, padding=True, truncation=True, return_tensors="pt"
        )
        return self.run_model(pair_dataset)

    def run_model(self, dataset):
        """Score a two-row batch holding a pair in both orderings.

        Args:
            dataset: Keyword inputs for the model; row 0 must be the
                "t1 <sep> t2" ordering and row 1 the swapped ordering.

        Returns:
            The mean of the class-0 probability of row 0 and the
            complement of the class-0 probability of row 1.
        """
        # Local import: the snippet's top-level imports only bring in
        # torch.nn.functional, not the torch package name itself.
        import torch

        # Inference only, so skip building the autograd graph.
        with torch.no_grad():
            outputs = self.model(**dataset)
        scores = F.softmax(outputs.logits, dim=1)[:, 0].squeeze(-1).detach().cpu().numpy()
        # Row 1 is the swapped ordering, so its probability is flipped
        # before averaging with row 0.
        return (scores[0] + (1 - scores[1])) / 2

    def _quicksort_indices(self, texts_list, indices):
        """Quicksort ``indices`` from most to least concrete text.

        The first index is the pivot; each remaining index costs one
        ``compare`` call against it.
        """
        if len(indices) <= 1:
            return indices
        pivot, *rest = indices
        more_concrete = []
        less_concrete = []
        for idx in rest:
            # compare() == 0 means the pivot text is the more concrete one.
            # A plain two-way split guarantees no candidate is ever dropped.
            if self.compare(texts_list[pivot], texts_list[idx]) == 0:
                less_concrete.append(idx)
            else:
                more_concrete.append(idx)
        return (
            self._quicksort_indices(texts_list, more_concrete)
            + [pivot]
            + self._quicksort_indices(texts_list, less_concrete)
        )

    def rank(self, texts_list):
        """Return the texts ordered from most concrete to least concrete.

        Args:
            texts_list: Indexable sequence of texts.

        Returns:
            A new list; the input is not modified.
        """
        return [texts_list[i] for i in self.rank_idx(texts_list)]

    def rank_idx(self, texts_list):
        """Return indices into ``texts_list``, most concrete first.

        Uses quicksort over pairwise ``compare`` calls, so not every pair
        is necessarily compared.
        """
        return self._quicksort_indices(texts_list, list(range(len(texts_list))))

    def rank_idx_conpletely(self, texts_list):
        """Rank by comparing every pair; return indices, most concrete first.

        Same ordering as ``rank_idx_conpletely_wlogits`` without the scores.
        """
        return self.rank_idx_conpletely_wlogits(texts_list)[0]

    def rank_idx_conpletely_wlogits(self, texts_list, logger=None):
        """Rank by comparing every pair and also return the per-text scores.

        Args:
            texts_list: Indexable sequence of texts.
            logger: Accepted for backward compatibility; currently unused.

        Returns:
            ``(sorted_indices, output_score)``. ``output_score[i]`` is the
            mean of row ``i`` of the pairwise score matrix, shifted by
            ``0.5 - self_score[i]``; a smaller score means more concrete,
            and ``sorted_indices`` lists indices by ascending score.
        """
        n = len(texts_list)
        scores = [[0] * n for _ in range(n)]
        for i in range(n):
            # NOTE(review): both orderings of a self-pair are the same
            # string, so this value should sit at ~0.5 - confirm.
            scores[i][i] = self.compare_logits(texts_list[i], texts_list[i])
            # The lower triangle reuses the already-computed mirrored entry.
            for j in range(i):
                scores[i][j] = 1 - scores[j][i]
            for j in range(i + 1, n):
                scores[i][j] = self.compare_logits(texts_list[i], texts_list[j])
        # The row average includes the self-comparison; the self score is
        # then subtracted and the neutral value 0.5 added back.
        output_score = [
            sum(row) / len(row) + 0.5 - row[i] for i, row in enumerate(scores)
        ]
        sorted_indices = sorted(range(n), key=output_score.__getitem__)
        return sorted_indices, output_score

    def compare_w_neighbors(self, t, cand):
        """Return the mean ``compare_logits`` score of ``t`` against ``cand``.

        Args:
            t: The text being evaluated.
            cand: Sized iterable of texts to compare against.

        Raises:
            ZeroDivisionError: If ``cand`` is empty.
        """
        return sum((self.compare_logits(t, c) for c in cand), 0.0) / len(cand)

    # Correctly spelled aliases for the historical method names.
    rank_idx_completely = rank_idx_conpletely
    rank_idx_completely_wlogits = rank_idx_conpletely_wlogits
+```
+```python
# Example passages to compare.
text_ex_1 = "The Duke then focused on securing his power and looking to future threats. The Duke eventually turned his attention to acquiring Tuscany but struggled."
text_ex_2 = "Lord Bacon mentioned his book \"The History of Henry VII,\" in the conversation noting that King Charles had conquered Naples without resistance, implying that the conquest was like a dream."

ranker = Ranker()

# Binary verdict. The comparison is order-independent: swapping the
# arguments yields the complementary score.
output = ranker.compare(text_ex_1, text_ex_2)
print(f"Output Binary = {output}")
verdict = "The second text is more concrete." if output else "The first text is more concrete."
print(verdict)

# Underlying averaged score (below 0.5 means the first text is more concrete).
output_logits = ranker.compare_logits(text_ex_1, text_ex_2)
print(f"Output Logits = {output_logits:.4f}")
+```
+**For more details on the evaluator usage (e.g., pacing planning and control in generation) and training process, please refer to our [paper](https://arxiv.org/abs/2311.04459)!**