Aidan Phillips committed on
Commit
b837a10
·
1 Parent(s): 0de83a5
Files changed (3) hide show
  1. categories/fluency.py +203 -0
  2. requirements.txt +3 -0
  3. scorer.ipynb +110 -0
categories/fluency.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import language_tool_python
2
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
3
+ import torch
4
+ import numpy as np
5
+ import spacy
6
+
7
# Shared, module-level resources. They are created once at import time and
# reused by every call to the scoring functions below.

# LanguageTool checker (US English rules) used by grammar_errors().
tool = language_tool_python.LanguageTool('en-US')
# Masked language model and matching tokenizer used by pseudo_perplexity().
model_name="distilbert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)
# Put the model in evaluation mode; it is only used for inference here.
model.eval()

# spaCy English pipeline used by __check_structural_grammar().
nlp = spacy.load("en_core_web_sm")
14
+
15
def _longest_run_above(loss_values, threshold):
    """Return (start, end) of the longest run of values > threshold, or None.

    ``end`` is exclusive. When several runs share the maximum length the
    first one is returned.
    """
    best = None
    run_start = None
    for i, loss in enumerate(loss_values):
        if loss > threshold:
            if run_start is None:
                run_start = i
            continue
        if run_start is not None:
            if best is None or i - run_start > best[1] - best[0]:
                best = (run_start, i)
            run_start = None

    # A run that reaches the end of the sequence is still open here.
    if run_start is not None:
        end = len(loss_values)
        if best is None or end - run_start > best[1] - best[0]:
            best = (run_start, end)
    return best


def pseudo_perplexity(text, max_len=128, threshold=12):
    """
    Score the fluency of ``text`` with masked-LM pseudo-perplexity.

    Every token between [CLS] and [SEP] is masked in turn and the negative
    log-probability the model assigns to the original token is recorded.
    The exponential of the mean of those losses is the pseudo-perplexity,
    which is then mapped to a score by ``__fluency_score_from_ppl``.

    Args:
        text: Text to score.
        max_len: Maximum number of token ids (special tokens included).
        threshold: Per-token loss above which a token counts as suspicious.

    Returns:
        {
            "score": value produced by __fluency_score_from_ppl,
            "errors": [
                {
                    "start": first position of the flagged run,
                    "end": position one past the flagged run,
                    "message": "error message"
                }
            ]
        }
        "errors" holds at most one entry, the longest run of consecutive
        tokens whose loss exceeds ``threshold``, and is empty when no token
        exceeds it. If the text yields no scorable tokens the result is
        ``{"score": 0.0, "errors": []}``.

    Raises:
        ValueError: If the tokenized input is longer than ``max_len``.

    NOTE(review): start/end index the tokenizer's tokens (special tokens
    excluded), not whitespace-separated words. Callers that slice
    ``text.split()`` with them will be misaligned whenever a word is split
    into several tokens - confirm the intended unit.
    """
    input_ids = tokenizer.encode(text, return_tensors="pt")[0]

    if len(input_ids) > max_len:
        raise ValueError(f"Input too long for model (>{max_len} tokens).")

    loss_values = []
    with torch.no_grad():
        for i in range(1, len(input_ids) - 1):  # skip [CLS] and [SEP]
            masked_input = input_ids.clone()
            masked_input[i] = tokenizer.mask_token_id

            logits = model(masked_input.unsqueeze(0)).logits[0, i]
            probs = torch.softmax(logits, dim=-1)

            prob_true_token = probs[input_ids[i].item()].item()
            # The epsilon keeps log() finite when the probability underflows.
            loss_values.append(-np.log(prob_true_token + 1e-12))

    if not loss_values:
        # Nothing to score: avoid np.mean([]) which would produce NaN.
        return {"score": 0.0, "errors": []}

    errors = []
    run = _longest_run_above(loss_values, threshold)
    if run is not None:
        start, end = run
        # Report the worst token of the run that is actually flagged.
        peak_loss = max(loss_values[start:end])
        errors.append({
            "start": start,
            "end": end,
            "message": f"Perplexity above threshold: {peak_loss}"
        })

    ppl = np.exp(np.mean(loss_values))

    return {
        "score": __fluency_score_from_ppl(ppl),
        "errors": errors
    }
88
+
89
def __fluency_score_from_ppl(ppl, midpoint=20, steepness=0.3):
    """
    Use a logistic function to map perplexity to 0-100.

    Args:
        ppl: Perplexity value; lower means more fluent.
        midpoint: The PPL where the score is 50.
        steepness: Controls curve sharpness.

    Returns:
        float: Score rounded to two decimals (NaN if ``ppl`` is NaN).
    """
    # Clip the exponent so very large perplexities do not overflow exp();
    # beyond +/-700 the rounded score is already 0.0 or 100.0.
    exponent = np.clip(steepness * (ppl - midpoint), -700.0, 700.0)
    score = 100.0 / (1.0 + np.exp(exponent))
    return round(float(score), 2)
97
+
98
def _char_to_word_index(text):
    """Map every character offset of ``text`` to a whitespace-word index.

    Whitespace characters map to the word that precedes them (leading
    whitespace maps to word 0), so the result has exactly ``len(text)``
    entries regardless of how many separators sit between words.
    """
    mapping = []
    word_idx = -1
    in_word = False
    for ch in text:
        if ch.isspace():
            in_word = False
        elif not in_word:
            in_word = True
            word_idx += 1
        mapping.append(max(word_idx, 0))
    return mapping


def grammar_errors(text) -> dict:
    """
    Check ``text`` with LanguageTool and the structural grammar heuristics.

    Args:
        text: Text to check.

    Returns:
        dict: {
            "score": value produced by __grammar_score_from_prob from the
                     ratio of LanguageTool matches to words,
            "errors": list of {"start", "end", "message"} dicts
        }
        For LanguageTool matches, start/end are indices into
        ``text.split()`` with ``end`` exclusive. Entries appended from
        ``__check_structural_grammar`` are passed through unchanged and do
        not influence the score.
    """
    words = text.split()
    if not words:
        # No words: avoid dividing by zero and skip the checkers entirely.
        return {"score": __grammar_score_from_prob(0.0), "errors": []}

    matches = tool.check(text)
    grammar_score = len(matches) / len(words)

    # Built once; it only depends on the text, not on the individual match.
    char_to_word = _char_to_word_index(text)
    last_char = len(char_to_word) - 1

    r = []
    for match in matches:
        # Clamp so an out-of-range or zero-length match cannot index outside
        # the text.
        first = min(max(match.offset, 0), last_char)
        last = min(max(match.offset + match.errorLength - 1, first), last_char)
        r.append({
            "start": char_to_word[first],
            "end": char_to_word[last] + 1,
            "message": match.message
        })

    r.extend(__check_structural_grammar(text))

    return {
        "score": __grammar_score_from_prob(grammar_score),
        "errors": r
    }
137
+
138
def __grammar_score_from_prob(error_ratio, steepness=10):
    """
    Transform the number of errors divided by words into a score from 0 to 100.

    The curve is ``200 / (1 + exp(steepness * error_ratio))``: an error
    ratio of 0 yields exactly 100 and the score decays towards 0 as the
    ratio grows. (A plain ``100 / (1 + exp(...))`` would cap the score at 50,
    because the ratio can never be negative.)

    Args:
        error_ratio: Errors per word; negative values are treated as 0.
        steepness: Controls how quickly the score drops as errors increase.

    Returns:
        float: Score rounded to two decimals.
    """
    # Clip the exponent to [0, 700]: the lower bound ignores negative ratios,
    # the upper bound prevents exp() from overflowing.
    exponent = np.clip(steepness * error_ratio, 0.0, 700.0)
    score = 200.0 / (1.0 + np.exp(exponent))
    return round(float(score), 2)
145
+
146
+
147
def __check_structural_grammar(text):
    """
    Run heuristic structural checks on the spaCy parse of ``text``.

    Checks, in order:
        1. No ROOT token tagged VERB or AUX.
        2. Verbs present but no nsubj/nsubjpass token.
        3. ADP tokens without any children.
        4. No verb at all and every alphabetic token is nominal.
        5. More than one ROOT token.

    Args:
        text: Text to analyse.

    Returns:
        list[dict]: One {"start", "end", "message"} dict per issue, where
        start/end are spaCy token indices (``end`` exclusive). Empty when the
        parse contains no tokens.
    """
    doc = nlp(text)
    if len(doc) == 0:
        # Nothing was parsed; indexing doc[0] below would raise IndexError.
        return []

    verbal_tags = {"VERB", "AUX"}
    roots = [tok for tok in doc if tok.dep_ == "ROOT"]
    verbs = [tok for tok in doc if tok.pos_ in verbal_tags]
    issues = []

    def flag(tok, message):
        # Every issue spans exactly the one token it is attached to.
        issues.append({"start": tok.i, "end": tok.i + 1, "message": message})

    # 1. Missing main verb (ROOT)
    if not any(tok.pos_ in verbal_tags for tok in roots):
        flag(roots[0] if roots else doc[0],
             "Sentence is missing a main verb (no ROOT verb).")

    # 2. Verb(s) present but no subject
    has_subject = any(tok.dep_ in {"nsubj", "nsubjpass"} for tok in doc)
    if verbs and not has_subject:
        for verb in verbs:
            flag(verb, "Sentence has verb(s) but no subject (possible fragment).")

    # 3. Dangling prepositions
    for tok in doc:
        if tok.pos_ == "ADP" and not list(tok.children):
            flag(tok, f"Dangling preposition '{tok.text}' (no object or complement).")

    # 4. Noun pile-up (no verbs, all tokens are nominal)
    nominal_tags = {"NOUN", "PROPN", "ADJ", "DET", "NUM"}
    if not verbs and all(tok.pos_ in nominal_tags for tok in doc if tok.is_alpha):
        flag(doc[0],
             "Sentence lacks a verb or any verbal structure (nominal phrase pile-up).")

    # 5. Multiple ROOTs (possible run-on)
    if len(roots) > 1:
        for tok in roots:
            flag(tok, "Sentence has multiple ROOTs — possible run-on sentence.")

    return issues
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ language_tool_python
2
+ transformers
3
+ torch
scorer.ipynb ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/opt/anaconda3/envs/teach-bs/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "from categories.fluency import *"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 2,
24
+ "metadata": {},
25
+ "outputs": [
26
+ {
27
+ "name": "stdout",
28
+ "output_type": "stream",
29
+ "text": [
30
+ "Sentence: The car hit the cone.\n"
31
+ ]
32
+ }
33
+ ],
34
+ "source": [
35
+ "s = input(\"Enter a sentence: \") # Prompt the user to enter a sentence\n",
36
+ "\n",
37
+ "if s == \"\":\n",
38
+ " s = \"The cat sat the quickly up apples banana.\"\n",
39
+ "\n",
40
+ "print(\"Sentence:\", s) # Print the input sentence\n",
41
+ "\n",
42
+ "err = grammar_errors(s) # Call the function to execute the grammar error checking\n",
43
+ "flu = pseudo_perplexity(s) # Call the function to execute the fluency checking"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 3,
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "name": "stdout",
53
+ "output_type": "stream",
54
+ "text": [
55
+ "Perplexity above threshold: 0: The\n",
56
+ "[{'start': 0, 'end': 0, 'message': 'Perplexity above threshold: 0'}]\n"
57
+ ]
58
+ }
59
+ ],
60
+ "source": [
61
+ "combined_err = err[\"errors\"] + flu[\"errors\"] # Combine the error counts from both functions\n",
62
+ "\n",
63
+ "for e in combined_err:\n",
64
+ " substr = \" \".join(s.split(\" \")[e[\"start\"]:e[\"end\"]+1])\n",
65
+ " print(f\"{e['message']}: {substr}\") # Print the error messages\n",
66
+ "\n",
67
+ "print(combined_err)\n"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": 4,
73
+ "metadata": {},
74
+ "outputs": [
75
+ {
76
+ "name": "stdout",
77
+ "output_type": "stream",
78
+ "text": [
79
+ "Fluency Score: 30.0\n"
80
+ ]
81
+ }
82
+ ],
83
+ "source": [
84
+ "fluency_score = 0.6 * err[\"score\"] + 0.4 * flu[\"score\"] # Calculate the fluency score\n",
85
+ "print(\"Fluency Score:\", fluency_score) # Print the fluency score"
86
+ ]
87
+ }
88
+ ],
89
+ "metadata": {
90
+ "kernelspec": {
91
+ "display_name": "teach-bs",
92
+ "language": "python",
93
+ "name": "python3"
94
+ },
95
+ "language_info": {
96
+ "codemirror_mode": {
97
+ "name": "ipython",
98
+ "version": 3
99
+ },
100
+ "file_extension": ".py",
101
+ "mimetype": "text/x-python",
102
+ "name": "python",
103
+ "nbconvert_exporter": "python",
104
+ "pygments_lexer": "ipython3",
105
+ "version": "3.11.11"
106
+ }
107
+ },
108
+ "nbformat": 4,
109
+ "nbformat_minor": 2
110
+ }