Nathan Butters committed
Commit
0d13932
1 Parent(s): e9530fc

add spaCy lg

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .DS_Store +0 -0
  2. .gitattributes +4 -0
  3. .ipynb_checkpoints/NLselector-checkpoint.py +197 -0
  4. .ipynb_checkpoints/WNgen-checkpoint.py +313 -0
  5. .ipynb_checkpoints/app-checkpoint.py +340 -0
  6. Assets/.DS_Store +0 -0
  7. {NER-tweaks → Assets/Models}/.DS_Store +0 -0
  8. Assets/Models/en_core_web_lg/attribute_ruler/patterns +0 -0
  9. Assets/Models/en_core_web_lg/config.cfg +266 -0
  10. Assets/Models/en_core_web_lg/lemmatizer/lookups/lookups.bin +3 -0
  11. Assets/Models/en_core_web_lg/meta.json +3 -0
  12. Assets/Models/en_core_web_lg/ner/cfg +13 -0
  13. Assets/Models/en_core_web_lg/ner/model +3 -0
  14. Assets/Models/en_core_web_lg/ner/moves +1 -0
  15. Assets/Models/en_core_web_lg/parser/cfg +13 -0
  16. Assets/Models/en_core_web_lg/parser/model +3 -0
  17. Assets/Models/en_core_web_lg/parser/moves +2 -0
  18. Assets/Models/en_core_web_lg/senter/cfg +3 -0
  19. Assets/Models/en_core_web_lg/senter/model +3 -0
  20. Assets/Models/en_core_web_lg/tagger/cfg +55 -0
  21. Assets/Models/en_core_web_lg/tagger/model +3 -0
  22. Assets/Models/en_core_web_lg/tok2vec/cfg +3 -0
  23. Assets/Models/en_core_web_lg/tok2vec/model +3 -0
  24. Assets/Models/en_core_web_lg/tokenizer +3 -0
  25. Assets/Models/en_core_web_lg/vocab/key2row +3 -0
  26. Assets/Models/en_core_web_lg/vocab/lookups.bin +3 -0
  27. Assets/Models/en_core_web_lg/vocab/strings.json +3 -0
  28. Assets/Models/en_core_web_lg/vocab/vectors +3 -0
  29. Assets/Models/en_core_web_lg/vocab/vectors.cfg +3 -0
  30. Lime Explorations.ipynb +0 -0
  31. NER-tweaks/.ipynb_checkpoints/age-bias-checkpoint.jsonl +0 -32
  32. NER-tweaks/.ipynb_checkpoints/entity-ruler-input-checkpoint.jsonl +0 -44
  33. NER-tweaks/.ipynb_checkpoints/gender-test-checkpoint.jsonl +0 -59
  34. NER-tweaks/.ipynb_checkpoints/main-ruler-bias-checkpoint.jsonl +0 -862
  35. NER-tweaks/age-bias.jsonl +0 -32
  36. NER-tweaks/entity-ruler-input.jsonl +0 -44
  37. NER-tweaks/gender-test.jsonl +0 -59
  38. NER-tweaks/main-ruler-bias.jsonl +0 -862
  39. NLselector.py +1 -1
  40. Pipfile +0 -40
  41. Pipfile.lock +0 -0
  42. README OG.md +0 -34
  43. README.md +20 -1
  44. VizNLC-duct-tape-pipeline.ipynb +0 -934
  45. VizNLC-gen-pipeline.ipynb +0 -1175
  46. WNgen.py +3 -3
  47. app.py +1 -1
  48. cf-gen-pipeline.ipynb +0 -0
  49. custom-named-entity-recognition.ipynb +0 -0
  50. dynamic-word-list-generation.ipynb +0 -1287
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.gitattributes CHANGED
@@ -25,3 +25,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zstandard filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ vectors filter=lfs diff=lfs merge=lfs -text
+ model filter=lfs diff=lfs merge=lfs -text
+ *.json filter=lfs diff=lfs merge=lfs -text
+ key2row filter=lfs diff=lfs merge=lfs -text
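
Note: with these patterns, the vectors, model, *.json, and key2row files are stored in Git LFS, so a checkout without "git lfs pull" leaves three-line pointer stubs in place of the real content (the "version https://git-lfs.github.com/spec/v1" stubs visible further down in this diff). A minimal sketch of a check for that state; the helper name is mine and the path is illustrative:

#Minimal sketch: detect whether a checked-out file is still a Git LFS pointer stub rather than real binary content.
from pathlib import Path

def is_lfs_pointer(path: str) -> bool:
    head = Path(path).read_bytes()[:64]    #pointer stubs begin with the LFS spec line
    return head.startswith(b"version https://git-lfs.github.com/spec/v1")

print(is_lfs_pointer("Assets/Models/en_core_web_lg/ner/model"))    #illustrative path from this commit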
.ipynb_checkpoints/NLselector-checkpoint.py ADDED
@@ -0,0 +1,197 @@
+ #Import the libraries we know we'll need for the Generator.
+ import pandas as pd, spacy, nltk, numpy as np, re
+ from spacy.matcher import Matcher
+ #!python -m spacy download en_core_web_md #Not sure if we need this so I'm going to keep it just in case
+ nlp = spacy.load("Assets/Models/en_core_web_lg")
+ import altair as alt
+ import streamlit as st
+ from annotated_text import annotated_text as ant
+
+ #Import the libraries to support the model and predictions.
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
+ import lime
+ import torch
+ import torch.nn.functional as F
+ from lime.lime_text import LimeTextExplainer
+
+ class_names = ['negative', 'positive']
+ explainer = LimeTextExplainer(class_names=class_names)
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+ model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+ pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
+
+ def predictor(texts):
+     outputs = model(**tokenizer(texts, return_tensors="pt", padding=True))
+     probas = F.softmax(outputs.logits, dim=1).detach().numpy()
+     return probas
+
+ @st.experimental_singleton
+ def critical_words(document, options=False):
+     if type(document) is not spacy.tokens.doc.Doc:
+         document = nlp(document)
+     chunks = list(document.noun_chunks)
+     pos_options = []
+     lime_options = []
+
+     #Identify what the model cares about.
+     if options:
+         #Run the LIME setup code.
+         exp = explainer.explain_instance(document.text, predictor, num_features=15, num_samples=2000)
+         lime_results = exp.as_list()
+         for feature in lime_results:
+             lime_options.append(feature[0])
+         lime_results = pd.DataFrame(lime_results, columns=["Word","Weight"])
+
+     #Identify the "parts of speech" we care about.
+     for chunk in chunks:
+         #chunk[-1] is used because testing showed it appears to always match the root.
+         root = chunk[-1]
+         #This currently matches against a list I've created. I don't know the best way to deal with this so I'm leaving it as is for the moment.
+         if root.ent_type_:
+             cur_values = []
+             if (len(chunk) > 1) and (chunk[-2].dep_ == "compound"):
+                 #Creates the compound element of the noun.
+                 compound = [x.text for x in chunk if x.dep_ == "compound"]
+                 print(f"This is the contents of {compound} and it is {all(elem in lime_options for elem in compound)} that all elements are present in {lime_options}.") #for QA
+                 #Checks that all elements of the compound are important to the model, or uses the compound directly when not checking importance.
+                 if (all(elem in lime_options for elem in compound) and (options is True)) or (options is False):
+                     #Creates a span for the entirety of the compound noun and adds it to the list.
+                     span = -1 * (1 + len(compound))
+                     pos_options.append(chunk[span:].text)
+                     cur_values += [token.text for token in chunk if token.pos_ == "ADJ"]
+                 else:
+                     print(f"The elements in {compound} could not be added to the final list because they are not all relevant to the model.")
+             else:
+                 cur_values = [token.text for token in chunk if (token.ent_type_) or (token.pos_ == "ADJ")]
+             if (all(elem in lime_options for elem in cur_values) and (options is True)) or (options is False):
+                 pos_options.extend(cur_values)
+                 print(f"From {chunk.text}, {cur_values} added to pos_options due to entity recognition.") #for QA
+         elif len(chunk) >= 1:
+             cur_values = [token.text for token in chunk if token.pos_ in ["NOUN","ADJ"]]
+             if (all(elem in lime_options for elem in cur_values) and (options is True)) or (options is False):
+                 pos_options.extend(cur_values)
+                 print(f"From {chunk.text}, {cur_values} added to pos_options due to wildcard.") #for QA
+         else:
+             print(f"No options added for '{chunk.text}'")
+
+     #Here I try to pick up pronouns, which refer to people, and adjectival complements.
+     for token in document:
+         if (token.text not in pos_options) and ((token.text in lime_options) or (options == False)):
+             #print(f"executed {token.text} with {token.pos_} and {token.dep_}") #QA
+             if (token.pos_ == "ADJ") and (token.dep_ in ["acomp","conj"]):
+                 pos_options.append(token.text)
+             elif (token.pos_ == "PRON") and (len(token.morph) != 0):
+                 if "Prs" in token.morph.get("PronType"):
+                     pos_options.append(token.text)
+
+     if options:
+         return pos_options, lime_results
+     else:
+         return pos_options
+
+ #Return the viz of elements critical to LIME.
+ def lime_viz(df):
+     if not isinstance(df, pd.DataFrame):
+         df = pd.DataFrame(df, columns=["Word","Weight"])
+     single_nearest = alt.selection_single(on='mouseover', nearest=True)
+     viz = alt.Chart(df).encode(
+         alt.X('Weight:Q', scale=alt.Scale(domain=(-1, 1))),
+         alt.Y('Word:N', sort='x', axis=None),
+         color=alt.Color("Weight", scale=alt.Scale(scheme='blueorange', domain=[0], type="threshold", range='diverging'), legend=None),
+         tooltip = ("Word","Weight")
+     ).mark_bar().properties(title="Importance of individual words")
+
+     text = viz.mark_text(
+         fill="black",
+         align='right',
+         baseline='middle'
+     ).encode(
+         text='Word:N'
+     )
+     limeplot = alt.LayerChart(layer=[viz,text], width=300).configure_axis(grid=False).configure_view(strokeWidth=0)
+     return limeplot
+
+ #Evaluate predictions using the model and pipe.
+ def eval_pred(text, return_all = False):
+     '''A basic function for evaluating the prediction from the model and turning it into a visualization-friendly number.'''
+     preds = pipe(text)
+     neg_score = -1 * preds[0][0]['score']
+     sent_neg = preds[0][0]['label']
+     pos_score = preds[0][1]['score']
+     sent_pos = preds[0][1]['label']
+     prediction = 0
+     sentiment = ''
+     if pos_score > abs(neg_score):
+         prediction = pos_score
+         sentiment = sent_pos
+     elif abs(neg_score) > pos_score:
+         prediction = neg_score
+         sentiment = sent_neg
+
+     if return_all:
+         return prediction, sentiment
+     else:
+         return prediction
+
+ def construct_nlexp(text,sentiment,probability):
+     prob = str(np.round(100 * abs(probability),2))
+     if sentiment == "NEGATIVE":
+         color_sent = ant('The model predicts the sentiment of the sentence you provided is ', (sentiment, "-", "#FFA44F"), ' with a probability of ', (prob, "neg", "#FFA44F"),"%.")
+     elif sentiment == "POSITIVE":
+         color_sent = ant('The model predicts the sentiment of the sentence you provided is ', (sentiment, "+", "#50A9FF"), ' with a probability of ', (prob, "pos", "#50A9FF"),"%.")
+     return color_sent
+
+ def get_min_max(df, seed):
+     '''This function provides the alternatives with the highest and lowest spaCy similarity scores. As similarity is based on vectorization of words and documents, this may not be the best way to identify bias.
+
+     text2 = Most Similar
+     text3 = Least Similar'''
+     maximum = df[df['similarity'] < .9999].similarity.max()
+     text2 = df.loc[df['similarity'] == maximum, 'text'].iloc[0]
+     minimum = df[df['similarity'] > .0001].similarity.min()
+     text3 = df.loc[df['similarity'] == minimum, 'text'].iloc[0]
+     return text2, text3
+
+ #Inspired by https://stackoverflow.com/questions/17758023/return-rows-in-a-dataframe-closest-to-a-user-defined-number/17758115#17758115
+ def abs_dif(df,seed):
+     '''This function enables a user to identify the alternative that is closest to the seed and the one farthest from it, should that be what they wish to display.
+
+     text2 = Nearest Prediction
+     text3 = Farthest Prediction'''
+     target = df[df['Words'] == seed].pred.iloc[0]
+     sub_df = df[df['Words'] != seed].reset_index()
+     nearest_prediction = sub_df.pred[(sub_df.pred-target).abs().argsort()[:1]]
+     farthest_prediction = sub_df.pred[(sub_df.pred-target).abs().argsort()[-1:]]
+     text2 = sub_df.text.iloc[nearest_prediction.index[0]]
+     text3 = sub_df.text.iloc[farthest_prediction.index[0]]
+     return text2, text3
+
+ #@st.experimental_singleton #Enable this to prevent it from re-running every time the code runs... which could get very messy
+ def sampled_alts(df, seed, fixed=False):
+     '''This function enables a user to select an alternate way of choosing which counterfactuals are shown for MultiNLC, MultiNLC + Lime, and VizNLC. Using it enables random sampling over the other options (e.g. spaCy similarity scores or absolute difference).
+
+     Both samples are random.'''
+     sub_df = df[df['Words'] != seed]
+     if fixed:
+         sample = sub_df.sample(n=2, random_state = 2052)
+     else:
+         sample = sub_df.sample(n=2)
+     text2 = sample.text.iloc[0]
+     text3 = sample.text.iloc[1]
+     return text2, text3
+
+ def gen_cf_country(df,_document,selection):
+     df['text'] = df.Words.apply(lambda x: re.sub(r'\b'+selection+r'\b',x,_document.text))
+     df['pred'] = df.text.apply(eval_pred)
+     df['seed'] = df.Words.apply(lambda x: 'seed' if x == selection else 'alternative')
+     df['similarity'] = df.Words.apply(lambda x: nlp(selection).similarity(nlp(x)))
+     return df
+
+ def gen_cf_profession(df,_document,selection):
+     category = df.loc[df['Words'] == selection, 'Major'].iloc[0]
+     df = df[df.Major == category]
+     df['text'] = df.Words.apply(lambda x: re.sub(r'\b'+selection+r'\b',x,_document.text))
+     df['pred'] = df.text.apply(eval_pred)
+     df['seed'] = df.Words.apply(lambda x: 'seed' if x == selection else 'alternative')
+     df['similarity'] = df.Words.apply(lambda x: nlp(selection).similarity(nlp(x)))
+     return df
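
For orientation, a minimal usage sketch of the two prediction helpers above (the inputs are illustrative; it assumes the DistilBERT SST-2 weights download and NLselector.py imports cleanly):

#Minimal usage sketch for the helpers defined above (inputs are illustrative).
from NLselector import predictor, eval_pred

#predictor() returns softmax probabilities with columns [negative, positive].
probas = predictor(["I like you. I love you."])
print(probas.shape)    #(1, 2)

#eval_pred() folds the two scores into one signed, visualization-friendly number plus a label.
prediction, sentiment = eval_pred("I like you. I love you.", return_all=True)
print(prediction, sentiment)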
.ipynb_checkpoints/WNgen-checkpoint.py ADDED
@@ -0,0 +1,313 @@
+ #Import necessary libraries.
+ import re, nltk, pandas as pd, numpy as np, ssl, streamlit as st
+ from nltk.corpus import wordnet
+ import spacy
+ nlp = spacy.load("Assets/Models/en_core_web_lg")
+
+ #Import necessary parts for predicting things.
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
+ import torch
+ import torch.nn.functional as F
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+ model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+ pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
+
+ #If an error is thrown that the corpus "omw-1.4" isn't discoverable, you can use this workaround. (https://stackoverflow.com/questions/38916452/nltk-download-ssl-certificate-verify-failed)
+ try:
+     _create_unverified_https_context = ssl._create_unverified_context
+ except AttributeError:
+     pass
+ else:
+     ssl._create_default_https_context = _create_unverified_https_context
+
+ nltk.download('omw-1.4')
+
+ #A simple function to pull synonyms and antonyms using spaCy's POS.
+ def syn_ant(word,POS=False,human=True):
+     pos_options = ['NOUN','VERB','ADJ','ADV']
+     synonyms = []
+     antonyms = []
+     #WordNet hates spaces so you have to remove them.
+     if " " in word:
+         word = word.replace(" ", "_")
+
+     if POS in pos_options:
+         for syn in wordnet.synsets(word, pos=getattr(wordnet, POS)):
+             for l in syn.lemmas():
+                 current = l.name()
+                 if human:
+                     current = re.sub("_"," ",current)
+                 synonyms.append(current)
+                 if l.antonyms():
+                     for ant in l.antonyms():
+                         cur_ant = ant.name()
+                         if human:
+                             cur_ant = re.sub("_"," ",cur_ant)
+                         antonyms.append(cur_ant)
+     else:
+         for syn in wordnet.synsets(word):
+             for l in syn.lemmas():
+                 current = l.name()
+                 if human:
+                     current = re.sub("_"," ",current)
+                 synonyms.append(current)
+                 if l.antonyms():
+                     for ant in l.antonyms():
+                         cur_ant = ant.name()
+                         if human:
+                             cur_ant = re.sub("_"," ",cur_ant)
+                         antonyms.append(cur_ant)
+     synonyms = list(set(synonyms))
+     antonyms = list(set(antonyms))
+     return synonyms, antonyms
+
+ def process_text(text):
+     doc = nlp(text.lower())
+     result = []
+     for token in doc:
+         if (token.is_stop) or (token.is_punct) or (token.lemma_ == '-PRON-'):
+             continue
+         result.append(token.lemma_)
+     return " ".join(result)
+
+ def clean_definition(syn):
+     #This function removes stop words from sentences to improve document-level similarity for differentiation.
+     if type(syn) is str:
+         synset = wordnet.synset(syn).definition()
+     elif type(syn) is nltk.corpus.reader.wordnet.Synset:
+         synset = syn.definition()
+     definition = nlp(process_text(synset))
+     return definition
+
+ def check_sim(a,b):
+     if type(a) is str and type(b) is str:
+         a = nlp(a)
+         b = nlp(b)
+     similarity = a.similarity(b)
+     return similarity
+
+ #Builds a dataframe dynamically from WordNet using NLTK.
+ def wordnet_df(word,POS=False,seed_definition=None):
+     pos_options = ['NOUN','VERB','ADJ','ADV']
+     synonyms, antonyms = syn_ant(word,POS,False)
+     #print(synonyms, antonyms) #for QA purposes
+     words = []
+     cats = []
+     #WordNet hates spaces so you have to remove them.
+     m_word = word.replace(" ", "_")
+
+     #Allow the user to pick a seed definition if it is not provided directly to the function. Currently not working so it's commented out.
+     '''#Commented out the way it was designed so I can do it through Streamlit (keeping it for posterity, and for anyone who wants to use it without Streamlit).
+     for d in range(len(seed_definitions)):
+         print(f"{d}: {seed_definitions[d]}")
+     #choice = int(input("Which of the definitions above most aligns to your selection?"))
+     seed_definition = seed_definitions[choice]'''
+     try:
+         definition = seed_definition
+     except:
+         st.write("You did not supply a definition.")
+
+     if POS in pos_options:
+         for syn in wordnet.synsets(m_word, pos=getattr(wordnet, POS)):
+             if check_sim(process_text(seed_definition),process_text(syn.definition())) > .7:
+                 cur_lemmas = syn.lemmas()
+                 hypos = syn.hyponyms()
+                 for hypo in hypos:
+                     cur_lemmas.extend(hypo.lemmas())
+                 for lemma in cur_lemmas:
+                     ll = lemma.name()
+                     cats.append(re.sub("_"," ", syn.name().split(".")[0]))
+                     words.append(re.sub("_"," ",ll))
+
+         if len(synonyms) > 0:
+             for w in synonyms:
+                 w = w.replace(" ","_")
+                 for syn in wordnet.synsets(w, pos=getattr(wordnet, POS)):
+                     if check_sim(process_text(seed_definition),process_text(syn.definition())) > .6:
+                         cur_lemmas = syn.lemmas()
+                         hypos = syn.hyponyms()
+                         for hypo in hypos:
+                             cur_lemmas.extend(hypo.lemmas())
+                         for lemma in cur_lemmas:
+                             ll = lemma.name()
+                             cats.append(re.sub("_"," ", syn.name().split(".")[0]))
+                             words.append(re.sub("_"," ",ll))
+         if len(antonyms) > 0:
+             for a in antonyms:
+                 a = a.replace(" ","_")
+                 for syn in wordnet.synsets(a, pos=getattr(wordnet, POS)):
+                     if check_sim(process_text(seed_definition),process_text(syn.definition())) > .26:
+                         cur_lemmas = syn.lemmas()
+                         hypos = syn.hyponyms()
+                         for hypo in hypos:
+                             cur_lemmas.extend(hypo.lemmas())
+                         for lemma in cur_lemmas:
+                             ll = lemma.name()
+                             cats.append(re.sub("_"," ", syn.name().split(".")[0]))
+                             words.append(re.sub("_"," ",ll))
+     else:
+         for syn in wordnet.synsets(m_word):
+             if check_sim(process_text(seed_definition),process_text(syn.definition())) > .7:
+                 cur_lemmas = syn.lemmas()
+                 hypos = syn.hyponyms()
+                 for hypo in hypos:
+                     cur_lemmas.extend(hypo.lemmas())
+                 for lemma in cur_lemmas:
+                     ll = lemma.name()
+                     cats.append(re.sub("_"," ", syn.name().split(".")[0]))
+                     words.append(re.sub("_"," ",ll))
+         if len(synonyms) > 0:
+             for w in synonyms:
+                 w = w.replace(" ","_")
+                 for syn in wordnet.synsets(w):
+                     if check_sim(process_text(seed_definition),process_text(syn.definition())) > .6:
+                         cur_lemmas = syn.lemmas()
+                         hypos = syn.hyponyms()
+                         for hypo in hypos:
+                             cur_lemmas.extend(hypo.lemmas())
+                         for lemma in cur_lemmas:
+                             ll = lemma.name()
+                             cats.append(re.sub("_"," ", syn.name().split(".")[0]))
+                             words.append(re.sub("_"," ",ll))
+         if len(antonyms) > 0:
+             for a in antonyms:
+                 a = a.replace(" ","_")
+                 for syn in wordnet.synsets(a):
+                     if check_sim(process_text(seed_definition),process_text(syn.definition())) > .26:
+                         cur_lemmas = syn.lemmas()
+                         hypos = syn.hyponyms()
+                         for hypo in hypos:
+                             cur_lemmas.extend(hypo.lemmas())
+                         for lemma in cur_lemmas:
+                             ll = lemma.name()
+                             cats.append(re.sub("_"," ", syn.name().split(".")[0]))
+                             words.append(re.sub("_"," ",ll))
+
+     df = {"Categories":cats, "Words":words}
+     df = pd.DataFrame(df)
+     df = df.drop_duplicates().reset_index()
+     df = df.drop("index", axis=1)
+     return df
+
+ def eval_pred_test(text, return_all = False):
+     '''A basic function for evaluating the prediction from the model and turning it into a visualization-friendly number.'''
+     preds = pipe(text)
+     neg_score = -1 * preds[0][0]['score']
+     sent_neg = preds[0][0]['label']
+     pos_score = preds[0][1]['score']
+     sent_pos = preds[0][1]['label']
+     prediction = 0
+     sentiment = ''
+     if pos_score > abs(neg_score):
+         prediction = pos_score
+         sentiment = sent_pos
+     elif abs(neg_score) > pos_score:
+         prediction = neg_score
+         sentiment = sent_neg
+
+     if return_all:
+         return prediction, sentiment
+     else:
+         return prediction
+
+ def get_parallel(word, seed_definition, QA=False):
+     cleaned = nlp(process_text(seed_definition))
+     root_syns = wordnet.synsets(word)
+     hypers = []
+     new_hypos = []
+
+     for syn in root_syns:
+         hypers.extend(syn.hypernyms())
+
+     for syn in hypers:
+         new_hypos.extend(syn.hyponyms())
+
+     hypos = list(set([syn for syn in new_hypos if cleaned.similarity(nlp(process_text(syn.definition()))) >= .75]))[:25]
+     # with st.sidebar:
+     #     st.write(f"The number of hypos is {len(hypos)} during get_parallel at similarity >= .75.") #QA
+
+     if len(hypos) <= 1:
+         hypos = root_syns
+     elif len(hypos) < 3:
+         hypos = list(set([syn for syn in new_hypos if cleaned.similarity(nlp(process_text(syn.definition()))) >= .5]))[:25] #added a cap to each
+     elif len(hypos) < 10:
+         hypos = list(set([syn for syn in new_hypos if cleaned.similarity(nlp(process_text(syn.definition()))) >= .66]))[:25]
+     elif len(hypos) >= 10:
+         hypos = list(set([syn for syn in new_hypos if cleaned.similarity(nlp(process_text(syn.definition()))) >= .8]))[:25]
+     if QA:
+         print(hypers)
+         print(hypos)
+         return hypers, hypos
+     else:
+         return hypos
+
+ #Builds a dataframe dynamically from WordNet using NLTK.
+ def wordnet_parallel_df(word,seed_definition=None):
+     words = []
+     cats = []
+     #WordNet hates spaces so you have to remove them.
+     m_word = word.replace(" ", "_")
+
+     #Add synonyms and antonyms for diversity.
+     synonyms, antonyms = syn_ant(word)
+     words.extend(synonyms)
+     cats.extend(["synonyms" for n in range(len(synonyms))])
+     words.extend(antonyms)
+     cats.extend(["antonyms" for n in range(len(antonyms))])
+
+     try:
+         hypos = get_parallel(m_word,seed_definition)
+     except:
+         st.write("You did not supply a definition.")
+     #Allow the user to pick a seed definition if it is not provided directly to the function.
+     '''if seed_definition is None:
+         if POS in pos_options:
+             seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word, pos=getattr(wordnet, POS))]
+         else:
+             seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word)]
+         for d in range(len(seed_definitions)):
+             print(f"{d}: {seed_definitions[d]}")
+         choice = int(input("Which of the definitions above most aligns to your selection?"))
+         seed_definition = seed_definitions[choice]'''
+
+     #This is a QA section.
+     # with st.sidebar:
+     #     st.write(f"The number of hypos is {len(hypos)} during parallel df creation.") #QA
+
+     #Transforms hypos into lemmas.
+     for syn in hypos:
+         cur_lemmas = syn.lemmas()
+         sub_hypos = syn.hyponyms()
+         for hypo in sub_hypos:
+             cur_lemmas.extend(hypo.lemmas())
+         for lemma in cur_lemmas:
+             ll = lemma.name()
+             cats.append(re.sub("_"," ", syn.name().split(".")[0]))
+             words.append(re.sub("_"," ",ll))
+     # with st.sidebar:
+     #     st.write(f'There are {len(words)} words in the dataframe at the beginning of df creation.') #QA
+
+     df = {"Categories":cats, "Words":words}
+     df = pd.DataFrame(df)
+     df = df.drop_duplicates("Words").reset_index()
+     df = df.drop("index", axis=1)
+     return df
+
+ #@st.experimental_singleton(suppress_st_warning=True)
+ def cf_from_wordnet_df(seed,text,seed_definition=False):
+     seed_token = nlp(seed)
+     seed_POS = seed_token[0].pos_
+     #print(seed_POS) #QA
+     try:
+         df = wordnet_parallel_df(seed,seed_definition)
+     except:
+         st.write("You did not supply a definition.")
+
+     df["text"] = df.Words.apply(lambda x: re.sub(r'\b'+seed+r'\b',x,text))
+     df["similarity"] = df.Words.apply(lambda x: seed_token[0].similarity(nlp(x)[0]))
+     df = df[df["similarity"] > 0].reset_index()
+     df.drop("index", axis=1, inplace=True)
+     df["pred"] = df.text.apply(eval_pred_test)
+     #I added this because I think the end results will be better if we ensure the seed is in the data we generate counterfactuals from.
+     df['seed'] = df.Words.apply(lambda x: 'seed' if x.lower() == seed.lower() else 'alternative')
+     return df
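
A minimal sketch of the WordNet flow above, from synonym lookup to a counterfactual dataframe (the seed word, sentence, and definition choice are illustrative; it assumes WNgen.py imports and the wordnet/omw-1.4 corpora are installed):

#Minimal usage sketch for the WordNet helpers above (inputs are illustrative).
from nltk.corpus import wordnet
from WNgen import syn_ant, cf_from_wordnet_df

synonyms, antonyms = syn_ant("doctor", POS="NOUN")

#Pick a seed definition the way app.py's get_def() does, then build the dataframe of alternatives, similarities, and predictions.
seed_definition = wordnet.synsets("doctor")[0].definition()
cf_df = cf_from_wordnet_df("doctor", "The doctor was kind.", seed_definition=seed_definition)
print(cf_df[["Words","similarity","pred","seed"]].head())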
.ipynb_checkpoints/app-checkpoint.py ADDED
@@ -0,0 +1,340 @@
+ #Import the libraries we know we'll need for the Generator.
+ import pandas as pd, spacy, nltk, numpy as np
+ from spacy.matcher import Matcher
+ nlp = spacy.load("Assets/Models/en_core_web_lg")
+
+ #Import the libraries to support the model and predictions.
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
+ import lime
+ import torch
+ import torch.nn.functional as F
+ from lime.lime_text import LimeTextExplainer
+
+ #Import the libraries for human interaction and visualization.
+ import altair as alt
+ import streamlit as st
+ from annotated_text import annotated_text as ant
+
+ #Import functions needed to build dataframes of keywords from WordNet.
+ from WNgen import *
+ from NLselector import *
+
+ @st.experimental_singleton
+ def set_up_explainer():
+     class_names = ['negative', 'positive']
+     explainer = LimeTextExplainer(class_names=class_names)
+     return explainer
+
+ @st.experimental_singleton
+ def prepare_model():
+     tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+     model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+     pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
+     return tokenizer, model, pipe
+
+ @st.experimental_singleton
+ def prepare_lists():
+     countries = pd.read_csv("Assets/Countries/combined-countries.csv")
+     professions = pd.read_csv("Assets/Professions/soc-professions-2018.csv")
+     word_lists = [list(countries.Words),list(professions.Words)]
+     return countries, professions, word_lists
+
+ #Provide all the functions necessary to run the app.
+ #Get definitions for control flow in Streamlit.
+ def get_def(word, POS=False):
+     pos_options = ['NOUN','VERB','ADJ','ADV']
+     m_word = word.replace(" ", "_")
+     if POS in pos_options:
+         seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word, pos=getattr(wordnet, POS))]
+     else:
+         seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word)]
+     seed_definition = col1.selectbox("Which definition is most relevant?", seed_definitions, key= "WN_definition")
+     if col1.button("Choose Definition"):
+         col1.write("You've chosen a definition.")
+         st.session_state.definition = seed_definition
+         return seed_definition
+     else:
+         col1.write("Please choose a definition.")
+
+ ###Start coding the actual app.###
+ st.set_page_config(layout="wide", page_title="Natural Language Counterfactuals (NLC)")
+ layouts = ['Natural Language Explanation', 'Lime Explanation', 'MultiNLC', 'MultiNLC + Lime', 'VizNLC']
+ alternatives = ['Similarity', 'Sampling (Random)', 'Sampling (Fixed)', 'Probability']
+ alt_choice = "Similarity"
+
+ #Content in the sidebar.
+ st.sidebar.info('This is an interface for exploring how different layouts of natural language explanations (NLE) may appear to people. It is intended to allow individuals to provide feedback on specific versions, as well as to compare what one offers over the others for the same inputs.')
+ layout = st.sidebar.selectbox("Select a layout to explore.", layouts)
+ alt_choice = st.sidebar.selectbox("Choose the way you want to display alternatives.", alternatives) #May be commented out if we decide this isn't useful functionality.
+
+ #Set up the main area layout.
+ st.title('Natural Language Counterfactuals (NLC) Prototype')
+ st.subheader(f'Current Layout: {layout}')
+ text = st.text_input('Provide a sentence you want to evaluate.', placeholder = "I like you. I love you.", key="input")
+
+ #Prepare the model, data, and Lime. Set starting variables.
+ tokenizer, model, pipe = prepare_model()
+ countries, professions, word_lists = prepare_lists()
+ explainer = set_up_explainer()
+ text2 = ""
+ text3 = ""
+ cf_df = pd.DataFrame()
+ if 'definition' not in st.session_state:
+     st.session_state.definition = "<(^_')>"
+
+ #Outline the various user interfaces we have built.
+
+ col1, col2, col3 = st.columns(3)
+ if layout == 'Natural Language Explanation':
+     with col1:
+         if st.session_state.input != "":
+             st.caption("This is the sentence you provided.")
+             st.write(text)
+             probability, sentiment = eval_pred(text, return_all=True)
+             nat_lang_explanation = construct_nlexp(text,sentiment,probability)
+
+ if layout == 'Lime Explanation':
+     with col1:
+         #Use spaCy to make the sentence into a doc so we can do NLP.
+         doc = nlp(st.session_state.input)
+         #Evaluate the provided sentence for sentiment and probability.
+         if st.session_state.input != "":
+             st.caption("This is the sentence you provided.")
+             st.write(text)
+             probability, sentiment = eval_pred(text, return_all=True)
+             options, lime = critical_words(st.session_state.input,options=True)
+             nat_lang_explanation = construct_nlexp(text,sentiment,probability)
+             st.write(" ")
+             st.altair_chart(lime_viz(lime))
+
+ if layout == 'MultiNLC':
+     with col1:
+         #Use spaCy to make the sentence into a doc so we can do NLP.
+         doc = nlp(st.session_state.input)
+         #Evaluate the provided sentence for sentiment and probability.
+         if st.session_state.input != "":
+             st.caption("This is the sentence you provided.")
+             st.write(text)
+             probability, sentiment = eval_pred(text, return_all=True)
+             options, lime = critical_words(st.session_state.input,options=True)
+             nat_lang_explanation = construct_nlexp(text,sentiment,probability)
+
+             #Allow the user to pick an option to generate counterfactuals from.
+             option = st.radio('Which word would you like to use to generate alternatives?', options, key = "option")
+             if (any(option in sublist for sublist in word_lists)):
+                 st.write(f'You selected {option}. It matches a list.')
+             elif option:
+                 st.write(f'You selected {option}. It does not match a list.')
+                 definition = get_def(option)
+             else:
+                 st.write('Awaiting your selection.')
+
+             if st.button('Generate Alternatives'):
+                 if option in list(countries.Words):
+                     cf_df = gen_cf_country(countries, doc, option)
+                     st.success('Alternatives created.')
+                 elif option in list(professions.Words):
+                     cf_df = gen_cf_profession(professions, doc, option)
+                     st.success('Alternatives created.')
+                 else:
+                     with st.sidebar:
+                         ant("Generating alternatives for",(option,"opt","#E0FBFB"), "with a definition of: ",(st.session_state.definition,"def","#E0FBFB"),".")
+                     cf_df = cf_from_wordnet_df(option,text,seed_definition=st.session_state.definition)
+                     st.success('Alternatives created.')
+
+     if len(cf_df) != 0:
+         if alt_choice == "Similarity":
+             text2, text3 = get_min_max(cf_df, option)
+             col2.caption(f"This sentence is 'similar' to {option}.")
+             col3.caption(f"This sentence is 'not similar' to {option}.")
+         elif alt_choice == "Sampling (Random)":
+             text2, text3 = sampled_alts(cf_df, option)
+             col2.caption(f"This sentence is a random sample from the alternatives.")
+             col3.caption(f"This sentence is a random sample from the alternatives.")
+         elif alt_choice == "Sampling (Fixed)":
+             text2, text3 = sampled_alts(cf_df, option, fixed=True)
+             col2.caption(f"This sentence is a fixed sample of the alternatives.")
+             col3.caption(f"This sentence is a fixed sample of the alternatives.")
+         elif alt_choice == "Probability":
+             text2, text3 = abs_dif(cf_df, option)
+             col2.caption(f"This sentence is the closest prediction in the model.")
+             col3.caption(f"This sentence is the farthest prediction in the model.")
+         with st.sidebar:
+             st.info(f"Alternatives generated: {len(cf_df)}")
+
+     with col2:
+         if text2 != "":
+             sim2 = cf_df.loc[cf_df['text'] == text2, 'similarity'].iloc[0]
+             st.write(text2)
+             probability2, sentiment2 = eval_pred(text2, return_all=True)
+             nat_lang_explanation = construct_nlexp(text2,sentiment2,probability2)
+             #st.info(f" Similarity Score: {np.round(sim2, 2)}, Num Checked: {len(cf_df)}") #for QA purposes
+
+     with col3:
+         if text3 != "":
+             sim3 = cf_df.loc[cf_df['text'] == text3, 'similarity'].iloc[0]
+             st.write(text3)
+             probability3, sentiment3 = eval_pred(text3, return_all=True)
+             nat_lang_explanation = construct_nlexp(text3,sentiment3,probability3)
+             #st.info(f"Similarity Score: {np.round(sim3, 2)}, Num Checked: {len(cf_df)}") #for QA purposes
+
+ if layout == 'MultiNLC + Lime':
+     with col1:
+
+         #Use spaCy to make the sentence into a doc so we can do NLP.
+         doc = nlp(st.session_state.input)
+         #Evaluate the provided sentence for sentiment and probability.
+         if st.session_state.input != "":
+             st.caption("This is the sentence you provided.")
+             st.write(text)
+             probability, sentiment = eval_pred(text, return_all=True)
+             options, lime = critical_words(st.session_state.input,options=True)
+             nat_lang_explanation = construct_nlexp(text,sentiment,probability)
+             st.write(" ")
+             st.altair_chart(lime_viz(lime))
+
+             #Allow the user to pick an option to generate counterfactuals from.
+             option = st.radio('Which word would you like to use to generate alternatives?', options, key = "option")
+             if (any(option in sublist for sublist in word_lists)):
+                 st.write(f'You selected {option}. It matches a list.')
+             elif option:
+                 st.write(f'You selected {option}. It does not match a list.')
+                 definition = get_def(option)
+             else:
+                 st.write('Awaiting your selection.')
+
+             if st.button('Generate Alternatives'):
+                 if option in list(countries.Words):
+                     cf_df = gen_cf_country(countries, doc, option)
+                     st.success('Alternatives created.')
+                 elif option in list(professions.Words):
+                     cf_df = gen_cf_profession(professions, doc, option)
+                     st.success('Alternatives created.')
+                 else:
+                     with st.sidebar:
+                         ant("Generating alternatives for",(option,"opt","#E0FBFB"), "with a definition of: ",(st.session_state.definition,"def","#E0FBFB"),".")
+                     cf_df = cf_from_wordnet_df(option,text,seed_definition=st.session_state.definition)
+                     st.success('Alternatives created.')
+
+     if len(cf_df) != 0:
+         if alt_choice == "Similarity":
+             text2, text3 = get_min_max(cf_df, option)
+             col2.caption(f"This sentence is 'similar' to {option}.")
+             col3.caption(f"This sentence is 'not similar' to {option}.")
+         elif alt_choice == "Sampling (Random)":
+             text2, text3 = sampled_alts(cf_df, option)
+             col2.caption(f"This sentence is a random sample from the alternatives.")
+             col3.caption(f"This sentence is a random sample from the alternatives.")
+         elif alt_choice == "Sampling (Fixed)":
+             text2, text3 = sampled_alts(cf_df, option, fixed=True)
+             col2.caption(f"This sentence is a fixed sample of the alternatives.")
+             col3.caption(f"This sentence is a fixed sample of the alternatives.")
+         elif alt_choice == "Probability":
+             text2, text3 = abs_dif(cf_df, option)
+             col2.caption(f"This sentence is the closest prediction in the model.")
+             col3.caption(f"This sentence is the farthest prediction in the model.")
+         with st.sidebar:
+             st.info(f"Alternatives generated: {len(cf_df)}")
+
+     with col2:
+         if text2 != "":
+             sim2 = cf_df.loc[cf_df['text'] == text2, 'similarity'].iloc[0]
+             st.write(text2)
+             probability2, sentiment2 = eval_pred(text2, return_all=True)
+             nat_lang_explanation = construct_nlexp(text2,sentiment2,probability2)
+             exp2 = explainer.explain_instance(text2, predictor, num_features=15, num_samples=2000)
+             lime_results2 = exp2.as_list()
+             st.write(" ")
+             st.altair_chart(lime_viz(lime_results2))
+
+     with col3:
+         if text3 != "":
+             sim3 = cf_df.loc[cf_df['text'] == text3, 'similarity'].iloc[0]
+             st.write(text3)
+             probability3, sentiment3 = eval_pred(text3, return_all=True)
+             nat_lang_explanation = construct_nlexp(text3,sentiment3,probability3)
+             exp3 = explainer.explain_instance(text3, predictor, num_features=15, num_samples=2000)
+             lime_results3 = exp3.as_list()
+             st.write(" ")
+             st.altair_chart(lime_viz(lime_results3))
+
+ if layout == 'VizNLC':
+     with col1:
+
+         #Use spaCy to make the sentence into a doc so we can do NLP.
+         doc = nlp(st.session_state.input)
+         #Evaluate the provided sentence for sentiment and probability.
+         if st.session_state.input != "":
+             st.caption("This is the sentence you provided.")
+             st.write(text)
+             probability, sentiment = eval_pred(text, return_all=True)
+             options, lime = critical_words(st.session_state.input,options=True)
+             nat_lang_explanation = construct_nlexp(text,sentiment,probability)
+             st.write(" ")
+             st.altair_chart(lime_viz(lime))
+
+             #Allow the user to pick an option to generate counterfactuals from.
+             option = st.radio('Which word would you like to use to generate alternatives?', options, key = "option")
+             if (any(option in sublist for sublist in word_lists)):
+                 st.write(f'You selected {option}. It matches a list.')
+             elif option:
+                 st.write(f'You selected {option}. It does not match a list.')
+                 definition = get_def(option)
+             else:
+                 st.write('Awaiting your selection.')
+
+             if st.button('Generate Alternatives'):
+                 if option in list(countries.Words):
+                     cf_df = gen_cf_country(countries, doc, option)
+                     st.success('Alternatives created.')
+                 elif option in list(professions.Words):
+                     cf_df = gen_cf_profession(professions, doc, option)
+                     st.success('Alternatives created.')
+                 else:
+                     with st.sidebar:
+                         ant("Generating alternatives for",(option,"opt","#E0FBFB"), "with a definition of: ",(st.session_state.definition,"def","#E0FBFB"),".")
+                     cf_df = cf_from_wordnet_df(option,text,seed_definition=st.session_state.definition)
+                     st.success('Alternatives created.')
+
+     if len(cf_df) != 0:
+         if alt_choice == "Similarity":
+             text2, text3 = get_min_max(cf_df, option)
+             col2.caption(f"This sentence is 'similar' to {option}.")
+             col3.caption(f"This sentence is 'not similar' to {option}.")
+         elif alt_choice == "Sampling (Random)":
+             text2, text3 = sampled_alts(cf_df, option)
+             col2.caption(f"This sentence is a random sample from the alternatives.")
+             col3.caption(f"This sentence is a random sample from the alternatives.")
+         elif alt_choice == "Sampling (Fixed)":
+             text2, text3 = sampled_alts(cf_df, option, fixed=True)
+             col2.caption(f"This sentence is a fixed sample of the alternatives.")
+             col3.caption(f"This sentence is a fixed sample of the alternatives.")
+         elif alt_choice == "Probability":
+             text2, text3 = abs_dif(cf_df, option)
+             col2.caption(f"This sentence is the closest prediction in the model.")
+             col3.caption(f"This graph represents the {len(cf_df)} alternatives to {option}.")
+         with st.sidebar:
+             st.info(f"Alternatives generated: {len(cf_df)}")
+
+     with col2:
+         if text2 != "":
+             sim2 = cf_df.loc[cf_df['text'] == text2, 'similarity'].iloc[0]
+             st.write(text2)
+             probability2, sentiment2 = eval_pred(text2, return_all=True)
+             nat_lang_explanation = construct_nlexp(text2,sentiment2,probability2)
+             exp2 = explainer.explain_instance(text2, predictor, num_features=15, num_samples=2000)
+             lime_results2 = exp2.as_list()
+             st.write(" ")
+             st.altair_chart(lime_viz(lime_results2))
+
+     with col3:
+         if not cf_df.empty:
+             single_nearest = alt.selection_single(on='mouseover', nearest=True)
+             full = alt.Chart(cf_df).encode(
+                 alt.X('similarity:Q', scale=alt.Scale(zero=False)),
+                 alt.Y('pred:Q'),
+                 color=alt.Color('Categories:N', legend=alt.Legend(title="Color of Categories")),
+                 size=alt.Size('seed:O'),
+                 tooltip=('Categories','text','pred')
+             ).mark_circle(opacity=.5).properties(width=450, height=450).add_selection(single_nearest)
+             st.altair_chart(full)
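
The app above is launched with "streamlit run app.py". The @st.experimental_singleton functions exist because Streamlit reruns the whole script on every interaction; the decorator caches the transformer, word lists, and explainer once per server process. A minimal sketch of that pattern (the resource loader here is hypothetical):

#Minimal sketch of the caching pattern app.py relies on.
import streamlit as st

@st.experimental_singleton
def load_expensive_resource():    #hypothetical stand-in for prepare_model() and friends
    return {"loaded": True}

resource = load_expensive_resource()    #computed once, then served from cache on reruns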
Assets/.DS_Store CHANGED
Binary files a/Assets/.DS_Store and b/Assets/.DS_Store differ
 
{NER-tweaks → Assets/Models}/.DS_Store RENAMED
Binary files a/NER-tweaks/.DS_Store and b/Assets/Models/.DS_Store differ
 
Assets/Models/en_core_web_lg/attribute_ruler/patterns ADDED
Binary file (14.8 kB).
 
Assets/Models/en_core_web_lg/config.cfg ADDED
@@ -0,0 +1,266 @@
+ [paths]
+ train = null
+ dev = null
+ vectors = null
+ init_tok2vec = null
+
+ [system]
+ gpu_allocator = null
+ seed = 0
+
+ [nlp]
+ lang = "en"
+ pipeline = ["tok2vec","tagger","parser","senter","attribute_ruler","lemmatizer","ner"]
+ disabled = ["senter"]
+ before_creation = null
+ after_creation = null
+ after_pipeline_creation = null
+ batch_size = 256
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
+
+ [components]
+
+ [components.attribute_ruler]
+ factory = "attribute_ruler"
+ scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
+ validate = false
+
+ [components.lemmatizer]
+ factory = "lemmatizer"
+ mode = "rule"
+ model = null
+ overwrite = false
+ scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
+
+ [components.ner]
+ factory = "ner"
+ incorrect_spans_key = null
+ moves = null
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
+ update_with_oracle_cut_size = 100
+
+ [components.ner.model]
+ @architectures = "spacy.TransitionBasedParser.v2"
+ state_type = "ner"
+ extra_state_tokens = false
+ hidden_width = 64
+ maxout_pieces = 2
+ use_upper = true
+ nO = null
+
+ [components.ner.model.tok2vec]
+ @architectures = "spacy.Tok2Vec.v2"
+
+ [components.ner.model.tok2vec.embed]
+ @architectures = "spacy.MultiHashEmbed.v2"
+ width = 96
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
+ rows = [5000,1000,2500,2500,50]
+ include_static_vectors = true
+
+ [components.ner.model.tok2vec.encode]
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
+ width = 96
+ depth = 4
+ window_size = 1
+ maxout_pieces = 3
+
+ [components.parser]
+ factory = "parser"
+ learn_tokens = false
+ min_action_freq = 30
+ moves = null
+ scorer = {"@scorers":"spacy.parser_scorer.v1"}
+ update_with_oracle_cut_size = 100
+
+ [components.parser.model]
+ @architectures = "spacy.TransitionBasedParser.v2"
+ state_type = "parser"
+ extra_state_tokens = false
+ hidden_width = 64
+ maxout_pieces = 2
+ use_upper = true
+ nO = null
+
+ [components.parser.model.tok2vec]
+ @architectures = "spacy.Tok2VecListener.v1"
+ width = ${components.tok2vec.model.encode:width}
+ upstream = "tok2vec"
+
+ [components.senter]
+ factory = "senter"
+ overwrite = false
+ scorer = {"@scorers":"spacy.senter_scorer.v1"}
+
+ [components.senter.model]
+ @architectures = "spacy.Tagger.v2"
+ nO = null
+ normalize = false
+
+ [components.senter.model.tok2vec]
+ @architectures = "spacy.Tok2Vec.v2"
+
+ [components.senter.model.tok2vec.embed]
+ @architectures = "spacy.MultiHashEmbed.v2"
+ width = 16
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
+ rows = [1000,500,500,500,50]
+ include_static_vectors = true
+
+ [components.senter.model.tok2vec.encode]
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
+ width = 16
+ depth = 2
+ window_size = 1
+ maxout_pieces = 2
+
+ [components.tagger]
+ factory = "tagger"
+ neg_prefix = "!"
+ overwrite = false
+ scorer = {"@scorers":"spacy.tagger_scorer.v1"}
+
+ [components.tagger.model]
+ @architectures = "spacy.Tagger.v2"
+ nO = null
+ normalize = false
+
+ [components.tagger.model.tok2vec]
+ @architectures = "spacy.Tok2VecListener.v1"
+ width = ${components.tok2vec.model.encode:width}
+ upstream = "tok2vec"
+
+ [components.tok2vec]
+ factory = "tok2vec"
+
+ [components.tok2vec.model]
+ @architectures = "spacy.Tok2Vec.v2"
+
+ [components.tok2vec.model.embed]
+ @architectures = "spacy.MultiHashEmbed.v2"
+ width = ${components.tok2vec.model.encode:width}
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
+ rows = [5000,1000,2500,2500,50]
+ include_static_vectors = true
+
+ [components.tok2vec.model.encode]
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
+ width = 96
+ depth = 4
+ window_size = 1
+ maxout_pieces = 3
+
+ [corpora]
+
+ [corpora.dev]
+ @readers = "spacy.Corpus.v1"
+ path = ${paths.dev}
+ gold_preproc = false
+ max_length = 0
+ limit = 0
+ augmenter = null
+
+ [corpora.train]
+ @readers = "spacy.Corpus.v1"
+ path = ${paths.train}
+ gold_preproc = false
+ max_length = 0
+ limit = 0
+ augmenter = null
+
+ [training]
+ train_corpus = "corpora.train"
+ dev_corpus = "corpora.dev"
+ seed = ${system:seed}
+ gpu_allocator = ${system:gpu_allocator}
+ dropout = 0.1
+ accumulate_gradient = 1
+ patience = 5000
+ max_epochs = 0
+ max_steps = 100000
+ eval_frequency = 1000
+ frozen_components = []
+ before_to_disk = null
+ annotating_components = []
+
+ [training.batcher]
+ @batchers = "spacy.batch_by_words.v1"
+ discard_oversize = false
+ tolerance = 0.2
+ get_length = null
+
+ [training.batcher.size]
+ @schedules = "compounding.v1"
+ start = 100
+ stop = 1000
+ compound = 1.001
+ t = 0.0
+
+ [training.logger]
+ @loggers = "spacy.ConsoleLogger.v1"
+ progress_bar = false
+
+ [training.optimizer]
+ @optimizers = "Adam.v1"
+ beta1 = 0.9
+ beta2 = 0.999
+ L2_is_weight_decay = true
+ L2 = 0.01
+ grad_clip = 1.0
+ use_averages = true
+ eps = 0.00000001
+ learn_rate = 0.001
+
+ [training.score_weights]
+ tag_acc = 0.16
+ dep_uas = 0.0
+ dep_las = 0.16
+ dep_las_per_type = null
+ sents_p = null
+ sents_r = null
+ sents_f = 0.02
+ lemma_acc = 0.5
+ ents_f = 0.16
+ ents_p = 0.0
+ ents_r = 0.0
+ ents_per_type = null
+ speed = 0.0
+
+ [pretraining]
+
+ [initialize]
+ vocab_data = null
+ vectors = ${paths.vectors}
+ init_tok2vec = ${paths.init_tok2vec}
+ before_init = null
+ after_init = null
+
+ [initialize.components]
+
+ [initialize.components.ner]
+
+ [initialize.components.ner.labels]
+ @readers = "spacy.read_labels.v1"
+ path = "corpus/labels/ner.json"
+ require = false
+
+ [initialize.components.parser]
+
+ [initialize.components.parser.labels]
+ @readers = "spacy.read_labels.v1"
+ path = "corpus/labels/parser.json"
+ require = false
+
+ [initialize.components.tagger]
+
+ [initialize.components.tagger.labels]
+ @readers = "spacy.read_labels.v1"
+ path = "corpus/labels/tagger.json"
+ require = false
+
+ [initialize.lookups]
+ @misc = "spacy.LookupsDataLoader.v1"
+ lang = ${nlp.lang}
+ tables = ["lexeme_norm"]
+
+ [initialize.tokenizer]
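
Once the LFS content is pulled, the directory above is a complete spaCy v3 pipeline, which is why the scripts in this commit load it by path rather than by installed package name. A minimal sketch (the example sentence is illustrative):

#Minimal sketch: load the bundled pipeline by path, as NLselector.py, WNgen.py, and app.py all do.
import spacy

nlp = spacy.load("Assets/Models/en_core_web_lg")
doc = nlp("Nathan shipped the model from London.")    #illustrative sentence
print([(ent.text, ent.label_) for ent in doc.ents])    #from the ner component configured above
print([(tok.text, tok.tag_) for tok in doc])           #from the tagger + attribute_ruler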
Assets/Models/en_core_web_lg/lemmatizer/lookups/lookups.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb64f40c0f8396d1762730c0ddf4dad2a52d138f5a389f71a1a1d088173b7737
+ size 972893
Assets/Models/en_core_web_lg/meta.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b3ba202861febc88c998c20b62c4e2db5aa4249047816576d7cf476f4c5bb6c2
+ size 10361
Assets/Models/en_core_web_lg/ner/cfg ADDED
@@ -0,0 +1,13 @@
+ {
+   "moves":null,
+   "update_with_oracle_cut_size":100,
+   "multitasks":[
+
+   ],
+   "min_action_freq":1,
+   "learn_tokens":false,
+   "beam_width":1,
+   "beam_density":0.0,
+   "beam_update_prob":0.0,
+   "incorrect_spans_key":null
+ }
Assets/Models/en_core_web_lg/ner/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9d8a97f17d882960a52360ae2e58d9c960937534c9c010e1d912a3b82767a8f
+ size 6511153
Assets/Models/en_core_web_lg/ner/moves ADDED
@@ -0,0 +1 @@
+ ��moves�{"0":{},"1":{"ORG":56356,"DATE":40381,"PERSON":36475,"GPE":26716,"MONEY":15121,"CARDINAL":14096,"NORP":9638,"PERCENT":9182,"WORK_OF_ART":4475,"LOC":4047,"TIME":3670,"QUANTITY":3114,"FAC":3042,"EVENT":3015,"ORDINAL":2142,"PRODUCT":1782,"LAW":1620,"LANGUAGE":355},"2":{"ORG":56356,"DATE":40381,"PERSON":36475,"GPE":26716,"MONEY":15121,"CARDINAL":14096,"NORP":9638,"PERCENT":9182,"WORK_OF_ART":4475,"LOC":4047,"TIME":3670,"QUANTITY":3114,"FAC":3042,"EVENT":3015,"ORDINAL":2142,"PRODUCT":1782,"LAW":1620,"LANGUAGE":355},"3":{"ORG":56356,"DATE":40381,"PERSON":36475,"GPE":26716,"MONEY":15121,"CARDINAL":14096,"NORP":9638,"PERCENT":9182,"WORK_OF_ART":4475,"LOC":4047,"TIME":3670,"QUANTITY":3114,"FAC":3042,"EVENT":3015,"ORDINAL":2142,"PRODUCT":1782,"LAW":1620,"LANGUAGE":355},"4":{"ORG":56356,"DATE":40381,"PERSON":36475,"GPE":26716,"MONEY":15121,"CARDINAL":14096,"NORP":9638,"PERCENT":9182,"WORK_OF_ART":4475,"LOC":4047,"TIME":3670,"QUANTITY":3114,"FAC":3042,"EVENT":3015,"ORDINAL":2142,"PRODUCT":1782,"LAW":1620,"LANGUAGE":355,"":1},"5":{"":1}}�cfg��neg_key�
Assets/Models/en_core_web_lg/parser/cfg ADDED
@@ -0,0 +1,13 @@
+ {
+   "moves":null,
+   "update_with_oracle_cut_size":100,
+   "multitasks":[
+
+   ],
+   "min_action_freq":30,
+   "learn_tokens":false,
+   "beam_width":1,
+   "beam_density":0.0,
+   "beam_update_prob":0.0,
+   "incorrect_spans_key":null
+ }
Assets/Models/en_core_web_lg/parser/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b8abfdcfaa0d0a822556f61fa2ab7b48d5528e8ab25375e9c657af78d8e2368
+ size 319909
Assets/Models/en_core_web_lg/parser/moves ADDED
@@ -0,0 +1,2 @@
+ ��moves�
+ {"0":{"":994267},"1":{"":990803},"2":{"det":172595,"nsubj":165748,"compound":116623,"amod":105184,"aux":86667,"punct":65478,"advmod":62763,"poss":36443,"mark":27941,"nummod":22598,"auxpass":15594,"prep":14001,"nsubjpass":13856,"neg":12357,"cc":10739,"nmod":9562,"advcl":9062,"npadvmod":8168,"quantmod":7101,"intj":6464,"ccomp":5896,"dobj":3427,"expl":3360,"dep":2806,"predet":1944,"parataxis":1837,"csubj":1428,"preconj":621,"pobj||prep":616,"attr":578,"meta":376,"advmod||conj":368,"dobj||xcomp":352,"acomp":284,"nsubj||ccomp":224,"dative":206,"advmod||xcomp":149,"dobj||ccomp":70,"csubjpass":64,"dobj||conj":62,"prep||conj":51,"acl":48,"prep||nsubj":41,"prep||dobj":36,"xcomp":34,"advmod||ccomp":32,"oprd":31},"3":{"punct":183790,"pobj":182191,"prep":174008,"dobj":89615,"conj":59687,"cc":51930,"ccomp":30385,"advmod":22861,"xcomp":21021,"relcl":20969,"advcl":19828,"attr":17741,"acomp":16922,"appos":15265,"case":13388,"acl":12085,"pcomp":10324,"npadvmod":9796,"prt":8179,"agent":3903,"dative":3866,"nsubj":3470,"neg":2906,"amod":2839,"intj":2819,"nummod":2732,"oprd":2301,"dep":1487,"parataxis":1261,"quantmod":319,"nmod":294,"acl||dobj":200,"prep||dobj":190,"prep||nsubj":162,"acl||nsubj":159,"appos||nsubj":145,"relcl||dobj":134,"relcl||nsubj":111,"aux":103,"expl":96,"meta":92,"appos||dobj":86,"preconj":71,"csubj":65,"prep||nsubjpass":55,"prep||advmod":54,"prep||acomp":53,"det":51,"nsubjpass":45,"relcl||pobj":42,"acl||nsubjpass":42,"mark":40,"auxpass":39,"prep||pobj":36,"relcl||nsubjpass":32,"appos||nsubjpass":31},"4":{"ROOT":111664}}�cfg��neg_key�
Assets/Models/en_core_web_lg/senter/cfg ADDED
@@ -0,0 +1,3 @@
+ {
+   "overwrite":false
+ }
Assets/Models/en_core_web_lg/senter/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a1bdccc5dc2d8c842081528c93680c54508411615b525cef695239f30bb0ed8
+ size 219953
Assets/Models/en_core_web_lg/tagger/cfg ADDED
@@ -0,0 +1,55 @@
+ {
+   "labels":[
+     "$",
+     "''",
+     ",",
+     "-LRB-",
+     "-RRB-",
+     ".",
+     ":",
+     "ADD",
+     "AFX",
+     "CC",
+     "CD",
+     "DT",
+     "EX",
+     "FW",
+     "HYPH",
+     "IN",
+     "JJ",
+     "JJR",
+     "JJS",
+     "LS",
+     "MD",
+     "NFP",
+     "NN",
+     "NNP",
+     "NNPS",
+     "NNS",
+     "PDT",
+     "POS",
+     "PRP",
+     "PRP$",
+     "RB",
+     "RBR",
+     "RBS",
+     "RP",
+     "SYM",
+     "TO",
+     "UH",
+     "VB",
+     "VBD",
+     "VBG",
+     "VBN",
+     "VBP",
+     "VBZ",
+     "WDT",
+     "WP",
+     "WP$",
+     "WRB",
+     "XX",
+     "``"
+   ],
+   "neg_prefix":"!",
+   "overwrite":false
+ }
Assets/Models/en_core_web_lg/tagger/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4481bf82fdaea8773149ca8b637057e0dfaa4f8fa1cc5e8f19f33250568f6fc0
+ size 19441
Assets/Models/en_core_web_lg/tok2vec/cfg ADDED
@@ -0,0 +1,3 @@
+ {
+
+ }
Assets/Models/en_core_web_lg/tok2vec/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:71724ee469b871ec2287455264d692c9b229b1bf129aa5bc06130a4aeb9b7c0e
+ size 6365604
Assets/Models/en_core_web_lg/tokenizer ADDED
@@ -0,0 +1,3 @@
+ [binary msgpack, line 1: the serialized "prefix_search", "suffix_search", "infix_finditer", "token_match", and "url_match" patterns, covering punctuation, currency symbols, units, emoji/symbol ranges, and multi-script letter classes]
+ [binary msgpack, line 2: framing bytes that open the special-case table]
+ [binary msgpack, line 3: the special-case table mapping contractions, abbreviations, emoticons, and clock times to token splits and norms, e.g. "Can't" to "Ca" (can) + "n't" (not), "N.Y." to "New York", "10a.m." to "10" + "a.m.", ...]
e�theses��A�theseC�these�A�s�theseve��A�these�A�veC�have�these’d��A�theseC�these�A�’dC�'d�these’d’ve��A�theseC�these�A�’dC�would�A�’veC�have�these’ll��A�theseC�these�A�’llC�will�these’ll’ve��A�theseC�these�A�’llC�will�A�’veC�have�these’re��A�theseC�these�A�’reC�are�these’s��A�theseC�these�A�’sC�'s�these’ve��A�theseC�these�A�’ve�they'd��A�theyC�they�A�'dC�'d�they'd've��A�theyC�they�A�'dC�would�A�'veC�have�they'll��A�theyC�they�A�'llC�will�they'll've��A�theyC�they�A�'llC�will�A�'veC�have�they're��A�theyC�they�A�'reC�are�they've��A�theyC�they�A�'veC�have�theyd��A�theyC�they�A�dC�'d�theydve��A�theyC�they�A�dC�would�A�veC�have�theyll��A�theyC�they�A�llC�will�theyllve��A�theyC�they�A�llC�will�A�veC�have�theyre��A�theyC�they�A�reC�are�theyve��A�theyC�they�A�veC�have�they’d��A�theyC�they�A�’dC�'d�they’d’ve��A�theyC�they�A�’dC�would�A�’veC�have�they’ll��A�theyC�they�A�’llC�will�they’ll’ve��A�theyC�they�A�’llC�will�A�’veC�have�they’re��A�theyC�they�A�’reC�are�they’ve��A�theyC�they�A�’veC�have�this'd��A�thisC�this�A�'dC�'d�this'd've��A�thisC�this�A�'dC�would�A�'veC�have�this'll��A�thisC�this�A�'llC�will�this'll've��A�thisC�this�A�'llC�will�A�'veC�have�this're��A�thisC�this�A�'reC�are�this's��A�thisC�this�A�'sC�'s�this've��A�thisC�this�A�'ve�thisd��A�thisC�this�A�dC�'d�thisdve��A�thisC�this�A�dC�would�A�veC�have�thisll��A�thisC�this�A�llC�will�thisllve��A�thisC�this�A�llC�will�A�veC�have�thisre��A�thisC�this�A�reC�are�thiss��A�thisC�this�A�s�thisve��A�this�A�veC�have�this’d��A�thisC�this�A�’dC�'d�this’d’ve��A�thisC�this�A�’dC�would�A�’veC�have�this’ll��A�thisC�this�A�’llC�will�this’ll’ve��A�thisC�this�A�’llC�will�A�’veC�have�this’re��A�thisC�this�A�’reC�are�this’s��A�thisC�this�A�’sC�'s�this’ve��A�thisC�this�A�’ve�those'd��A�thoseC�those�A�'dC�'d�those'd've��A�thoseC�those�A�'dC�would�A�'veC�have�those'll��A�thoseC�those�A�'llC�will�those'll've��A�thoseC�those�A�'llC�will�A�'veC�have�those're��A�thoseC�those�A�'reC�are�those's��A�thoseC�those�A�'sC�'s�those've��A�thoseC�those�A�'ve�thosed��A�thoseC�those�A�dC�'d�thosedve��A�thoseC�those�A�dC�would�A�veC�have�thosell��A�thoseC�those�A�llC�will�thosellve��A�thoseC�those�A�llC�will�A�veC�have�thosere��A�thoseC�those�A�reC�are�thoses��A�thoseC�those�A�s�thoseve��A�those�A�veC�have�those’d��A�thoseC�those�A�’dC�'d�those’d’ve��A�thoseC�those�A�’dC�would�A�’veC�have�those’ll��A�thoseC�those�A�’llC�will�those’ll’ve��A�thoseC�those�A�’llC�will�A�’veC�have�those’re��A�thoseC�those�A�’reC�are�those’s��A�thoseC�those�A�’sC�'s�those’ve��A�thoseC�those�A�’ve�u.��A�u.�v.��A�v.�v.s.��A�v.s.�v.v��A�v.v�v_v��A�v_v�vs.��A�vs.�w.��A�w.�w/o��A�w/oC�without�wasn't��A�wasC�was�A�n'tC�not�wasnt��A�wasC�was�A�ntC�not�wasn’t��A�wasC�was�A�n’tC�not�we'd��A�weC�we�A�'dC�'d�we'd've��A�weC�we�A�'dC�would�A�'veC�have�we'll��A�weC�we�A�'llC�will�we'll've��A�weC�we�A�'llC�will�A�'veC�have�we're��A�weC�we�A�'reC�are�we've��A�weC�we�A�'veC�have�wed��A�weC�we�A�dC�'d�wedve��A�weC�we�A�dC�would�A�veC�have�wellve��A�weC�we�A�llC�will�A�veC�have�weren't��A�wereC�were�A�n'tC�not�werent��A�wereC�were�A�ntC�not�weren’t��A�wereC�were�A�n’tC�not�weve��A�weC�we�A�veC�have�we’d��A�weC�we�A�’dC�'d�we’d’ve��A�weC�we�A�’dC�would�A�’veC�have�we’ll��A�weC�we�A�’llC�will�we’ll’ve��A�weC�we�A�’llC�will�A�’veC�have�we’re��A�weC�we�A�’reC�are�we’ve��A�weC�we�A�’veC�have�what'd��A�whatC�what�A�'dC�'d�what'd've��A�whatC�what�A�'dC�would�A�'veC�have�what'll��A�whatC�what�A�'llC�will�what'll've��A�whatC�what�A�'llC�will�A�'veC�have�what're��A�whatC�what�A�'reC�are�what's��A�whatC�what�A�'sC�'s�what've��A�whatC
�what�A�'ve�whatd��A�whatC�what�A�dC�'d�whatdve��A�whatC�what�A�dC�would�A�veC�have�whatll��A�whatC�what�A�llC�will�whatllve��A�whatC�what�A�llC�will�A�veC�have�whatre��A�whatC�what�A�reC�are�whats��A�whatC�what�A�s�whatve��A�what�A�veC�have�what’d��A�whatC�what�A�’dC�'d�what’d’ve��A�whatC�what�A�’dC�would�A�’veC�have�what’ll��A�whatC�what�A�’llC�will�what’ll’ve��A�whatC�what�A�’llC�will�A�’veC�have�what’re��A�whatC�what�A�’reC�are�what’s��A�whatC�what�A�’sC�'s�what’ve��A�whatC�what�A�’ve�when'd��A�whenC�when�A�'dC�'d�when'd've��A�whenC�when�A�'dC�would�A�'veC�have�when'll��A�whenC�when�A�'llC�will�when'll've��A�whenC�when�A�'llC�will�A�'veC�have�when're��A�whenC�when�A�'reC�are�when's��A�whenC�when�A�'sC�'s�when've��A�whenC�when�A�'ve�whend��A�whenC�when�A�dC�'d�whendve��A�whenC�when�A�dC�would�A�veC�have�whenll��A�whenC�when�A�llC�will�whenllve��A�whenC�when�A�llC�will�A�veC�have�whenre��A�whenC�when�A�reC�are�whens��A�whenC�when�A�s�whenve��A�when�A�veC�have�when’d��A�whenC�when�A�’dC�'d�when’d’ve��A�whenC�when�A�’dC�would�A�’veC�have�when’ll��A�whenC�when�A�’llC�will�when’ll’ve��A�whenC�when�A�’llC�will�A�’veC�have�when’re��A�whenC�when�A�’reC�are�when’s��A�whenC�when�A�’sC�'s�when’ve��A�whenC�when�A�’ve�where'd��A�whereC�where�A�'dC�'d�where'd've��A�whereC�where�A�'dC�would�A�'veC�have�where'll��A�whereC�where�A�'llC�will�where'll've��A�whereC�where�A�'llC�will�A�'veC�have�where're��A�whereC�where�A�'reC�are�where's��A�whereC�where�A�'sC�'s�where've��A�whereC�where�A�'ve�whered��A�whereC�where�A�dC�'d�wheredve��A�whereC�where�A�dC�would�A�veC�have�wherell��A�whereC�where�A�llC�will�wherellve��A�whereC�where�A�llC�will�A�veC�have�wherere��A�whereC�where�A�reC�are�wheres��A�whereC�where�A�s�whereve��A�where�A�veC�have�where’d��A�whereC�where�A�’dC�'d�where’d’ve��A�whereC�where�A�’dC�would�A�’veC�have�where’ll��A�whereC�where�A�’llC�will�where’ll’ve��A�whereC�where�A�’llC�will�A�’veC�have�where’re��A�whereC�where�A�’reC�are�where’s��A�whereC�where�A�’sC�'s�where’ve��A�whereC�where�A�’ve�who'd��A�whoC�who�A�'dC�'d�who'd've��A�whoC�who�A�'dC�would�A�'veC�have�who'll��A�whoC�who�A�'llC�will�who'll've��A�whoC�who�A�'llC�will�A�'veC�have�who're��A�whoC�who�A�'reC�are�who's��A�whoC�who�A�'sC�'s�who've��A�whoC�who�A�'ve�whod��A�whoC�who�A�dC�'d�whodve��A�whoC�who�A�dC�would�A�veC�have�wholl��A�whoC�who�A�llC�will�whollve��A�whoC�who�A�llC�will�A�veC�have�whos��A�whoC�who�A�s�whove��A�who�A�veC�have�who’d��A�whoC�who�A�’dC�'d�who’d’ve��A�whoC�who�A�’dC�would�A�’veC�have�who’ll��A�whoC�who�A�’llC�will�who’ll’ve��A�whoC�who�A�’llC�will�A�’veC�have�who’re��A�whoC�who�A�’reC�are�who’s��A�whoC�who�A�’sC�'s�who’ve��A�whoC�who�A�’ve�why'd��A�whyC�why�A�'dC�'d�why'd've��A�whyC�why�A�'dC�would�A�'veC�have�why'll��A�whyC�why�A�'llC�will�why'll've��A�whyC�why�A�'llC�will�A�'veC�have�why're��A�whyC�why�A�'reC�are�why's��A�whyC�why�A�'sC�'s�why've��A�whyC�why�A�'ve�whyd��A�whyC�why�A�dC�'d�whydve��A�whyC�why�A�dC�would�A�veC�have�whyll��A�whyC�why�A�llC�will�whyllve��A�whyC�why�A�llC�will�A�veC�have�whyre��A�whyC�why�A�reC�are�whys��A�whyC�why�A�s�whyve��A�why�A�veC�have�why’d��A�whyC�why�A�’dC�'d�why’d’ve��A�whyC�why�A�’dC�would�A�’veC�have�why’ll��A�whyC�why�A�’llC�will�why’ll’ve��A�whyC�why�A�’llC�will�A�’veC�have�why’re��A�whyC�why�A�’reC�are�why’s��A�whyC�why�A�’sC�'s�why’ve��A�whyC�why�A�’ve�won't��A�woC�will�A�n'tC�not�won't've��A�woC�will�A�n'tC�not�A�'veC�have�wont��A�woC�will�A�ntC�not�wontve��A�woC�will�A�ntC�not�A�veC�have�won’t��A�woC�will�A�n’tC�not�won’t’ve��A�woC�will�A�n’tC�not�A�’veC�have�wo
uld've��A�wouldC�would�A�'ve�wouldn't��A�wouldC�would�A�n'tC�not�wouldn't've��A�wouldC�would�A�n'tC�not�A�'veC�have�wouldnt��A�wouldC�would�A�ntC�not�wouldntve��A�wouldC�would�A�ntC�not�A�veC�have�wouldn’t��A�wouldC�would�A�n’tC�not�wouldn’t’ve��A�wouldC�would�A�n’tC�not�A�’veC�have�wouldve��A�wouldC�would�A�ve�would’ve��A�wouldC�would�A�’ve�x.��A�x.�xD��A�xD�xDD��A�xDD�y'all��A�y'C�you�A�all�y.��A�y.�yall��A�yC�you�A�all�you'd��A�youC�you�A�'dC�'d�you'd've��A�youC�you�A�'dC�would�A�'veC�have�you'll��A�youC�you�A�'llC�will�you'll've��A�youC�you�A�'llC�will�A�'veC�have�you're��A�youC�you�A�'reC�are�you've��A�youC�you�A�'veC�have�youd��A�youC�you�A�dC�'d�youdve��A�youC�you�A�dC�would�A�veC�have�youll��A�youC�you�A�llC�will�youllve��A�youC�you�A�llC�will�A�veC�have�youre��A�youC�you�A�reC�are�youve��A�youC�you�A�veC�have�you’d��A�youC�you�A�’dC�'d�you’d’ve��A�youC�you�A�’dC�would�A�’veC�have�you’ll��A�youC�you�A�’llC�will�you’ll’ve��A�youC�you�A�’llC�will�A�’veC�have�you’re��A�youC�you�A�’reC�are�you’ve��A�youC�you�A�’veC�have�y’all��A�y’C�you�A�all�z.��A�z.� ��A� C� �¯\(ツ)/¯��A�¯\(ツ)/¯�°C.��A�°�A�C�A�.�°F.��A�°�A�F�A�.�°K.��A�°�A�K�A�.�°c.��A�°�A�c�A�.�°f.��A�°�A�f�A�.�°k.��A�°�A�k�A�.�ä.��A�ä.�ö.��A�ö.�ü.��A�ü.�ಠ_ಠ��A�ಠ_ಠ�ಠ︵ಠ��A�ಠ︵ಠ�—��A�—�‘S��A�‘SC�'s�‘s��A�‘sC�'s�’��A�’�’Cause��A�’CauseC�because�’Cos��A�’CosC�because�’Coz��A�’CozC�because�’Cuz��A�’CuzC�because�’S��A�’SC�'s�’bout��A�’boutC�about�’cause��A�’causeC�because�’cos��A�’cosC�because�’coz��A�’cozC�because�’cuz��A�’cuzC�because�’d��A�’d�’em��A�’emC�them�’ll��A�’llC�will�’nuff��A�’nuffC�enough�’re��A�’reC�are�’s��A�’sC�'s�’’��A�’’�faster_heuristics�
Assets/Models/en_core_web_lg/vocab/key2row ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8163b927a234a675074bb38ce62c17a57182998dc83fb9275d35500559a582a
3
+ size 9311659
Assets/Models/en_core_web_lg/vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ddd140ecac6a8c4592e9146d6e30074569ffaed97ee51edc9587dc510f8934c
3
+ size 69982
Assets/Models/en_core_web_lg/vocab/strings.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649ca580aed1f07d3b761fa73308bc96f72b78e8bd4d51140a3a920b3429ba10
3
+ size 9694998
Assets/Models/en_core_web_lg/vocab/vectors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd82f972c4fca3d440c505cdd94c88efdded56457cc86851d584b751f7dea673
3
+ size 411501728
Assets/Models/en_core_web_lg/vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "mode":"default"
3
+ }
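Note: the vocab/* entries above are Git LFS pointer files (version, oid, size), not the binary payloads themselves; LFS fetches the real data at checkout. With the full en_core_web_lg directory vendored under Assets/Models/, the pipeline can be loaded straight from that path instead of from a pip-installed package. A minimal sketch, assuming spaCy v3.x and the repository root as the working directory:

    import spacy

    # Load the vendored pipeline from its on-disk path; this avoids running
    # `python -m spacy download en_core_web_lg` at deploy time.
    nlp = spacy.load("Assets/Models/en_core_web_lg")

    doc = nlp("She quickly reviewed the old report.")
    print([(token.text, token.pos_) for token in doc])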
Lime Explorations.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
NER-tweaks/.ipynb_checkpoints/age-bias-checkpoint.jsonl DELETED
@@ -1,32 +0,0 @@
1
- {"label": "age", "pattern": [{"LOWER": "advanced"}], "id": "age-bias"}
2
- {"label": "age", "pattern": [{"LOWER": "aged"}], "id": "age-bias"}
3
- {"label": "age", "pattern": [{"LOWER": "ancient"}], "id": "age-bias"}
4
- {"label": "age", "pattern": [{"LOWER": "antique"}], "id": "age-bias"}
5
- {"label": "age", "pattern": [{"LOWER": "archaic"}], "id": "age-bias"}
6
- {"label": "age", "pattern": [{"LOWER": "contemporary"}], "id": "age-bias"}
7
- {"label": "age", "pattern": [{"LOWER": "current"}], "id": "age-bias"}
8
- {"label": "age", "pattern": [{"LOWER": "frayed"}], "id": "age-bias"}
9
- {"label": "age", "pattern": [{"LOWER": "fresh"}], "id": "age-bias"}
10
- {"label": "age", "pattern": [{"LOWER": "grizzled"}], "id": "age-bias"}
11
- {"label": "age", "pattern": [{"LOWER": "hoary"}], "id": "age-bias"}
12
- {"label": "age", "pattern": [{"LOWER": "immature"}], "id": "age-bias"}
13
- {"label": "age", "pattern": [{"LOWER": "juvenile"}], "id": "age-bias"}
14
- {"label": "age", "pattern": [{"LOWER": "mature"}], "id": "age-bias"}
15
- {"label": "age", "pattern": [{"LOWER": "modern"}], "id": "age-bias"}
16
- {"label": "age", "pattern": [{"LOWER": "new"}], "id": "age-bias"}
17
- {"label": "age", "pattern": [{"LOWER": "novel"}], "id": "age-bias"}
18
- {"label": "age", "pattern": [{"LOWER": "obsolete"}], "id": "age-bias"}
19
- {"label": "age", "pattern": [{"LOWER": "old"}], "id": "age-bias"}
20
- {"label": "age", "pattern": [{"LOWER": "primordial"}], "id": "age-bias"}
21
- {"label": "age", "pattern": [{"LOWER": "ragged"}], "id": "age-bias"}
22
- {"label": "age", "pattern": [{"LOWER": "raw"}], "id": "age-bias"}
23
- {"label": "age", "pattern": [{"LOWER": "recent"}], "id": "age-bias"}
24
- {"label": "age", "pattern": [{"LOWER": "senile"}], "id": "age-bias"}
25
- {"label": "age", "pattern": [{"LOWER": "shabby"}], "id": "age-bias"}
26
- {"label": "age", "pattern": [{"LOWER": "stale"}], "id": "age-bias"}
27
- {"label": "age", "pattern": [{"LOWER": "tattered"}], "id": "age-bias"}
28
- {"label": "age", "pattern": [{"LOWER": "threadbare"}], "id": "age-bias"}
29
- {"label": "age", "pattern": [{"LOWER": "trite"}], "id": "age-bias"}
30
- {"label": "age", "pattern": [{"LOWER": "vintage"}], "id": "age-bias"}
31
- {"label": "age", "pattern": [{"LOWER": "worn"}], "id": "age-bias"}
32
- {"label": "age", "pattern": [{"LOWER": "young"}], "id": "age-bias"}
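Note: each line in the deleted age-bias-checkpoint.jsonl above is a spaCy EntityRuler pattern: "pattern" is a list of token-attribute dicts ({"LOWER": "old"} matches any token whose lowercased text is "old"), "label" becomes the entity label, and "id" is exposed as ent_id_. A minimal sketch of the same kind of rules applied programmatically, assuming spaCy v3.x with en_core_web_lg available:

    import spacy

    nlp = spacy.load("en_core_web_lg")
    # Insert a rule-based ruler ahead of the statistical NER; spans it sets
    # are preset on the doc before the NER component runs.
    ruler = nlp.add_pipe("entity_ruler", before="ner")
    ruler.add_patterns([
        {"label": "age", "pattern": [{"LOWER": "old"}], "id": "age-bias"},
        {"label": "age", "pattern": [{"LOWER": "young"}], "id": "age-bias"},
    ])

    doc = nlp("The old manual was rewritten by a young engineer.")
    print([(ent.text, ent.label_, ent.ent_id_) for ent in doc.ents])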
NER-tweaks/.ipynb_checkpoints/entity-ruler-input-checkpoint.jsonl DELETED
@@ -1,44 +0,0 @@
1
- {"label": "GENDER", "pattern": [{"LOWER": "woman"}],"id":"female-bias"}
2
- {"label": "GENDER", "pattern": [{"LOWER": "feminine"}],"id":"female-bias"}
3
- {"label": "GENDER", "pattern": [{"LOWER": "female"}],"id":"female-bias"}
4
- {"label": "GENDER", "pattern": [{"LOWER": "lady"}],"id":"female-bias"}
5
- {"label": "GENDER", "pattern": [{"LOWER": "girl"}],"id":"female-bias"}
6
- {"label": "GENDER", "pattern": [{"LOWER": "she"}],"id":"female-bias"}
7
- {"label": "GENDER", "pattern": [{"LOWER": "her"}],"id":"female-bias"}
8
- {"label": "GENDER", "pattern": [{"LOWER": "hers"}],"id":"female-bias"}
9
- {"label": "GENDER", "pattern": [{"LOWER": "herself"}],"id":"female-bias"}
10
- {"label": "GENDER", "pattern": [{"LOWER": "mother"}],"id":"female-bias"}
11
- {"label": "GENDER", "pattern": [{"LOWER": "grandmother"}],"id":"female-bias"}
12
- {"label": "GENDER", "pattern": [{"LOWER": "grandma"}],"id":"female-bias"}
13
- {"label": "GENDER", "pattern": [{"LOWER": "momma"}],"id":"female-bias"}
14
- {"label": "GENDER", "pattern": [{"LOWER": "mommy"}],"id":"female-bias"}
15
- {"label": "GENDER", "pattern": [{"LOWER": "babe"}],"id":"female-bias"}
16
- {"label": "GENDER", "pattern": [{"LOWER": "daughter"}],"id":"female-bias"}
17
- {"label": "GENDER", "pattern": [{"LOWER": "sister"}],"id":"female-bias"}
18
- {"label": "GENDER", "pattern": [{"LOWER": "niece"}],"id":"female-bias"}
19
- {"label": "GENDER", "pattern": [{"LOWER": "aunt"}],"id":"female-bias"}
20
- {"label": "GENDER", "pattern": [{"LOWER": "girlfriend"}],"id":"female-bias"}
21
- {"label": "GENDER", "pattern": [{"LOWER": "wife"}],"id":"female-bias"}
22
- {"label": "GENDER", "pattern": [{"LOWER": "mistress"}],"id":"female-bias"}
23
- {"label": "GENDER", "pattern": [{"LOWER": "man"}],"id":"male-bias"}
24
- {"label": "GENDER", "pattern": [{"LOWER": "masculine"}],"id":"male-bias"}
25
- {"label": "GENDER", "pattern": [{"LOWER": "male"}],"id":"male-bias"}
26
- {"label": "GENDER", "pattern": [{"LOWER": "dude"}],"id":"male-bias"}
27
- {"label": "GENDER", "pattern": [{"LOWER": "boy"}],"id":"male-bias"}
28
- {"label": "GENDER", "pattern": [{"LOWER": "he"}],"id":"male-bias"}
29
- {"label": "GENDER", "pattern": [{"LOWER": "his"}],"id":"male-bias"}
30
- {"label": "GENDER", "pattern": [{"LOWER": "him"}],"id":"male-bias"}
31
- {"label": "GENDER", "pattern": [{"LOWER": "himself"}],"id":"male-bias"}
32
- {"label": "GENDER", "pattern": [{"LOWER": "father"}],"id":"male-bias"}
33
- {"label": "GENDER", "pattern": [{"LOWER": "grandfather"}],"id":"male-bias"}
34
- {"label": "GENDER", "pattern": [{"LOWER": "grandpa"}],"id":"male-bias"}
35
- {"label": "GENDER", "pattern": [{"LOWER": "poppa"}],"id":"male-bias"}
36
- {"label": "GENDER", "pattern": [{"LOWER": "daddy"}],"id":"male-bias"}
37
- {"label": "GENDER", "pattern": [{"LOWER": "lad"}],"id":"male-bias"}
38
- {"label": "GENDER", "pattern": [{"LOWER": "son"}],"id":"male-bias"}
39
- {"label": "GENDER", "pattern": [{"LOWER": "brother"}],"id":"male-bias"}
40
- {"label": "GENDER", "pattern": [{"LOWER": "nephew"}],"id":"male-bias"}
41
- {"label": "GENDER", "pattern": [{"LOWER": "uncle"}],"id":"male-bias"}
42
- {"label": "GENDER", "pattern": [{"LOWER": "boyfriend"}],"id":"male-bias"}
43
- {"label": "GENDER", "pattern": [{"LOWER": "husband"}],"id":"male-bias"}
44
- {"label": "GENDER", "pattern": [{"LOWER": "gentleman"}],"id":"male-bias"}
NER-tweaks/.ipynb_checkpoints/gender-test-checkpoint.jsonl DELETED
@@ -1,59 +0,0 @@
1
- {"label": "SOGI", "pattern": [{"LOWER": "woman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
2
- {"label": "SOGI", "pattern": [{"LOWER": "feminine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
3
- {"label": "SOGI", "pattern": [{"LOWER": "female", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
4
- {"label": "SOGI", "pattern": [{"LOWER": "lady", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
5
- {"label": "SOGI", "pattern": [{"LOWER": "girl", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
6
- {"label": "SOGI", "pattern": [{"LOWER": "she", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
7
- {"label": "SOGI", "pattern": [{"LOWER": "hers", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
8
- {"label": "SOGI", "pattern": [{"LOWER": "her", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
9
- {"label": "SOGI", "pattern": [{"LOWER": "herself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
10
- {"label": "SOGI", "pattern": [{"LOWER": "mother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
11
- {"label": "SOGI", "pattern": [{"LOWER": "grandmother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
12
- {"label": "SOGI", "pattern": [{"LOWER": "grandma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
13
- {"label": "SOGI", "pattern": [{"LOWER": "momma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
14
- {"label": "SOGI", "pattern": [{"LOWER": "mommy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
15
- {"label": "SOGI", "pattern": [{"LOWER": "babe", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
16
- {"label": "SOGI", "pattern": [{"LOWER": "daughter", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
17
- {"label": "SOGI", "pattern": [{"LOWER": "sister", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
18
- {"label": "SOGI", "pattern": [{"LOWER": "niece", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
19
- {"label": "SOGI", "pattern": [{"LOWER": "aunt", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
20
- {"label": "SOGI", "pattern": [{"LOWER": "girlfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
21
- {"label": "SOGI", "pattern": [{"LOWER": "wife", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
22
- {"label": "SOGI", "pattern": [{"LOWER": "mistress", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
23
- {"label": "SOGI", "pattern": [{"LOWER": "man", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
24
- {"label": "SOGI", "pattern": [{"LOWER": "masculine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
25
- {"label": "SOGI", "pattern": [{"LOWER": "male", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
26
- {"label": "SOGI", "pattern": [{"LOWER": "dude", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
27
- {"label": "SOGI", "pattern": [{"LOWER": "boy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
28
- {"label": "SOGI", "pattern": [{"LOWER": "he", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
29
- {"label": "SOGI", "pattern": [{"LOWER": "his", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
30
- {"label": "SOGI", "pattern": [{"LOWER": "him", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
31
- {"label": "SOGI", "pattern": [{"LOWER": "himself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
32
- {"label": "SOGI", "pattern": [{"LOWER": "father", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
33
- {"label": "SOGI", "pattern": [{"LOWER": "grandfather", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
34
- {"label": "SOGI", "pattern": [{"LOWER": "grandpa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
35
- {"label": "SOGI", "pattern": [{"LOWER": "poppa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
36
- {"label": "SOGI", "pattern": [{"LOWER": "daddy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
37
- {"label": "SOGI", "pattern": [{"LOWER": "lad", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
38
- {"label": "SOGI", "pattern": [{"LOWER": "son", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
39
- {"label": "SOGI", "pattern": [{"LOWER": "brother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
40
- {"label": "SOGI", "pattern": [{"LOWER": "nephew", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
41
- {"label": "SOGI", "pattern": [{"LOWER": "uncle", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
42
- {"label": "SOGI", "pattern": [{"LOWER": "boyfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
43
- {"label": "SOGI", "pattern": [{"LOWER": "husband", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
44
- {"label": "SOGI", "pattern": [{"LOWER": "gentleman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
45
- {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"woman"}],"id":"lbgtq-bias"}
46
- {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"man"}],"id":"lbgtq-bias"}
47
- {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
48
- {"label": "SOGI", "pattern": [{"LOWER": "bisexual"}],"id":"lbgtq-bias"}
49
- {"label": "SOGI", "pattern": [{"LOWER": "gay"}],"id":"lbgtq-bias"}
50
- {"label": "SOGI", "pattern": [{"LOWER": "gender-fluid"}],"id":"lbgtq-bias"}
51
- {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
52
- {"label": "SOGI", "pattern": [{"LOWER": "genderqueer"}],"id":"lbgtq-bias"}
53
- {"label": "SOGI", "pattern": [{"LOWER": "lesbian"}],"id":"lbgtq-bias"}
54
- {"label": "SOGI", "pattern": [{"LOWER": "non-binary"}],"id":"lbgtq-bias"}
55
- {"label": "SOGI", "pattern": [{"LOWER": "queer"}],"id":"lbgtq-bias"}
56
- {"label": "SOGI", "pattern": [{"LOWER": "pansexual"}],"id":"lbgtq-bias"}
57
- {"label": "SOGI", "pattern": [{"LOWER": "transgender"}],"id":"lbgtq-bias"}
58
- {"label": "SOGI", "pattern": [{"LOWER": "transwoman"}],"id":"lbgtq-bias"}
59
- {"label": "SOGI", "pattern": [{"LOWER": "transman"}],"id":"lbgtq-bias"}
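Note: pattern files like these are normally loaded wholesale rather than entry by entry; EntityRuler.from_disk reads one JSON object per line when given a .jsonl path, which matches the format shown above. A sketch under the same spaCy v3.x assumption:

    import spacy

    nlp = spacy.load("en_core_web_lg")
    ruler = nlp.add_pipe("entity_ruler", before="ner")
    # Load every {"label", "pattern", "id"} rule from the JSONL file removed
    # in this commit.
    ruler.from_disk("NER-tweaks/gender-test.jsonl")

    doc = nlp("His sister and her girlfriend arrived early.")
    print([(ent.text, ent.label_, ent.ent_id_) for ent in doc.ents])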
NER-tweaks/.ipynb_checkpoints/main-ruler-bias-checkpoint.jsonl DELETED
@@ -1,862 +0,0 @@
1
- {"label": "SOGI", "pattern": [{"LOWER": "woman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
2
- {"label": "SOGI", "pattern": [{"LOWER": "feminine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
3
- {"label": "SOGI", "pattern": [{"LOWER": "female", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
4
- {"label": "SOGI", "pattern": [{"LOWER": "lady", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
5
- {"label": "SOGI", "pattern": [{"LOWER": "girl", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
6
- {"label": "SOGI", "pattern": [{"LOWER": "she", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
7
- {"label": "SOGI", "pattern": [{"LOWER": "hers", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
8
- {"label": "SOGI", "pattern": [{"LOWER": "her", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
9
- {"label": "SOGI", "pattern": [{"LOWER": "herself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
10
- {"label": "SOGI", "pattern": [{"LOWER": "mother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
11
- {"label": "SOGI", "pattern": [{"LOWER": "grandmother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
12
- {"label": "SOGI", "pattern": [{"LOWER": "grandma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
13
- {"label": "SOGI", "pattern": [{"LOWER": "momma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
14
- {"label": "SOGI", "pattern": [{"LOWER": "mommy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
15
- {"label": "SOGI", "pattern": [{"LOWER": "babe", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
16
- {"label": "SOGI", "pattern": [{"LOWER": "daughter", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
17
- {"label": "SOGI", "pattern": [{"LOWER": "sister", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
18
- {"label": "SOGI", "pattern": [{"LOWER": "niece", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
19
- {"label": "SOGI", "pattern": [{"LOWER": "aunt", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
20
- {"label": "SOGI", "pattern": [{"LOWER": "girlfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
21
- {"label": "SOGI", "pattern": [{"LOWER": "wife", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
22
- {"label": "SOGI", "pattern": [{"LOWER": "mistress", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
23
- {"label": "SOGI", "pattern": [{"LOWER": "man", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
24
- {"label": "SOGI", "pattern": [{"LOWER": "masculine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
25
- {"label": "SOGI", "pattern": [{"LOWER": "male", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
26
- {"label": "SOGI", "pattern": [{"LOWER": "dude", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
27
- {"label": "SOGI", "pattern": [{"LOWER": "boy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
28
- {"label": "SOGI", "pattern": [{"LOWER": "he", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
29
- {"label": "SOGI", "pattern": [{"LOWER": "his", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
30
- {"label": "SOGI", "pattern": [{"LOWER": "him", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
31
- {"label": "SOGI", "pattern": [{"LOWER": "himself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
32
- {"label": "SOGI", "pattern": [{"LOWER": "father", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
33
- {"label": "SOGI", "pattern": [{"LOWER": "grandfather", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
34
- {"label": "SOGI", "pattern": [{"LOWER": "grandpa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
35
- {"label": "SOGI", "pattern": [{"LOWER": "poppa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
36
- {"label": "SOGI", "pattern": [{"LOWER": "daddy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
37
- {"label": "SOGI", "pattern": [{"LOWER": "lad", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
38
- {"label": "SOGI", "pattern": [{"LOWER": "son", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
39
- {"label": "SOGI", "pattern": [{"LOWER": "brother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
40
- {"label": "SOGI", "pattern": [{"LOWER": "nephew", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
41
- {"label": "SOGI", "pattern": [{"LOWER": "uncle", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
42
- {"label": "SOGI", "pattern": [{"LOWER": "boyfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
43
- {"label": "SOGI", "pattern": [{"LOWER": "husband", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
44
- {"label": "SOGI", "pattern": [{"LOWER": "gentleman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
45
- {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"woman"}],"id":"lbgtq-bias"}
46
- {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"man"}],"id":"lbgtq-bias"}
47
- {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
48
- {"label": "SOGI", "pattern": [{"LOWER": "bisexual"}],"id":"lbgtq-bias"}
49
- {"label": "SOGI", "pattern": [{"LOWER": "gay"}],"id":"lbgtq-bias"}
50
- {"label": "SOGI", "pattern": [{"LOWER": "gender-fluid"}],"id":"lbgtq-bias"}
51
- {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
52
- {"label": "SOGI", "pattern": [{"LOWER": "genderqueer"}],"id":"lbgtq-bias"}
53
- {"label": "SOGI", "pattern": [{"LOWER": "lesbian"}],"id":"lbgtq-bias"}
54
- {"label": "SOGI", "pattern": [{"LOWER": "non-binary"}],"id":"lbgtq-bias"}
55
- {"label": "SOGI", "pattern": [{"LOWER": "queer"}],"id":"lbgtq-bias"}
56
- {"label": "SOGI", "pattern": [{"LOWER": "pansexual"}],"id":"lbgtq-bias"}
57
- {"label": "SOGI", "pattern": [{"LOWER": "transgender"}],"id":"lbgtq-bias"}
58
- {"label": "SOGI", "pattern": [{"LOWER": "transwoman"}],"id":"lbgtq-bias"}
59
- {"label": "SOGI", "pattern": [{"LOWER": "transman"}],"id":"lbgtq-bias"}
60
- {"label": "adjectives", "pattern": [{"LOWER": "agile"}], "id": "speed-bias"}
61
- {"label": "adjectives", "pattern": [{"LOWER": "express"}], "id": "speed-bias"}
62
- {"label": "adjectives", "pattern": [{"LOWER": "fast"}], "id": "speed-bias"}
63
- {"label": "adjectives", "pattern": [{"LOWER": "hasty"}], "id": "speed-bias"}
64
- {"label": "adjectives", "pattern": [{"LOWER": "immediate"}], "id": "speed-bias"}
65
- {"label": "adjectives", "pattern": [{"LOWER": "instant"}], "id": "speed-bias"}
66
- {"label": "adjectives", "pattern": [{"LOWER": "late"}], "id": "speed-bias"}
67
- {"label": "adjectives", "pattern": [{"LOWER": "lazy"}], "id": "speed-bias"}
68
- {"label": "adjectives", "pattern": [{"LOWER": "nimble"}], "id": "speed-bias"}
69
- {"label": "adjectives", "pattern": [{"LOWER": "poky"}], "id": "speed-bias"}
70
- {"label": "adjectives", "pattern": [{"LOWER": "prompt"}], "id": "speed-bias"}
71
- {"label": "adjectives", "pattern": [{"LOWER": "quick"}], "id": "speed-bias"}
72
- {"label": "adjectives", "pattern": [{"LOWER": "rapid"}], "id": "speed-bias"}
73
- {"label": "adjectives", "pattern": [{"LOWER": "slow"}], "id": "speed-bias"}
74
- {"label": "adjectives", "pattern": [{"LOWER": "sluggish"}], "id": "speed-bias"}
75
- {"label": "adjectives", "pattern": [{"LOWER": "speedy"}], "id": "speed-bias"}
76
- {"label": "adjectives", "pattern": [{"LOWER": "spry"}], "id": "speed-bias"}
77
- {"label": "adjectives", "pattern": [{"LOWER": "swift"}], "id": "speed-bias"}
78
- {"label": "adjectives", "pattern": [{"LOWER": "arctic"}], "id": "weather-bias"}
79
- {"label": "adjectives", "pattern": [{"LOWER": "arid"}], "id": "weather-bias"}
80
- {"label": "adjectives", "pattern": [{"LOWER": "breezy"}], "id": "weather-bias"}
81
- {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "weather-bias"}
82
- {"label": "adjectives", "pattern": [{"LOWER": "chilly"}], "id": "weather-bias"}
83
- {"label": "adjectives", "pattern": [{"LOWER": "cloudy"}], "id": "weather-bias"}
84
- {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "weather-bias"}
85
- {"label": "adjectives", "pattern": [{"LOWER": "cool"}], "id": "weather-bias"}
86
- {"label": "adjectives", "pattern": [{"LOWER": "damp"}], "id": "weather-bias"}
87
- {"label": "adjectives", "pattern": [{"LOWER": "dark"}], "id": "weather-bias"}
88
- {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "weather-bias"}
89
- {"label": "adjectives", "pattern": [{"LOWER": "foggy"}], "id": "weather-bias"}
90
- {"label": "adjectives", "pattern": [{"LOWER": "freezing"}], "id": "weather-bias"}
91
- {"label": "adjectives", "pattern": [{"LOWER": "frosty"}], "id": "weather-bias"}
92
- {"label": "adjectives", "pattern": [{"LOWER": "great"}], "id": "weather-bias"}
93
- {"label": "adjectives", "pattern": [{"LOWER": "hot"}], "id": "weather-bias"}
94
- {"label": "adjectives", "pattern": [{"LOWER": "humid"}], "id": "weather-bias"}
95
- {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "weather-bias"}
96
- {"label": "adjectives", "pattern": [{"LOWER": "light"}], "id": "weather-bias"}
97
- {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "weather-bias"}
98
- {"label": "adjectives", "pattern": [{"LOWER": "nice"}], "id": "weather-bias"}
99
- {"label": "adjectives", "pattern": [{"LOWER": "overcast"}], "id": "weather-bias"}
100
- {"label": "adjectives", "pattern": [{"LOWER": "rainy"}], "id": "weather-bias"}
101
- {"label": "adjectives", "pattern": [{"LOWER": "smoggy"}], "id": "weather-bias"}
102
- {"label": "adjectives", "pattern": [{"LOWER": "snowy"}], "id": "weather-bias"}
103
- {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "weather-bias"}
104
- {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "weather-bias"}
105
- {"label": "adjectives", "pattern": [{"LOWER": "windy"}], "id": "weather-bias"}
106
- {"label": "adjectives", "pattern": [{"LOWER": "wintry"}], "id": "weather-bias"}
107
- {"label": "adjectives", "pattern": [{"LOWER": "bent"}], "id": "shape-bias"}
108
- {"label": "adjectives", "pattern": [{"LOWER": "blocky"}], "id": "shape-bias"}
109
- {"label": "adjectives", "pattern": [{"LOWER": "boxy"}], "id": "shape-bias"}
110
- {"label": "adjectives", "pattern": [{"LOWER": "broad"}], "id": "shape-bias"}
111
- {"label": "adjectives", "pattern": [{"LOWER": "chunky"}], "id": "shape-bias"}
112
- {"label": "adjectives", "pattern": [{"LOWER": "compact"}], "id": "shape-bias"}
113
- {"label": "adjectives", "pattern": [{"LOWER": "fat"}], "id": "shape-bias"}
114
- {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "shape-bias"}
115
- {"label": "adjectives", "pattern": [{"LOWER": "full"}], "id": "shape-bias"}
116
- {"label": "adjectives", "pattern": [{"LOWER": "narrow"}], "id": "shape-bias"}
117
- {"label": "adjectives", "pattern": [{"LOWER": "pointed"}], "id": "shape-bias"}
118
- {"label": "adjectives", "pattern": [{"LOWER": "round"}], "id": "shape-bias"}
119
- {"label": "adjectives", "pattern": [{"LOWER": "rounded"}], "id": "shape-bias"}
120
- {"label": "adjectives", "pattern": [{"LOWER": "skinny"}], "id": "shape-bias"}
121
- {"label": "adjectives", "pattern": [{"LOWER": "slim"}], "id": "shape-bias"}
122
- {"label": "adjectives", "pattern": [{"LOWER": "solid"}], "id": "shape-bias"}
123
- {"label": "adjectives", "pattern": [{"LOWER": "straight"}], "id": "shape-bias"}
124
- {"label": "adjectives", "pattern": [{"LOWER": "thick"}], "id": "shape-bias"}
125
- {"label": "adjectives", "pattern": [{"LOWER": "thin"}], "id": "shape-bias"}
126
- {"label": "adjectives", "pattern": [{"LOWER": "wide"}], "id": "shape-bias"}
127
- {"label": "adjectives", "pattern": [{"LOWER": "blaring"}], "id": "sound-bias"}
128
- {"label": "adjectives", "pattern": [{"LOWER": "booming"}], "id": "sound-bias"}
129
- {"label": "adjectives", "pattern": [{"LOWER": "deafening"}], "id": "sound-bias"}
130
- {"label": "adjectives", "pattern": [{"LOWER": "faint"}], "id": "sound-bias"}
131
- {"label": "adjectives", "pattern": [{"LOWER": "gentle"}], "id": "sound-bias"}
132
- {"label": "adjectives", "pattern": [{"LOWER": "grating"}], "id": "sound-bias"}
133
- {"label": "adjectives", "pattern": [{"LOWER": "hushed"}], "id": "sound-bias"}
134
- {"label": "adjectives", "pattern": [{"LOWER": "loud"}], "id": "sound-bias"}
135
- {"label": "adjectives", "pattern": [{"LOWER": "muffled"}], "id": "sound-bias"}
136
- {"label": "adjectives", "pattern": [{"LOWER": "mute"}], "id": "sound-bias"}
137
- {"label": "adjectives", "pattern": [{"LOWER": "noisy"}], "id": "sound-bias"}
138
- {"label": "adjectives", "pattern": [{"LOWER": "piercing"}], "id": "sound-bias"}
139
- {"label": "adjectives", "pattern": [{"LOWER": "quiet"}], "id": "sound-bias"}
140
- {"label": "adjectives", "pattern": [{"LOWER": "roaring"}], "id": "sound-bias"}
141
- {"label": "adjectives", "pattern": [{"LOWER": "rowdy"}], "id": "sound-bias"}
142
- {"label": "adjectives", "pattern": [{"LOWER": "silent"}], "id": "sound-bias"}
143
- {"label": "adjectives", "pattern": [{"LOWER": "soft"}], "id": "sound-bias"}
144
- {"label": "adjectives", "pattern": [{"LOWER": "thundering"}], "id": "sound-bias"}
145
- {"label": "adjectives", "pattern": [{"LOWER": "absolute"}], "id": "physics-bias"}
146
- {"label": "adjectives", "pattern": [{"LOWER": "achromatic"}], "id": "physics-bias"}
147
- {"label": "adjectives", "pattern": [{"LOWER": "acoustic"}], "id": "physics-bias"}
148
- {"label": "adjectives", "pattern": [{"LOWER": "adiabatic"}], "id": "physics-bias"}
149
- {"label": "adjectives", "pattern": [{"LOWER": "alternating"}], "id": "physics-bias"}
150
- {"label": "adjectives", "pattern": [{"LOWER": "atomic"}], "id": "physics-bias"}
151
- {"label": "adjectives", "pattern": [{"LOWER": "binding"}], "id": "physics-bias"}
152
- {"label": "adjectives", "pattern": [{"LOWER": "brownian"}], "id": "physics-bias"}
153
- {"label": "adjectives", "pattern": [{"LOWER": "buoyant"}], "id": "physics-bias"}
154
- {"label": "adjectives", "pattern": [{"LOWER": "chromatic"}], "id": "physics-bias"}
155
- {"label": "adjectives", "pattern": [{"LOWER": "closed"}], "id": "physics-bias"}
156
- {"label": "adjectives", "pattern": [{"LOWER": "coherent"}], "id": "physics-bias"}
157
- {"label": "adjectives", "pattern": [{"LOWER": "critical"}], "id": "physics-bias"}
158
- {"label": "adjectives", "pattern": [{"LOWER": "dense"}], "id": "physics-bias"}
159
- {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "physics-bias"}
160
- {"label": "adjectives", "pattern": [{"LOWER": "electric"}], "id": "physics-bias"}
161
- {"label": "adjectives", "pattern": [{"LOWER": "electrical"}], "id": "physics-bias"}
162
- {"label": "adjectives", "pattern": [{"LOWER": "endothermic"}], "id": "physics-bias"}
163
- {"label": "adjectives", "pattern": [{"LOWER": "exothermic"}], "id": "physics-bias"}
164
- {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "physics-bias"}
165
- {"label": "adjectives", "pattern": [{"LOWER": "fundamental"}], "id": "physics-bias"}
166
- {"label": "adjectives", "pattern": [{"LOWER": "gravitational"}], "id": "physics-bias"}
167
- {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "physics-bias"}
168
- {"label": "adjectives", "pattern": [{"LOWER": "isobaric"}], "id": "physics-bias"}
169
- {"label": "adjectives", "pattern": [{"LOWER": "isochoric"}], "id": "physics-bias"}
170
- {"label": "adjectives", "pattern": [{"LOWER": "isothermal"}], "id": "physics-bias"}
171
- {"label": "adjectives", "pattern": [{"LOWER": "kinetic"}], "id": "physics-bias"}
172
- {"label": "adjectives", "pattern": [{"LOWER": "latent"}], "id": "physics-bias"}
173
- {"label": "adjectives", "pattern": [{"LOWER": "magnetic"}], "id": "physics-bias"}
174
- {"label": "adjectives", "pattern": [{"LOWER": "mechanical"}], "id": "physics-bias"}
175
- {"label": "adjectives", "pattern": [{"LOWER": "natural"}], "id": "physics-bias"}
176
- {"label": "adjectives", "pattern": [{"LOWER": "nuclear"}], "id": "physics-bias"}
177
- {"label": "adjectives", "pattern": [{"LOWER": "open"}], "id": "physics-bias"}
178
- {"label": "adjectives", "pattern": [{"LOWER": "optical"}], "id": "physics-bias"}
179
- {"label": "adjectives", "pattern": [{"LOWER": "potential"}], "id": "physics-bias"}
180
- {"label": "adjectives", "pattern": [{"LOWER": "primary"}], "id": "physics-bias"}
181
- {"label": "adjectives", "pattern": [{"LOWER": "progressive"}], "id": "physics-bias"}
182
- {"label": "adjectives", "pattern": [{"LOWER": "quantum"}], "id": "physics-bias"}
183
- {"label": "adjectives", "pattern": [{"LOWER": "radiant"}], "id": "physics-bias"}
184
- {"label": "adjectives", "pattern": [{"LOWER": "radioactive"}], "id": "physics-bias"}
185
- {"label": "adjectives", "pattern": [{"LOWER": "rectilinear"}], "id": "physics-bias"}
186
- {"label": "adjectives", "pattern": [{"LOWER": "relative"}], "id": "physics-bias"}
187
- {"label": "adjectives", "pattern": [{"LOWER": "resolving"}], "id": "physics-bias"}
188
- {"label": "adjectives", "pattern": [{"LOWER": "resonnt"}], "id": "physics-bias"}
189
- {"label": "adjectives", "pattern": [{"LOWER": "resultant"}], "id": "physics-bias"}
190
- {"label": "adjectives", "pattern": [{"LOWER": "rigid"}], "id": "physics-bias"}
191
- {"label": "adjectives", "pattern": [{"LOWER": "volumetric"}], "id": "physics-bias"}
192
- {"label": "adjectives", "pattern": [{"LOWER": ""}], "id": "temperature-bias"}
193
- {"label": "adjectives", "pattern": [{"LOWER": "blistering"}], "id": "temperature-bias"}
194
- {"label": "adjectives", "pattern": [{"LOWER": "burning"}], "id": "temperature-bias"}
195
- {"label": "adjectives", "pattern": [{"LOWER": "chill"}], "id": "temperature-bias"}
196
- {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "temperature-bias"}
197
- {"label": "adjectives", "pattern": [{"LOWER": "cool"}], "id": "temperature-bias"}
198
- {"label": "adjectives", "pattern": [{"LOWER": "freezing"}], "id": "temperature-bias"}
199
- {"label": "adjectives", "pattern": [{"LOWER": "frigid"}], "id": "temperature-bias"}
200
- {"label": "adjectives", "pattern": [{"LOWER": "frosty"}], "id": "temperature-bias"}
201
- {"label": "adjectives", "pattern": [{"LOWER": "hot"}], "id": "temperature-bias"}
202
- {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "temperature-bias"}
203
- {"label": "adjectives", "pattern": [{"LOWER": "molten"}], "id": "temperature-bias"}
204
- {"label": "adjectives", "pattern": [{"LOWER": "nippy"}], "id": "temperature-bias"}
205
- {"label": "adjectives", "pattern": [{"LOWER": "scalding"}], "id": "temperature-bias"}
206
- {"label": "adjectives", "pattern": [{"LOWER": "searing"}], "id": "temperature-bias"}
207
- {"label": "adjectives", "pattern": [{"LOWER": "sizzling"}], "id": "temperature-bias"}
208
- {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "temperature-bias"}
209
- {"label": "adjectives", "pattern": [{"LOWER": "central"}], "id": "corporate_prefixes-bias"}
210
- {"label": "adjectives", "pattern": [{"LOWER": "chief"}], "id": "corporate_prefixes-bias"}
211
- {"label": "adjectives", "pattern": [{"LOWER": "corporate"}], "id": "corporate_prefixes-bias"}
212
- {"label": "adjectives", "pattern": [{"LOWER": "customer"}], "id": "corporate_prefixes-bias"}
213
- {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "corporate_prefixes-bias"}
214
- {"label": "adjectives", "pattern": [{"LOWER": "district"}], "id": "corporate_prefixes-bias"}
215
- {"label": "adjectives", "pattern": [{"LOWER": "dynamic"}], "id": "corporate_prefixes-bias"}
216
- {"label": "adjectives", "pattern": [{"LOWER": "forward"}], "id": "corporate_prefixes-bias"}
217
- {"label": "adjectives", "pattern": [{"LOWER": "future"}], "id": "corporate_prefixes-bias"}
218
- {"label": "adjectives", "pattern": [{"LOWER": "global"}], "id": "corporate_prefixes-bias"}
219
- {"label": "adjectives", "pattern": [{"LOWER": "human"}], "id": "corporate_prefixes-bias"}
220
- {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "corporate_prefixes-bias"}
221
- {"label": "adjectives", "pattern": [{"LOWER": "international"}], "id": "corporate_prefixes-bias"}
222
- {"label": "adjectives", "pattern": [{"LOWER": "investor"}], "id": "corporate_prefixes-bias"}
223
- {"label": "adjectives", "pattern": [{"LOWER": "lead"}], "id": "corporate_prefixes-bias"}
224
- {"label": "adjectives", "pattern": [{"LOWER": "legacy"}], "id": "corporate_prefixes-bias"}
225
- {"label": "adjectives", "pattern": [{"LOWER": "national"}], "id": "corporate_prefixes-bias"}
226
- {"label": "adjectives", "pattern": [{"LOWER": "principal"}], "id": "corporate_prefixes-bias"}
227
- {"label": "adjectives", "pattern": [{"LOWER": "product"}], "id": "corporate_prefixes-bias"}
228
- {"label": "adjectives", "pattern": [{"LOWER": "regional"}], "id": "corporate_prefixes-bias"}
229
- {"label": "adjectives", "pattern": [{"LOWER": "senior"}], "id": "corporate_prefixes-bias"}
230
- {"label": "adjectives", "pattern": [{"LOWER": "staff"}], "id": "corporate_prefixes-bias"}
231
- {"label": "adjectives", "pattern": [{"LOWER": "bare"}], "id": "complexity-bias"}
232
- {"label": "adjectives", "pattern": [{"LOWER": "basic"}], "id": "complexity-bias"}
233
- {"label": "adjectives", "pattern": [{"LOWER": "clear"}], "id": "complexity-bias"}
234
- {"label": "adjectives", "pattern": [{"LOWER": "complex"}], "id": "complexity-bias"}
235
- {"label": "adjectives", "pattern": [{"LOWER": "complicated"}], "id": "complexity-bias"}
236
- {"label": "adjectives", "pattern": [{"LOWER": "convoluted"}], "id": "complexity-bias"}
237
- {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "complexity-bias"}
238
- {"label": "adjectives", "pattern": [{"LOWER": "easy"}], "id": "complexity-bias"}
239
- {"label": "adjectives", "pattern": [{"LOWER": "elaborate"}], "id": "complexity-bias"}
240
- {"label": "adjectives", "pattern": [{"LOWER": "fancy"}], "id": "complexity-bias"}
241
- {"label": "adjectives", "pattern": [{"LOWER": "hard"}], "id": "complexity-bias"}
242
- {"label": "adjectives", "pattern": [{"LOWER": "intricate"}], "id": "complexity-bias"}
243
- {"label": "adjectives", "pattern": [{"LOWER": "obvious"}], "id": "complexity-bias"}
244
- {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "complexity-bias"}
245
- {"label": "adjectives", "pattern": [{"LOWER": "pure"}], "id": "complexity-bias"}
246
- {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "complexity-bias"}
247
- {"label": "adjectives", "pattern": [{"LOWER": "amber"}], "id": "colors-bias"}
248
- {"label": "adjectives", "pattern": [{"LOWER": "ash"}], "id": "colors-bias"}
249
- {"label": "adjectives", "pattern": [{"LOWER": "asphalt"}], "id": "colors-bias"}
250
- {"label": "adjectives", "pattern": [{"LOWER": "auburn"}], "id": "colors-bias"}
251
- {"label": "adjectives", "pattern": [{"LOWER": "avocado"}], "id": "colors-bias"}
252
- {"label": "adjectives", "pattern": [{"LOWER": "aquamarine"}], "id": "colors-bias"}
253
- {"label": "adjectives", "pattern": [{"LOWER": "azure"}], "id": "colors-bias"}
254
- {"label": "adjectives", "pattern": [{"LOWER": "beige"}], "id": "colors-bias"}
255
- {"label": "adjectives", "pattern": [{"LOWER": "bisque"}], "id": "colors-bias"}
256
- {"label": "adjectives", "pattern": [{"LOWER": "black"}], "id": "colors-bias"}
257
- {"label": "adjectives", "pattern": [{"LOWER": "blue"}], "id": "colors-bias"}
258
- {"label": "adjectives", "pattern": [{"LOWER": "bone"}], "id": "colors-bias"}
259
- {"label": "adjectives", "pattern": [{"LOWER": "bordeaux"}], "id": "colors-bias"}
260
- {"label": "adjectives", "pattern": [{"LOWER": "brass"}], "id": "colors-bias"}
261
- {"label": "adjectives", "pattern": [{"LOWER": "bronze"}], "id": "colors-bias"}
262
- {"label": "adjectives", "pattern": [{"LOWER": "brown"}], "id": "colors-bias"}
263
- {"label": "adjectives", "pattern": [{"LOWER": "burgundy"}], "id": "colors-bias"}
264
- {"label": "adjectives", "pattern": [{"LOWER": "camel"}], "id": "colors-bias"}
265
- {"label": "adjectives", "pattern": [{"LOWER": "caramel"}], "id": "colors-bias"}
266
- {"label": "adjectives", "pattern": [{"LOWER": "canary"}], "id": "colors-bias"}
267
- {"label": "adjectives", "pattern": [{"LOWER": "celeste"}], "id": "colors-bias"}
268
- {"label": "adjectives", "pattern": [{"LOWER": "cerulean"}], "id": "colors-bias"}
269
- {"label": "adjectives", "pattern": [{"LOWER": "champagne"}], "id": "colors-bias"}
270
- {"label": "adjectives", "pattern": [{"LOWER": "charcoal"}], "id": "colors-bias"}
271
- {"label": "adjectives", "pattern": [{"LOWER": "chartreuse"}], "id": "colors-bias"}
272
- {"label": "adjectives", "pattern": [{"LOWER": "chestnut"}], "id": "colors-bias"}
273
- {"label": "adjectives", "pattern": [{"LOWER": "chocolate"}], "id": "colors-bias"}
274
- {"label": "adjectives", "pattern": [{"LOWER": "citron"}], "id": "colors-bias"}
275
- {"label": "adjectives", "pattern": [{"LOWER": "claret"}], "id": "colors-bias"}
276
- {"label": "adjectives", "pattern": [{"LOWER": "coal"}], "id": "colors-bias"}
277
- {"label": "adjectives", "pattern": [{"LOWER": "cobalt"}], "id": "colors-bias"}
278
- {"label": "adjectives", "pattern": [{"LOWER": "coffee"}], "id": "colors-bias"}
279
- {"label": "adjectives", "pattern": [{"LOWER": "coral"}], "id": "colors-bias"}
280
- {"label": "adjectives", "pattern": [{"LOWER": "corn"}], "id": "colors-bias"}
281
- {"label": "adjectives", "pattern": [{"LOWER": "cream"}], "id": "colors-bias"}
282
- {"label": "adjectives", "pattern": [{"LOWER": "crimson"}], "id": "colors-bias"}
283
- {"label": "adjectives", "pattern": [{"LOWER": "cyan"}], "id": "colors-bias"}
284
- {"label": "adjectives", "pattern": [{"LOWER": "denim"}], "id": "colors-bias"}
285
- {"label": "adjectives", "pattern": [{"LOWER": "desert"}], "id": "colors-bias"}
286
- {"label": "adjectives", "pattern": [{"LOWER": "ebony"}], "id": "colors-bias"}
287
- {"label": "adjectives", "pattern": [{"LOWER": "ecru"}], "id": "colors-bias"}
288
- {"label": "adjectives", "pattern": [{"LOWER": "emerald"}], "id": "colors-bias"}
289
- {"label": "adjectives", "pattern": [{"LOWER": "feldspar"}], "id": "colors-bias"}
290
- {"label": "adjectives", "pattern": [{"LOWER": "fuchsia"}], "id": "colors-bias"}
291
- {"label": "adjectives", "pattern": [{"LOWER": "gold"}], "id": "colors-bias"}
292
- {"label": "adjectives", "pattern": [{"LOWER": "gray"}], "id": "colors-bias"}
293
- {"label": "adjectives", "pattern": [{"LOWER": "green"}], "id": "colors-bias"}
294
- {"label": "adjectives", "pattern": [{"LOWER": "heather"}], "id": "colors-bias"}
295
- {"label": "adjectives", "pattern": [{"LOWER": "indigo"}], "id": "colors-bias"}
296
- {"label": "adjectives", "pattern": [{"LOWER": "ivory"}], "id": "colors-bias"}
297
- {"label": "adjectives", "pattern": [{"LOWER": "jet"}], "id": "colors-bias"}
298
- {"label": "adjectives", "pattern": [{"LOWER": "khaki"}], "id": "colors-bias"}
299
- {"label": "adjectives", "pattern": [{"LOWER": "lime"}], "id": "colors-bias"}
300
- {"label": "adjectives", "pattern": [{"LOWER": "magenta"}], "id": "colors-bias"}
301
- {"label": "adjectives", "pattern": [{"LOWER": "maroon"}], "id": "colors-bias"}
302
- {"label": "adjectives", "pattern": [{"LOWER": "mint"}], "id": "colors-bias"}
303
- {"label": "adjectives", "pattern": [{"LOWER": "navy"}], "id": "colors-bias"}
304
- {"label": "adjectives", "pattern": [{"LOWER": "olive"}], "id": "colors-bias"}
305
- {"label": "adjectives", "pattern": [{"LOWER": "orange"}], "id": "colors-bias"}
306
- {"label": "adjectives", "pattern": [{"LOWER": "pink"}], "id": "colors-bias"}
307
- {"label": "adjectives", "pattern": [{"LOWER": "plum"}], "id": "colors-bias"}
308
- {"label": "adjectives", "pattern": [{"LOWER": "purple"}], "id": "colors-bias"}
309
- {"label": "adjectives", "pattern": [{"LOWER": "red"}], "id": "colors-bias"}
310
- {"label": "adjectives", "pattern": [{"LOWER": "rust"}], "id": "colors-bias"}
311
- {"label": "adjectives", "pattern": [{"LOWER": "salmon"}], "id": "colors-bias"}
312
- {"label": "adjectives", "pattern": [{"LOWER": "sienna"}], "id": "colors-bias"}
313
- {"label": "adjectives", "pattern": [{"LOWER": "silver"}], "id": "colors-bias"}
314
- {"label": "adjectives", "pattern": [{"LOWER": "snow"}], "id": "colors-bias"}
315
- {"label": "adjectives", "pattern": [{"LOWER": "steel"}], "id": "colors-bias"}
316
- {"label": "adjectives", "pattern": [{"LOWER": "tan"}], "id": "colors-bias"}
317
- {"label": "adjectives", "pattern": [{"LOWER": "teal"}], "id": "colors-bias"}
318
- {"label": "adjectives", "pattern": [{"LOWER": "tomato"}], "id": "colors-bias"}
319
- {"label": "adjectives", "pattern": [{"LOWER": "violet"}], "id": "colors-bias"}
320
- {"label": "adjectives", "pattern": [{"LOWER": "white"}], "id": "colors-bias"}
321
- {"label": "adjectives", "pattern": [{"LOWER": "yellow"}], "id": "colors-bias"}
322
- {"label": "adjectives", "pattern": [{"LOWER": "bitter"}], "id": "taste-bias"}
323
- {"label": "adjectives", "pattern": [{"LOWER": "chalky"}], "id": "taste-bias"}
324
- {"label": "adjectives", "pattern": [{"LOWER": "chewy"}], "id": "taste-bias"}
325
- {"label": "adjectives", "pattern": [{"LOWER": "creamy"}], "id": "taste-bias"}
326
- {"label": "adjectives", "pattern": [{"LOWER": "crispy"}], "id": "taste-bias"}
327
- {"label": "adjectives", "pattern": [{"LOWER": "crunchy"}], "id": "taste-bias"}
328
- {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "taste-bias"}
329
- {"label": "adjectives", "pattern": [{"LOWER": "greasy"}], "id": "taste-bias"}
330
- {"label": "adjectives", "pattern": [{"LOWER": "gritty"}], "id": "taste-bias"}
331
- {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "taste-bias"}
332
- {"label": "adjectives", "pattern": [{"LOWER": "moist"}], "id": "taste-bias"}
333
- {"label": "adjectives", "pattern": [{"LOWER": "oily"}], "id": "taste-bias"}
334
- {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "taste-bias"}
335
- {"label": "adjectives", "pattern": [{"LOWER": "salty"}], "id": "taste-bias"}
336
- {"label": "adjectives", "pattern": [{"LOWER": "savory"}], "id": "taste-bias"}
337
- {"label": "adjectives", "pattern": [{"LOWER": "sour"}], "id": "taste-bias"}
338
- {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "taste-bias"}
339
- {"label": "adjectives", "pattern": [{"LOWER": "sweet"}], "id": "taste-bias"}
340
- {"label": "adjectives", "pattern": [{"LOWER": "tangy"}], "id": "taste-bias"}
341
- {"label": "adjectives", "pattern": [{"LOWER": "tart"}], "id": "taste-bias"}
342
- {"label": "adjectives", "pattern": [{"LOWER": "zesty"}], "id": "taste-bias"}
343
- {"label": "adjectives", "pattern": [{"LOWER": "all"}], "id": "quantity-bias"}
344
- {"label": "adjectives", "pattern": [{"LOWER": "another"}], "id": "quantity-bias"}
345
- {"label": "adjectives", "pattern": [{"LOWER": "each"}], "id": "quantity-bias"}
346
- {"label": "adjectives", "pattern": [{"LOWER": "either"}], "id": "quantity-bias"}
347
- {"label": "adjectives", "pattern": [{"LOWER": "every"}], "id": "quantity-bias"}
348
- {"label": "adjectives", "pattern": [{"LOWER": "few"}], "id": "quantity-bias"}
349
- {"label": "adjectives", "pattern": [{"LOWER": "many"}], "id": "quantity-bias"}
350
- {"label": "adjectives", "pattern": [{"LOWER": "numerous"}], "id": "quantity-bias"}
351
- {"label": "adjectives", "pattern": [{"LOWER": "one"}], "id": "quantity-bias"}
352
- {"label": "adjectives", "pattern": [{"LOWER": "other"}], "id": "quantity-bias"}
353
- {"label": "adjectives", "pattern": [{"LOWER": "several"}], "id": "quantity-bias"}
354
- {"label": "adjectives", "pattern": [{"LOWER": "some"}], "id": "quantity-bias"}
355
- {"label": "adjectives", "pattern": [{"LOWER": "average"}], "id": "size-bias"}
356
- {"label": "adjectives", "pattern": [{"LOWER": "big"}], "id": "size-bias"}
357
- {"label": "adjectives", "pattern": [{"LOWER": "broad"}], "id": "size-bias"}
358
- {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "size-bias"}
359
- {"label": "adjectives", "pattern": [{"LOWER": "giant"}], "id": "size-bias"}
360
- {"label": "adjectives", "pattern": [{"LOWER": "huge"}], "id": "size-bias"}
361
- {"label": "adjectives", "pattern": [{"LOWER": "humongous"}], "id": "size-bias"}
362
- {"label": "adjectives", "pattern": [{"LOWER": "immense"}], "id": "size-bias"}
363
- {"label": "adjectives", "pattern": [{"LOWER": "large"}], "id": "size-bias"}
364
- {"label": "adjectives", "pattern": [{"LOWER": "little"}], "id": "size-bias"}
365
- {"label": "adjectives", "pattern": [{"LOWER": "long"}], "id": "size-bias"}
366
- {"label": "adjectives", "pattern": [{"LOWER": "massive"}], "id": "size-bias"}
367
- {"label": "adjectives", "pattern": [{"LOWER": "medium"}], "id": "size-bias"}
368
- {"label": "adjectives", "pattern": [{"LOWER": "miniature"}], "id": "size-bias"}
369
- {"label": "adjectives", "pattern": [{"LOWER": "short"}], "id": "size-bias"}
370
- {"label": "adjectives", "pattern": [{"LOWER": "small"}], "id": "size-bias"}
371
- {"label": "adjectives", "pattern": [{"LOWER": "tall"}], "id": "size-bias"}
372
- {"label": "adjectives", "pattern": [{"LOWER": "tiny"}], "id": "size-bias"}
373
- {"label": "adjectives", "pattern": [{"LOWER": "wide"}], "id": "size-bias"}
374
- {"label": "adjectives", "pattern": [{"LOWER": "absolute"}], "id": "algorithms-bias"}
375
- {"label": "adjectives", "pattern": [{"LOWER": "abstract"}], "id": "algorithms-bias"}
376
- {"label": "adjectives", "pattern": [{"LOWER": "active"}], "id": "algorithms-bias"}
377
- {"label": "adjectives", "pattern": [{"LOWER": "acyclic"}], "id": "algorithms-bias"}
378
- {"label": "adjectives", "pattern": [{"LOWER": "adaptive"}], "id": "algorithms-bias"}
379
- {"label": "adjectives", "pattern": [{"LOWER": "amortized"}], "id": "algorithms-bias"}
380
- {"label": "adjectives", "pattern": [{"LOWER": "approximate"}], "id": "algorithms-bias"}
381
- {"label": "adjectives", "pattern": [{"LOWER": "ascent"}], "id": "algorithms-bias"}
382
- {"label": "adjectives", "pattern": [{"LOWER": "associative"}], "id": "algorithms-bias"}
383
- {"label": "adjectives", "pattern": [{"LOWER": "asymptotic"}], "id": "algorithms-bias"}
384
- {"label": "adjectives", "pattern": [{"LOWER": "augmenting"}], "id": "algorithms-bias"}
385
- {"label": "adjectives", "pattern": [{"LOWER": "average"}], "id": "algorithms-bias"}
386
- {"label": "adjectives", "pattern": [{"LOWER": "balanced"}], "id": "algorithms-bias"}
387
- {"label": "adjectives", "pattern": [{"LOWER": "best"}], "id": "algorithms-bias"}
388
- {"label": "adjectives", "pattern": [{"LOWER": "binary"}], "id": "algorithms-bias"}
389
- {"label": "adjectives", "pattern": [{"LOWER": "bipartite"}], "id": "algorithms-bias"}
390
- {"label": "adjectives", "pattern": [{"LOWER": "blocking"}], "id": "algorithms-bias"}
391
- {"label": "adjectives", "pattern": [{"LOWER": "boolean"}], "id": "algorithms-bias"}
392
- {"label": "adjectives", "pattern": [{"LOWER": "bounded"}], "id": "algorithms-bias"}
393
- {"label": "adjectives", "pattern": [{"LOWER": "brute force"}], "id": "algorithms-bias"}
394
- {"label": "adjectives", "pattern": [{"LOWER": "commutative"}], "id": "algorithms-bias"}
395
- {"label": "adjectives", "pattern": [{"LOWER": "complete"}], "id": "algorithms-bias"}
396
- {"label": "adjectives", "pattern": [{"LOWER": "concave"}], "id": "algorithms-bias"}
397
- {"label": "adjectives", "pattern": [{"LOWER": "concurrent"}], "id": "algorithms-bias"}
398
- {"label": "adjectives", "pattern": [{"LOWER": "connected"}], "id": "algorithms-bias"}
399
- {"label": "adjectives", "pattern": [{"LOWER": "constant"}], "id": "algorithms-bias"}
400
- {"label": "adjectives", "pattern": [{"LOWER": "counting"}], "id": "algorithms-bias"}
401
- {"label": "adjectives", "pattern": [{"LOWER": "covering"}], "id": "algorithms-bias"}
402
- {"label": "adjectives", "pattern": [{"LOWER": "cyclic"}], "id": "algorithms-bias"}
403
- {"label": "adjectives", "pattern": [{"LOWER": "decidable"}], "id": "algorithms-bias"}
404
- {"label": "adjectives", "pattern": [{"LOWER": "descent"}], "id": "algorithms-bias"}
405
- {"label": "adjectives", "pattern": [{"LOWER": "deterministic"}], "id": "algorithms-bias"}
406
- {"label": "adjectives", "pattern": [{"LOWER": "dichotomic"}], "id": "algorithms-bias"}
407
- {"label": "adjectives", "pattern": [{"LOWER": "dyadic"}], "id": "algorithms-bias"}
408
- {"label": "adjectives", "pattern": [{"LOWER": "dynamic"}], "id": "algorithms-bias"}
409
- {"label": "adjectives", "pattern": [{"LOWER": "exact"}], "id": "algorithms-bias"}
410
- {"label": "adjectives", "pattern": [{"LOWER": "exhaustive"}], "id": "algorithms-bias"}
411
- {"label": "adjectives", "pattern": [{"LOWER": "exponential"}], "id": "algorithms-bias"}
412
- {"label": "adjectives", "pattern": [{"LOWER": "extended"}], "id": "algorithms-bias"}
413
- {"label": "adjectives", "pattern": [{"LOWER": "external"}], "id": "algorithms-bias"}
414
- {"label": "adjectives", "pattern": [{"LOWER": "extremal"}], "id": "algorithms-bias"}
415
- {"label": "adjectives", "pattern": [{"LOWER": "factorial"}], "id": "algorithms-bias"}
416
- {"label": "adjectives", "pattern": [{"LOWER": "feasible"}], "id": "algorithms-bias"}
417
- {"label": "adjectives", "pattern": [{"LOWER": "finite"}], "id": "algorithms-bias"}
418
- {"label": "adjectives", "pattern": [{"LOWER": "fixed"}], "id": "algorithms-bias"}
419
- {"label": "adjectives", "pattern": [{"LOWER": "formal"}], "id": "algorithms-bias"}
420
- {"label": "adjectives", "pattern": [{"LOWER": "forward"}], "id": "algorithms-bias"}
421
- {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "algorithms-bias"}
422
- {"label": "adjectives", "pattern": [{"LOWER": "greedy"}], "id": "algorithms-bias"}
423
- {"label": "adjectives", "pattern": [{"LOWER": "hidden"}], "id": "algorithms-bias"}
424
- {"label": "adjectives", "pattern": [{"LOWER": "inclusive"}], "id": "algorithms-bias"}
425
- {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "algorithms-bias"}
426
- {"label": "adjectives", "pattern": [{"LOWER": "intractable"}], "id": "algorithms-bias"}
427
- {"label": "adjectives", "pattern": [{"LOWER": "inverse"}], "id": "algorithms-bias"}
428
- {"label": "adjectives", "pattern": [{"LOWER": "inverted"}], "id": "algorithms-bias"}
429
- {"label": "adjectives", "pattern": [{"LOWER": "isomorphic"}], "id": "algorithms-bias"}
430
- {"label": "adjectives", "pattern": [{"LOWER": "linear"}], "id": "algorithms-bias"}
431
- {"label": "adjectives", "pattern": [{"LOWER": "local"}], "id": "algorithms-bias"}
432
- {"label": "adjectives", "pattern": [{"LOWER": "lower"}], "id": "algorithms-bias"}
433
- {"label": "adjectives", "pattern": [{"LOWER": "matching"}], "id": "algorithms-bias"}
434
- {"label": "adjectives", "pattern": [{"LOWER": "maximum"}], "id": "algorithms-bias"}
435
- {"label": "adjectives", "pattern": [{"LOWER": "mean"}], "id": "algorithms-bias"}
436
- {"label": "adjectives", "pattern": [{"LOWER": "median"}], "id": "algorithms-bias"}
437
- {"label": "adjectives", "pattern": [{"LOWER": "minimum"}], "id": "algorithms-bias"}
438
- {"label": "adjectives", "pattern": [{"LOWER": "mode"}], "id": "algorithms-bias"}
439
- {"label": "adjectives", "pattern": [{"LOWER": "naive"}], "id": "algorithms-bias"}
440
- {"label": "adjectives", "pattern": [{"LOWER": "nearest"}], "id": "algorithms-bias"}
441
- {"label": "adjectives", "pattern": [{"LOWER": "nondeterministic"}], "id": "algorithms-bias"}
442
- {"label": "adjectives", "pattern": [{"LOWER": "null"}], "id": "algorithms-bias"}
443
- {"label": "adjectives", "pattern": [{"LOWER": "nullary"}], "id": "algorithms-bias"}
444
- {"label": "adjectives", "pattern": [{"LOWER": "objective"}], "id": "algorithms-bias"}
445
- {"label": "adjectives", "pattern": [{"LOWER": "offline"}], "id": "algorithms-bias"}
446
- {"label": "adjectives", "pattern": [{"LOWER": "online"}], "id": "algorithms-bias"}
447
- {"label": "adjectives", "pattern": [{"LOWER": "optimal"}], "id": "algorithms-bias"}
448
- {"label": "adjectives", "pattern": [{"LOWER": "ordered"}], "id": "algorithms-bias"}
449
- {"label": "adjectives", "pattern": [{"LOWER": "oriented"}], "id": "algorithms-bias"}
450
- {"label": "adjectives", "pattern": [{"LOWER": "orthogonal"}], "id": "algorithms-bias"}
451
- {"label": "adjectives", "pattern": [{"LOWER": "oscillating"}], "id": "algorithms-bias"}
452
- {"label": "adjectives", "pattern": [{"LOWER": "parallel"}], "id": "algorithms-bias"}
453
- {"label": "adjectives", "pattern": [{"LOWER": "partial"}], "id": "algorithms-bias"}
454
- {"label": "adjectives", "pattern": [{"LOWER": "perfect"}], "id": "algorithms-bias"}
455
- {"label": "adjectives", "pattern": [{"LOWER": "persistent"}], "id": "algorithms-bias"}
456
- {"label": "adjectives", "pattern": [{"LOWER": "planar"}], "id": "algorithms-bias"}
457
- {"label": "adjectives", "pattern": [{"LOWER": "polynomial"}], "id": "algorithms-bias"}
458
- {"label": "adjectives", "pattern": [{"LOWER": "proper"}], "id": "algorithms-bias"}
459
- {"label": "adjectives", "pattern": [{"LOWER": "quadratic"}], "id": "algorithms-bias"}
460
- {"label": "adjectives", "pattern": [{"LOWER": "ragged"}], "id": "algorithms-bias"}
461
- {"label": "adjectives", "pattern": [{"LOWER": "random"}], "id": "algorithms-bias"}
462
- {"label": "adjectives", "pattern": [{"LOWER": "randomized"}], "id": "algorithms-bias"}
463
- {"label": "adjectives", "pattern": [{"LOWER": "rectilinear"}], "id": "algorithms-bias"}
464
- {"label": "adjectives", "pattern": [{"LOWER": "recursive"}], "id": "algorithms-bias"}
465
- {"label": "adjectives", "pattern": [{"LOWER": "reduced"}], "id": "algorithms-bias"}
466
- {"label": "adjectives", "pattern": [{"LOWER": "relaxed"}], "id": "algorithms-bias"}
467
- {"label": "adjectives", "pattern": [{"LOWER": "shortest"}], "id": "algorithms-bias"}
468
- {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "algorithms-bias"}
469
- {"label": "adjectives", "pattern": [{"LOWER": "sparse"}], "id": "algorithms-bias"}
470
- {"label": "adjectives", "pattern": [{"LOWER": "spatial"}], "id": "algorithms-bias"}
471
- {"label": "adjectives", "pattern": [{"LOWER": "square"}], "id": "algorithms-bias"}
472
- {"label": "adjectives", "pattern": [{"LOWER": "stable"}], "id": "algorithms-bias"}
473
- {"label": "adjectives", "pattern": [{"LOWER": "swarm"}], "id": "algorithms-bias"}
474
- {"label": "adjectives", "pattern": [{"LOWER": "symmetric"}], "id": "algorithms-bias"}
475
- {"label": "adjectives", "pattern": [{"LOWER": "terminal"}], "id": "algorithms-bias"}
476
- {"label": "adjectives", "pattern": [{"LOWER": "ternary"}], "id": "algorithms-bias"}
477
- {"label": "adjectives", "pattern": [{"LOWER": "threaded"}], "id": "algorithms-bias"}
478
- {"label": "adjectives", "pattern": [{"LOWER": "tractable"}], "id": "algorithms-bias"}
479
- {"label": "adjectives", "pattern": [{"LOWER": "unary"}], "id": "algorithms-bias"}
480
- {"label": "adjectives", "pattern": [{"LOWER": "undecidable"}], "id": "algorithms-bias"}
481
- {"label": "adjectives", "pattern": [{"LOWER": "undirected"}], "id": "algorithms-bias"}
482
- {"label": "adjectives", "pattern": [{"LOWER": "uniform"}], "id": "algorithms-bias"}
483
- {"label": "adjectives", "pattern": [{"LOWER": "universal"}], "id": "algorithms-bias"}
484
- {"label": "adjectives", "pattern": [{"LOWER": "unsolvable"}], "id": "algorithms-bias"}
485
- {"label": "adjectives", "pattern": [{"LOWER": "unsorted"}], "id": "algorithms-bias"}
486
- {"label": "adjectives", "pattern": [{"LOWER": "visible"}], "id": "algorithms-bias"}
487
- {"label": "adjectives", "pattern": [{"LOWER": "weighted"}], "id": "algorithms-bias"}
488
- {"label": "adjectives", "pattern": [{"LOWER": "acute"}], "id": "geometry-bias"}
489
- {"label": "adjectives", "pattern": [{"LOWER": "adjacent"}], "id": "geometry-bias"}
490
- {"label": "adjectives", "pattern": [{"LOWER": "alternate"}], "id": "geometry-bias"}
491
- {"label": "adjectives", "pattern": [{"LOWER": "central"}], "id": "geometry-bias"}
492
- {"label": "adjectives", "pattern": [{"LOWER": "coincident"}], "id": "geometry-bias"}
493
- {"label": "adjectives", "pattern": [{"LOWER": "collinear"}], "id": "geometry-bias"}
494
- {"label": "adjectives", "pattern": [{"LOWER": "composite"}], "id": "geometry-bias"}
495
- {"label": "adjectives", "pattern": [{"LOWER": "concave"}], "id": "geometry-bias"}
496
- {"label": "adjectives", "pattern": [{"LOWER": "concentric"}], "id": "geometry-bias"}
497
- {"label": "adjectives", "pattern": [{"LOWER": "congruent"}], "id": "geometry-bias"}
498
- {"label": "adjectives", "pattern": [{"LOWER": "convex"}], "id": "geometry-bias"}
499
- {"label": "adjectives", "pattern": [{"LOWER": "coplanar"}], "id": "geometry-bias"}
500
- {"label": "adjectives", "pattern": [{"LOWER": "diagonal"}], "id": "geometry-bias"}
501
- {"label": "adjectives", "pattern": [{"LOWER": "distinct"}], "id": "geometry-bias"}
502
- {"label": "adjectives", "pattern": [{"LOWER": "equidistant"}], "id": "geometry-bias"}
503
- {"label": "adjectives", "pattern": [{"LOWER": "equilateral"}], "id": "geometry-bias"}
504
- {"label": "adjectives", "pattern": [{"LOWER": "fixed"}], "id": "geometry-bias"}
505
- {"label": "adjectives", "pattern": [{"LOWER": "horizontal"}], "id": "geometry-bias"}
506
- {"label": "adjectives", "pattern": [{"LOWER": "inscribed"}], "id": "geometry-bias"}
507
- {"label": "adjectives", "pattern": [{"LOWER": "interior"}], "id": "geometry-bias"}
508
- {"label": "adjectives", "pattern": [{"LOWER": "irregular"}], "id": "geometry-bias"}
509
- {"label": "adjectives", "pattern": [{"LOWER": "linear"}], "id": "geometry-bias"}
510
- {"label": "adjectives", "pattern": [{"LOWER": "oblique"}], "id": "geometry-bias"}
511
- {"label": "adjectives", "pattern": [{"LOWER": "obtuse"}], "id": "geometry-bias"}
512
- {"label": "adjectives", "pattern": [{"LOWER": "parallel"}], "id": "geometry-bias"}
513
- {"label": "adjectives", "pattern": [{"LOWER": "perpendicular"}], "id": "geometry-bias"}
514
- {"label": "adjectives", "pattern": [{"LOWER": "regular"}], "id": "geometry-bias"}
515
- {"label": "adjectives", "pattern": [{"LOWER": "right"}], "id": "geometry-bias"}
516
- {"label": "adjectives", "pattern": [{"LOWER": "similar"}], "id": "geometry-bias"}
517
- {"label": "adjectives", "pattern": [{"LOWER": "vertical"}], "id": "geometry-bias"}
518
- {"label": "adjectives", "pattern": [{"LOWER": "brass"}], "id": "materials-bias"}
519
- {"label": "adjectives", "pattern": [{"LOWER": "chalky"}], "id": "materials-bias"}
520
- {"label": "adjectives", "pattern": [{"LOWER": "concrete"}], "id": "materials-bias"}
521
- {"label": "adjectives", "pattern": [{"LOWER": "felt"}], "id": "materials-bias"}
522
- {"label": "adjectives", "pattern": [{"LOWER": "gilded"}], "id": "materials-bias"}
523
- {"label": "adjectives", "pattern": [{"LOWER": "glass"}], "id": "materials-bias"}
524
- {"label": "adjectives", "pattern": [{"LOWER": "golden"}], "id": "materials-bias"}
525
- {"label": "adjectives", "pattern": [{"LOWER": "iron"}], "id": "materials-bias"}
526
- {"label": "adjectives", "pattern": [{"LOWER": "leather"}], "id": "materials-bias"}
527
- {"label": "adjectives", "pattern": [{"LOWER": "metal"}], "id": "materials-bias"}
528
- {"label": "adjectives", "pattern": [{"LOWER": "metallic"}], "id": "materials-bias"}
529
- {"label": "adjectives", "pattern": [{"LOWER": "oily"}], "id": "materials-bias"}
530
- {"label": "adjectives", "pattern": [{"LOWER": "paper"}], "id": "materials-bias"}
531
- {"label": "adjectives", "pattern": [{"LOWER": "plastic"}], "id": "materials-bias"}
532
- {"label": "adjectives", "pattern": [{"LOWER": "silver"}], "id": "materials-bias"}
533
- {"label": "adjectives", "pattern": [{"LOWER": "steel"}], "id": "materials-bias"}
534
- {"label": "adjectives", "pattern": [{"LOWER": "stone"}], "id": "materials-bias"}
535
- {"label": "adjectives", "pattern": [{"LOWER": "watery"}], "id": "materials-bias"}
536
- {"label": "adjectives", "pattern": [{"LOWER": "wicker"}], "id": "materials-bias"}
537
- {"label": "adjectives", "pattern": [{"LOWER": "wood"}], "id": "materials-bias"}
538
- {"label": "adjectives", "pattern": [{"LOWER": "wooden"}], "id": "materials-bias"}
539
- {"label": "adjectives", "pattern": [{"LOWER": "woolen"}], "id": "materials-bias"}
540
- {"label": "adjectives", "pattern": [{"LOWER": "beveled"}], "id": "construction-bias"}
541
- {"label": "adjectives", "pattern": [{"LOWER": "chamfered"}], "id": "construction-bias"}
542
- {"label": "adjectives", "pattern": [{"LOWER": "coped"}], "id": "construction-bias"}
543
- {"label": "adjectives", "pattern": [{"LOWER": "flashed"}], "id": "construction-bias"}
544
- {"label": "adjectives", "pattern": [{"LOWER": "flush"}], "id": "construction-bias"}
545
- {"label": "adjectives", "pattern": [{"LOWER": "inflammable"}], "id": "construction-bias"}
546
- {"label": "adjectives", "pattern": [{"LOWER": "insulated"}], "id": "construction-bias"}
547
- {"label": "adjectives", "pattern": [{"LOWER": "isometric"}], "id": "construction-bias"}
548
- {"label": "adjectives", "pattern": [{"LOWER": "joint"}], "id": "construction-bias"}
549
- {"label": "adjectives", "pattern": [{"LOWER": "knurled"}], "id": "construction-bias"}
550
- {"label": "adjectives", "pattern": [{"LOWER": "laminated"}], "id": "construction-bias"}
551
- {"label": "adjectives", "pattern": [{"LOWER": "level"}], "id": "construction-bias"}
552
- {"label": "adjectives", "pattern": [{"LOWER": "plumb"}], "id": "construction-bias"}
553
- {"label": "adjectives", "pattern": [{"LOWER": "radial"}], "id": "construction-bias"}
554
- {"label": "adjectives", "pattern": [{"LOWER": "rigid"}], "id": "construction-bias"}
555
- {"label": "adjectives", "pattern": [{"LOWER": "soluble"}], "id": "construction-bias"}
556
- {"label": "adjectives", "pattern": [{"LOWER": "tempered"}], "id": "construction-bias"}
557
- {"label": "adjectives", "pattern": [{"LOWER": "warped"}], "id": "construction-bias"}
558
- {"label": "adjectives", "pattern": [{"LOWER": "adagio"}], "id": "music_theory-bias"}
559
- {"label": "adjectives", "pattern": [{"LOWER": "allegro"}], "id": "music_theory-bias"}
560
- {"label": "adjectives", "pattern": [{"LOWER": "andante"}], "id": "music_theory-bias"}
561
- {"label": "adjectives", "pattern": [{"LOWER": "animato"}], "id": "music_theory-bias"}
562
- {"label": "adjectives", "pattern": [{"LOWER": "espressivo"}], "id": "music_theory-bias"}
563
- {"label": "adjectives", "pattern": [{"LOWER": "grandioso"}], "id": "music_theory-bias"}
564
- {"label": "adjectives", "pattern": [{"LOWER": "grave"}], "id": "music_theory-bias"}
565
- {"label": "adjectives", "pattern": [{"LOWER": "largo"}], "id": "music_theory-bias"}
566
- {"label": "adjectives", "pattern": [{"LOWER": "legato"}], "id": "music_theory-bias"}
567
- {"label": "adjectives", "pattern": [{"LOWER": "libretto"}], "id": "music_theory-bias"}
568
- {"label": "adjectives", "pattern": [{"LOWER": "moderato"}], "id": "music_theory-bias"}
569
- {"label": "adjectives", "pattern": [{"LOWER": "molto"}], "id": "music_theory-bias"}
570
- {"label": "adjectives", "pattern": [{"LOWER": "pizzicato"}], "id": "music_theory-bias"}
571
- {"label": "adjectives", "pattern": [{"LOWER": "presto"}], "id": "music_theory-bias"}
572
- {"label": "adjectives", "pattern": [{"LOWER": "staccato"}], "id": "music_theory-bias"}
573
- {"label": "adjectives", "pattern": [{"LOWER": "vibrato"}], "id": "music_theory-bias"}
574
- {"label": "adjectives", "pattern": [{"LOWER": "blazing"}], "id": "appearance-bias"}
575
- {"label": "adjectives", "pattern": [{"LOWER": "bright"}], "id": "appearance-bias"}
576
- {"label": "adjectives", "pattern": [{"LOWER": "brilliant"}], "id": "appearance-bias"}
577
- {"label": "adjectives", "pattern": [{"LOWER": "burning"}], "id": "appearance-bias"}
578
- {"label": "adjectives", "pattern": [{"LOWER": "clean"}], "id": "appearance-bias"}
579
- {"label": "adjectives", "pattern": [{"LOWER": "colorful"}], "id": "appearance-bias"}
580
- {"label": "adjectives", "pattern": [{"LOWER": "dark"}], "id": "appearance-bias"}
581
- {"label": "adjectives", "pattern": [{"LOWER": "drab"}], "id": "appearance-bias"}
582
- {"label": "adjectives", "pattern": [{"LOWER": "dull"}], "id": "appearance-bias"}
583
- {"label": "adjectives", "pattern": [{"LOWER": "faded"}], "id": "appearance-bias"}
584
- {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "appearance-bias"}
585
- {"label": "adjectives", "pattern": [{"LOWER": "glossy"}], "id": "appearance-bias"}
586
- {"label": "adjectives", "pattern": [{"LOWER": "glowing"}], "id": "appearance-bias"}
587
- {"label": "adjectives", "pattern": [{"LOWER": "light"}], "id": "appearance-bias"}
588
- {"label": "adjectives", "pattern": [{"LOWER": "matte"}], "id": "appearance-bias"}
589
- {"label": "adjectives", "pattern": [{"LOWER": "muted"}], "id": "appearance-bias"}
590
- {"label": "adjectives", "pattern": [{"LOWER": "pale"}], "id": "appearance-bias"}
591
- {"label": "adjectives", "pattern": [{"LOWER": "pallid"}], "id": "appearance-bias"}
592
- {"label": "adjectives", "pattern": [{"LOWER": "radiant"}], "id": "appearance-bias"}
593
- {"label": "adjectives", "pattern": [{"LOWER": "shiny"}], "id": "appearance-bias"}
594
- {"label": "adjectives", "pattern": [{"LOWER": "sleek"}], "id": "appearance-bias"}
595
- {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "appearance-bias"}
596
- {"label": "adjectives", "pattern": [{"LOWER": "vibrant"}], "id": "appearance-bias"}
597
- {"label": "adjectives", "pattern": [{"LOWER": "vivid"}], "id": "appearance-bias"}
598
- {"label": "adjectives", "pattern": [{"LOWER": "wan"}], "id": "appearance-bias"}
599
- {"label": "adjectives", "pattern": [{"LOWER": "weathered"}], "id": "appearance-bias"}
600
- {"label": "adjectives", "pattern": [{"LOWER": "worn"}], "id": "appearance-bias"}
601
- {"label": "adjectives", "pattern": [{"LOWER": "descriptive"}], "id": "linguistics-bias"}
602
- {"label": "adjectives", "pattern": [{"LOWER": "diachronic"}], "id": "linguistics-bias"}
603
- {"label": "adjectives", "pattern": [{"LOWER": "figurative"}], "id": "linguistics-bias"}
604
- {"label": "adjectives", "pattern": [{"LOWER": "generative"}], "id": "linguistics-bias"}
605
- {"label": "adjectives", "pattern": [{"LOWER": "marked"}], "id": "linguistics-bias"}
606
- {"label": "adjectives", "pattern": [{"LOWER": "regular"}], "id": "linguistics-bias"}
607
- {"label": "adjectives", "pattern": [{"LOWER": "synchronic"}], "id": "linguistics-bias"}
608
- {"label": "adjectives", "pattern": [{"LOWER": "taxonomic"}], "id": "linguistics-bias"}
609
- {"label": "adjectives", "pattern": [{"LOWER": "unproductive"}], "id": "linguistics-bias"}
610
- {"label": "adjectives", "pattern": [{"LOWER": "afraid"}], "id": "emotions-bias"}
611
- {"label": "adjectives", "pattern": [{"LOWER": "angry"}], "id": "emotions-bias"}
612
- {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "emotions-bias"}
613
- {"label": "adjectives", "pattern": [{"LOWER": "cheerful"}], "id": "emotions-bias"}
614
- {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "emotions-bias"}
615
- {"label": "adjectives", "pattern": [{"LOWER": "crabby"}], "id": "emotions-bias"}
616
- {"label": "adjectives", "pattern": [{"LOWER": "crazy"}], "id": "emotions-bias"}
617
- {"label": "adjectives", "pattern": [{"LOWER": "cross"}], "id": "emotions-bias"}
618
- {"label": "adjectives", "pattern": [{"LOWER": "excited"}], "id": "emotions-bias"}
619
- {"label": "adjectives", "pattern": [{"LOWER": "frigid"}], "id": "emotions-bias"}
620
- {"label": "adjectives", "pattern": [{"LOWER": "furious"}], "id": "emotions-bias"}
621
- {"label": "adjectives", "pattern": [{"LOWER": "glad"}], "id": "emotions-bias"}
622
- {"label": "adjectives", "pattern": [{"LOWER": "glum"}], "id": "emotions-bias"}
623
- {"label": "adjectives", "pattern": [{"LOWER": "happy"}], "id": "emotions-bias"}
624
- {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "emotions-bias"}
625
- {"label": "adjectives", "pattern": [{"LOWER": "jolly"}], "id": "emotions-bias"}
626
- {"label": "adjectives", "pattern": [{"LOWER": "jovial"}], "id": "emotions-bias"}
627
- {"label": "adjectives", "pattern": [{"LOWER": "kind"}], "id": "emotions-bias"}
628
- {"label": "adjectives", "pattern": [{"LOWER": "lively"}], "id": "emotions-bias"}
629
- {"label": "adjectives", "pattern": [{"LOWER": "livid"}], "id": "emotions-bias"}
630
- {"label": "adjectives", "pattern": [{"LOWER": "mad"}], "id": "emotions-bias"}
631
- {"label": "adjectives", "pattern": [{"LOWER": "ornery"}], "id": "emotions-bias"}
632
- {"label": "adjectives", "pattern": [{"LOWER": "rosy"}], "id": "emotions-bias"}
633
- {"label": "adjectives", "pattern": [{"LOWER": "sad"}], "id": "emotions-bias"}
634
- {"label": "adjectives", "pattern": [{"LOWER": "scared"}], "id": "emotions-bias"}
635
- {"label": "adjectives", "pattern": [{"LOWER": "seething"}], "id": "emotions-bias"}
636
- {"label": "adjectives", "pattern": [{"LOWER": "shy"}], "id": "emotions-bias"}
637
- {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "emotions-bias"}
638
- {"label": "adjectives", "pattern": [{"LOWER": "tense"}], "id": "emotions-bias"}
639
- {"label": "adjectives", "pattern": [{"LOWER": "tranquil"}], "id": "emotions-bias"}
640
- {"label": "adjectives", "pattern": [{"LOWER": "upbeat"}], "id": "emotions-bias"}
641
- {"label": "adjectives", "pattern": [{"LOWER": "wary"}], "id": "emotions-bias"}
642
- {"label": "adjectives", "pattern": [{"LOWER": "weary"}], "id": "emotions-bias"}
643
- {"label": "adjectives", "pattern": [{"LOWER": "worried"}], "id": "emotions-bias"}
644
- {"label": "adjectives", "pattern": [{"LOWER": "advanced"}], "id": "age-bias"}
645
- {"label": "adjectives", "pattern": [{"LOWER": "aged"}], "id": "age-bias"}
646
- {"label": "adjectives", "pattern": [{"LOWER": "ancient"}], "id": "age-bias"}
647
- {"label": "adjectives", "pattern": [{"LOWER": "antique"}], "id": "age-bias"}
648
- {"label": "adjectives", "pattern": [{"LOWER": "archaic"}], "id": "age-bias"}
649
- {"label": "adjectives", "pattern": [{"LOWER": "contemporary"}], "id": "age-bias"}
650
- {"label": "adjectives", "pattern": [{"LOWER": "current"}], "id": "age-bias"}
651
- {"label": "adjectives", "pattern": [{"LOWER": "frayed"}], "id": "age-bias"}
652
- {"label": "adjectives", "pattern": [{"LOWER": "fresh"}], "id": "age-bias"}
653
- {"label": "adjectives", "pattern": [{"LOWER": "grizzled"}], "id": "age-bias"}
654
- {"label": "adjectives", "pattern": [{"LOWER": "hoary"}], "id": "age-bias"}
655
- {"label": "adjectives", "pattern": [{"LOWER": "immature"}], "id": "age-bias"}
656
- {"label": "adjectives", "pattern": [{"LOWER": "juvenile"}], "id": "age-bias"}
657
- {"label": "adjectives", "pattern": [{"LOWER": "mature"}], "id": "age-bias"}
658
- {"label": "adjectives", "pattern": [{"LOWER": "modern"}], "id": "age-bias"}
659
- {"label": "adjectives", "pattern": [{"LOWER": "new"}], "id": "age-bias"}
660
- {"label": "adjectives", "pattern": [{"LOWER": "novel"}], "id": "age-bias"}
661
- {"label": "adjectives", "pattern": [{"LOWER": "obsolete"}], "id": "age-bias"}
662
- {"label": "adjectives", "pattern": [{"LOWER": "old"}], "id": "age-bias"}
663
- {"label": "adjectives", "pattern": [{"LOWER": "primordial"}], "id": "age-bias"}
664
- {"label": "adjectives", "pattern": [{"LOWER": "ragged"}], "id": "age-bias"}
665
- {"label": "adjectives", "pattern": [{"LOWER": "raw"}], "id": "age-bias"}
666
- {"label": "adjectives", "pattern": [{"LOWER": "recent"}], "id": "age-bias"}
667
- {"label": "adjectives", "pattern": [{"LOWER": "senile"}], "id": "age-bias"}
668
- {"label": "adjectives", "pattern": [{"LOWER": "shabby"}], "id": "age-bias"}
669
- {"label": "adjectives", "pattern": [{"LOWER": "stale"}], "id": "age-bias"}
670
- {"label": "adjectives", "pattern": [{"LOWER": "tattered"}], "id": "age-bias"}
671
- {"label": "adjectives", "pattern": [{"LOWER": "threadbare"}], "id": "age-bias"}
672
- {"label": "adjectives", "pattern": [{"LOWER": "trite"}], "id": "age-bias"}
673
- {"label": "adjectives", "pattern": [{"LOWER": "vintage"}], "id": "age-bias"}
674
- {"label": "adjectives", "pattern": [{"LOWER": "worn"}], "id": "age-bias"}
675
- {"label": "adjectives", "pattern": [{"LOWER": "young"}], "id": "age-bias"}
676
- {"label": "adjectives", "pattern": [{"LOWER": "accepting"}], "id": "character-bias"}
677
- {"label": "adjectives", "pattern": [{"LOWER": "adventurous"}], "id": "character-bias"}
678
- {"label": "adjectives", "pattern": [{"LOWER": "affable"}], "id": "character-bias"}
679
- {"label": "adjectives", "pattern": [{"LOWER": "ambitious"}], "id": "character-bias"}
680
- {"label": "adjectives", "pattern": [{"LOWER": "amiable"}], "id": "character-bias"}
681
- {"label": "adjectives", "pattern": [{"LOWER": "amicable"}], "id": "character-bias"}
682
- {"label": "adjectives", "pattern": [{"LOWER": "annoying"}], "id": "character-bias"}
683
- {"label": "adjectives", "pattern": [{"LOWER": "bold"}], "id": "character-bias"}
684
- {"label": "adjectives", "pattern": [{"LOWER": "brave"}], "id": "character-bias"}
685
- {"label": "adjectives", "pattern": [{"LOWER": "bright"}], "id": "character-bias"}
686
- {"label": "adjectives", "pattern": [{"LOWER": "brutal"}], "id": "character-bias"}
687
- {"label": "adjectives", "pattern": [{"LOWER": "brute"}], "id": "character-bias"}
688
- {"label": "adjectives", "pattern": [{"LOWER": "callous"}], "id": "character-bias"}
689
- {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "character-bias"}
690
- {"label": "adjectives", "pattern": [{"LOWER": "careful"}], "id": "character-bias"}
691
- {"label": "adjectives", "pattern": [{"LOWER": "cautious"}], "id": "character-bias"}
692
- {"label": "adjectives", "pattern": [{"LOWER": "charitable"}], "id": "character-bias"}
693
- {"label": "adjectives", "pattern": [{"LOWER": "cheerful"}], "id": "character-bias"}
694
- {"label": "adjectives", "pattern": [{"LOWER": "clever"}], "id": "character-bias"}
695
- {"label": "adjectives", "pattern": [{"LOWER": "courtly"}], "id": "character-bias"}
696
- {"label": "adjectives", "pattern": [{"LOWER": "creative"}], "id": "character-bias"}
697
- {"label": "adjectives", "pattern": [{"LOWER": "cruel"}], "id": "character-bias"}
698
- {"label": "adjectives", "pattern": [{"LOWER": "curious"}], "id": "character-bias"}
699
- {"label": "adjectives", "pattern": [{"LOWER": "daring"}], "id": "character-bias"}
700
- {"label": "adjectives", "pattern": [{"LOWER": "devout"}], "id": "character-bias"}
701
- {"label": "adjectives", "pattern": [{"LOWER": "eager"}], "id": "character-bias"}
702
- {"label": "adjectives", "pattern": [{"LOWER": "elegant"}], "id": "character-bias"}
703
- {"label": "adjectives", "pattern": [{"LOWER": "energetic"}], "id": "character-bias"}
704
- {"label": "adjectives", "pattern": [{"LOWER": "excited"}], "id": "character-bias"}
705
- {"label": "adjectives", "pattern": [{"LOWER": "ferocious"}], "id": "character-bias"}
706
- {"label": "adjectives", "pattern": [{"LOWER": "forgiving"}], "id": "character-bias"}
707
- {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "character-bias"}
708
- {"label": "adjectives", "pattern": [{"LOWER": "friendly"}], "id": "character-bias"}
709
- {"label": "adjectives", "pattern": [{"LOWER": "funny"}], "id": "character-bias"}
710
- {"label": "adjectives", "pattern": [{"LOWER": "generous"}], "id": "character-bias"}
711
- {"label": "adjectives", "pattern": [{"LOWER": "genteel"}], "id": "character-bias"}
712
- {"label": "adjectives", "pattern": [{"LOWER": "gentle"}], "id": "character-bias"}
713
- {"label": "adjectives", "pattern": [{"LOWER": "graceful"}], "id": "character-bias"}
714
- {"label": "adjectives", "pattern": [{"LOWER": "grim"}], "id": "character-bias"}
715
- {"label": "adjectives", "pattern": [{"LOWER": "grouchy"}], "id": "character-bias"}
716
- {"label": "adjectives", "pattern": [{"LOWER": "happy"}], "id": "character-bias"}
717
- {"label": "adjectives", "pattern": [{"LOWER": "heartless"}], "id": "character-bias"}
718
- {"label": "adjectives", "pattern": [{"LOWER": "helpful"}], "id": "character-bias"}
719
- {"label": "adjectives", "pattern": [{"LOWER": "honest"}], "id": "character-bias"}
720
- {"label": "adjectives", "pattern": [{"LOWER": "humane"}], "id": "character-bias"}
721
- {"label": "adjectives", "pattern": [{"LOWER": "humble"}], "id": "character-bias"}
722
- {"label": "adjectives", "pattern": [{"LOWER": "impulsive"}], "id": "character-bias"}
723
- {"label": "adjectives", "pattern": [{"LOWER": "independent"}], "id": "character-bias"}
724
- {"label": "adjectives", "pattern": [{"LOWER": "indulgent"}], "id": "character-bias"}
725
- {"label": "adjectives", "pattern": [{"LOWER": "intense"}], "id": "character-bias"}
726
- {"label": "adjectives", "pattern": [{"LOWER": "inventive"}], "id": "character-bias"}
727
- {"label": "adjectives", "pattern": [{"LOWER": "kind"}], "id": "character-bias"}
728
- {"label": "adjectives", "pattern": [{"LOWER": "lazy"}], "id": "character-bias"}
729
- {"label": "adjectives", "pattern": [{"LOWER": "lenient"}], "id": "character-bias"}
730
- {"label": "adjectives", "pattern": [{"LOWER": "loyal"}], "id": "character-bias"}
731
- {"label": "adjectives", "pattern": [{"LOWER": "meek"}], "id": "character-bias"}
732
- {"label": "adjectives", "pattern": [{"LOWER": "merciless"}], "id": "character-bias"}
733
- {"label": "adjectives", "pattern": [{"LOWER": "merry"}], "id": "character-bias"}
734
- {"label": "adjectives", "pattern": [{"LOWER": "messy"}], "id": "character-bias"}
735
- {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "character-bias"}
736
- {"label": "adjectives", "pattern": [{"LOWER": "neat"}], "id": "character-bias"}
737
- {"label": "adjectives", "pattern": [{"LOWER": "nervous"}], "id": "character-bias"}
738
- {"label": "adjectives", "pattern": [{"LOWER": "obliging"}], "id": "character-bias"}
739
- {"label": "adjectives", "pattern": [{"LOWER": "obnoxious"}], "id": "character-bias"}
740
- {"label": "adjectives", "pattern": [{"LOWER": "odious"}], "id": "character-bias"}
741
- {"label": "adjectives", "pattern": [{"LOWER": "patient"}], "id": "character-bias"}
742
- {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "character-bias"}
743
- {"label": "adjectives", "pattern": [{"LOWER": "pleasant"}], "id": "character-bias"}
744
- {"label": "adjectives", "pattern": [{"LOWER": "polite"}], "id": "character-bias"}
745
- {"label": "adjectives", "pattern": [{"LOWER": "proper"}], "id": "character-bias"}
746
- {"label": "adjectives", "pattern": [{"LOWER": "proud"}], "id": "character-bias"}
747
- {"label": "adjectives", "pattern": [{"LOWER": "quick"}], "id": "character-bias"}
748
- {"label": "adjectives", "pattern": [{"LOWER": "quiet"}], "id": "character-bias"}
749
- {"label": "adjectives", "pattern": [{"LOWER": "refined"}], "id": "character-bias"}
750
- {"label": "adjectives", "pattern": [{"LOWER": "relaxed"}], "id": "character-bias"}
751
- {"label": "adjectives", "pattern": [{"LOWER": "religious"}], "id": "character-bias"}
752
- {"label": "adjectives", "pattern": [{"LOWER": "respectful"}], "id": "character-bias"}
753
- {"label": "adjectives", "pattern": [{"LOWER": "rude"}], "id": "character-bias"}
754
- {"label": "adjectives", "pattern": [{"LOWER": "savage"}], "id": "character-bias"}
755
- {"label": "adjectives", "pattern": [{"LOWER": "selfish"}], "id": "character-bias"}
756
- {"label": "adjectives", "pattern": [{"LOWER": "sensitive"}], "id": "character-bias"}
757
- {"label": "adjectives", "pattern": [{"LOWER": "serious"}], "id": "character-bias"}
758
- {"label": "adjectives", "pattern": [{"LOWER": "shrewd"}], "id": "character-bias"}
759
- {"label": "adjectives", "pattern": [{"LOWER": "silly"}], "id": "character-bias"}
760
- {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "character-bias"}
761
- {"label": "adjectives", "pattern": [{"LOWER": "smart"}], "id": "character-bias"}
762
- {"label": "adjectives", "pattern": [{"LOWER": "soft"}], "id": "character-bias"}
763
- {"label": "adjectives", "pattern": [{"LOWER": "sophisticated"}], "id": "character-bias"}
764
- {"label": "adjectives", "pattern": [{"LOWER": "stern"}], "id": "character-bias"}
765
- {"label": "adjectives", "pattern": [{"LOWER": "strong"}], "id": "character-bias"}
766
- {"label": "adjectives", "pattern": [{"LOWER": "stubborn"}], "id": "character-bias"}
767
- {"label": "adjectives", "pattern": [{"LOWER": "tender"}], "id": "character-bias"}
768
- {"label": "adjectives", "pattern": [{"LOWER": "tense"}], "id": "character-bias"}
769
- {"label": "adjectives", "pattern": [{"LOWER": "timid"}], "id": "character-bias"}
770
- {"label": "adjectives", "pattern": [{"LOWER": "tough"}], "id": "character-bias"}
771
- {"label": "adjectives", "pattern": [{"LOWER": "trusting"}], "id": "character-bias"}
772
- {"label": "adjectives", "pattern": [{"LOWER": "urbane"}], "id": "character-bias"}
773
- {"label": "adjectives", "pattern": [{"LOWER": "vain"}], "id": "character-bias"}
774
- {"label": "adjectives", "pattern": [{"LOWER": "vicious"}], "id": "character-bias"}
775
- {"label": "adjectives", "pattern": [{"LOWER": "violent"}], "id": "character-bias"}
776
- {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "character-bias"}
777
- {"label": "adjectives", "pattern": [{"LOWER": "wise"}], "id": "character-bias"}
778
- {"label": "adjectives", "pattern": [{"LOWER": "witty"}], "id": "character-bias"}
779
- {"label": "adjectives", "pattern": [{"LOWER": "acidic"}], "id": "food-bias"}
780
- {"label": "adjectives", "pattern": [{"LOWER": "baked"}], "id": "food-bias"}
781
- {"label": "adjectives", "pattern": [{"LOWER": "bitter"}], "id": "food-bias"}
782
- {"label": "adjectives", "pattern": [{"LOWER": "bland"}], "id": "food-bias"}
783
- {"label": "adjectives", "pattern": [{"LOWER": "blended"}], "id": "food-bias"}
784
- {"label": "adjectives", "pattern": [{"LOWER": "briny"}], "id": "food-bias"}
785
- {"label": "adjectives", "pattern": [{"LOWER": "buttery"}], "id": "food-bias"}
786
- {"label": "adjectives", "pattern": [{"LOWER": "candied"}], "id": "food-bias"}
787
- {"label": "adjectives", "pattern": [{"LOWER": "cheesy"}], "id": "food-bias"}
788
- {"label": "adjectives", "pattern": [{"LOWER": "chewy"}], "id": "food-bias"}
789
- {"label": "adjectives", "pattern": [{"LOWER": "chocolaty"}], "id": "food-bias"}
790
- {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "food-bias"}
791
- {"label": "adjectives", "pattern": [{"LOWER": "creamy"}], "id": "food-bias"}
792
- {"label": "adjectives", "pattern": [{"LOWER": "crispy"}], "id": "food-bias"}
793
- {"label": "adjectives", "pattern": [{"LOWER": "crunchy"}], "id": "food-bias"}
794
- {"label": "adjectives", "pattern": [{"LOWER": "delicious"}], "id": "food-bias"}
795
- {"label": "adjectives", "pattern": [{"LOWER": "doughy"}], "id": "food-bias"}
796
- {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "food-bias"}
797
- {"label": "adjectives", "pattern": [{"LOWER": "flavorful"}], "id": "food-bias"}
798
- {"label": "adjectives", "pattern": [{"LOWER": "frozen"}], "id": "food-bias"}
799
- {"label": "adjectives", "pattern": [{"LOWER": "golden"}], "id": "food-bias"}
800
- {"label": "adjectives", "pattern": [{"LOWER": "gourmet"}], "id": "food-bias"}
801
- {"label": "adjectives", "pattern": [{"LOWER": "greasy"}], "id": "food-bias"}
802
- {"label": "adjectives", "pattern": [{"LOWER": "grilled"}], "id": "food-bias"}
803
- {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "food-bias"}
804
- {"label": "adjectives", "pattern": [{"LOWER": "intense"}], "id": "food-bias"}
805
- {"label": "adjectives", "pattern": [{"LOWER": "jellied"}], "id": "food-bias"}
806
- {"label": "adjectives", "pattern": [{"LOWER": "juicy"}], "id": "food-bias"}
807
- {"label": "adjectives", "pattern": [{"LOWER": "jumbo"}], "id": "food-bias"}
808
- {"label": "adjectives", "pattern": [{"LOWER": "lean"}], "id": "food-bias"}
809
- {"label": "adjectives", "pattern": [{"LOWER": "marinated"}], "id": "food-bias"}
810
- {"label": "adjectives", "pattern": [{"LOWER": "mashed"}], "id": "food-bias"}
811
- {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "food-bias"}
812
- {"label": "adjectives", "pattern": [{"LOWER": "minty"}], "id": "food-bias"}
813
- {"label": "adjectives", "pattern": [{"LOWER": "nutty"}], "id": "food-bias"}
814
- {"label": "adjectives", "pattern": [{"LOWER": "organic"}], "id": "food-bias"}
815
- {"label": "adjectives", "pattern": [{"LOWER": "piquant"}], "id": "food-bias"}
816
- {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "food-bias"}
817
- {"label": "adjectives", "pattern": [{"LOWER": "poached"}], "id": "food-bias"}
818
- {"label": "adjectives", "pattern": [{"LOWER": "pounded"}], "id": "food-bias"}
819
- {"label": "adjectives", "pattern": [{"LOWER": "prepared"}], "id": "food-bias"}
820
- {"label": "adjectives", "pattern": [{"LOWER": "pureed"}], "id": "food-bias"}
821
- {"label": "adjectives", "pattern": [{"LOWER": "rancid"}], "id": "food-bias"}
822
- {"label": "adjectives", "pattern": [{"LOWER": "rank"}], "id": "food-bias"}
823
- {"label": "adjectives", "pattern": [{"LOWER": "rich"}], "id": "food-bias"}
824
- {"label": "adjectives", "pattern": [{"LOWER": "ripe"}], "id": "food-bias"}
825
- {"label": "adjectives", "pattern": [{"LOWER": "rubbery"}], "id": "food-bias"}
826
- {"label": "adjectives", "pattern": [{"LOWER": "salty"}], "id": "food-bias"}
827
- {"label": "adjectives", "pattern": [{"LOWER": "saucy"}], "id": "food-bias"}
828
- {"label": "adjectives", "pattern": [{"LOWER": "savory"}], "id": "food-bias"}
829
- {"label": "adjectives", "pattern": [{"LOWER": "seasoned"}], "id": "food-bias"}
830
- {"label": "adjectives", "pattern": [{"LOWER": "sharp"}], "id": "food-bias"}
831
- {"label": "adjectives", "pattern": [{"LOWER": "simmered"}], "id": "food-bias"}
832
- {"label": "adjectives", "pattern": [{"LOWER": "smoked"}], "id": "food-bias"}
833
- {"label": "adjectives", "pattern": [{"LOWER": "smoky"}], "id": "food-bias"}
834
- {"label": "adjectives", "pattern": [{"LOWER": "sour"}], "id": "food-bias"}
835
- {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "food-bias"}
836
- {"label": "adjectives", "pattern": [{"LOWER": "steamed"}], "id": "food-bias"}
837
- {"label": "adjectives", "pattern": [{"LOWER": "sticky"}], "id": "food-bias"}
838
- {"label": "adjectives", "pattern": [{"LOWER": "stringy"}], "id": "food-bias"}
839
- {"label": "adjectives", "pattern": [{"LOWER": "strong"}], "id": "food-bias"}
840
- {"label": "adjectives", "pattern": [{"LOWER": "succulent"}], "id": "food-bias"}
841
- {"label": "adjectives", "pattern": [{"LOWER": "sugary"}], "id": "food-bias"}
842
- {"label": "adjectives", "pattern": [{"LOWER": "sweet"}], "id": "food-bias"}
843
- {"label": "adjectives", "pattern": [{"LOWER": "syrupy"}], "id": "food-bias"}
844
- {"label": "adjectives", "pattern": [{"LOWER": "tangy"}], "id": "food-bias"}
845
- {"label": "adjectives", "pattern": [{"LOWER": "tart"}], "id": "food-bias"}
846
- {"label": "adjectives", "pattern": [{"LOWER": "tender"}], "id": "food-bias"}
847
- {"label": "adjectives", "pattern": [{"LOWER": "toasted"}], "id": "food-bias"}
848
- {"label": "adjectives", "pattern": [{"LOWER": "topped"}], "id": "food-bias"}
849
- {"label": "adjectives", "pattern": [{"LOWER": "tossed"}], "id": "food-bias"}
850
- {"label": "adjectives", "pattern": [{"LOWER": "yummy"}], "id": "food-bias"}
851
- {"label": "adjectives", "pattern": [{"LOWER": "zingy"}], "id": "food-bias"}
852
- {"label": "adjectives", "pattern": [{"LOWER": "braised"}], "id": "food-bias"}
853
- {"label": "adjectives", "pattern": [{"LOWER": "fried"}], "id": "food-bias"}
854
- {"label": "adjectives", "pattern": [{"LOWER": "fermented"}], "id": "food-bias"}
855
- {"label": "adjectives", "pattern": [{"LOWER": "milky"}], "id": "food-bias"}
856
- {"label": "adjectives", "pattern": [{"LOWER": "damaged"}], "id": "food-bias"}
857
- {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "food-bias"}
858
- {"label": "adjectives", "pattern": [{"LOWER": "edible"}], "id": "food-bias"}
859
- {"label": "adjectives", "pattern": [{"LOWER": "nutritious"}], "id": "food-bias"}
860
- {"label": "adjectives", "pattern": [{"LOWER": "citric"}], "id": "food-bias"}
861
- {"label": "adjectives", "pattern": [{"LOWER": "cloying"}], "id": "food-bias"}
862
- {"label": "adjectives", "pattern": [{"LOWER": "caramelized"}], "id": "food-bias"}
NER-tweaks/age-bias.jsonl DELETED
@@ -1,32 +0,0 @@
- {"label": "age", "pattern": [{"LOWER": "advanced"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "aged"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "ancient"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "antique"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "archaic"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "contemporary"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "current"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "frayed"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "fresh"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "grizzled"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "hoary"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "immature"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "juvenile"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "mature"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "modern"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "new"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "novel"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "obsolete"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "old"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "primordial"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "ragged"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "raw"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "recent"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "senile"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "shabby"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "stale"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "tattered"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "threadbare"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "trite"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "vintage"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "worn"}], "id": "age-bias"}
- {"label": "age", "pattern": [{"LOWER": "young"}], "id": "age-bias"}
NER-tweaks/entity-ruler-input.jsonl DELETED
@@ -1,44 +0,0 @@
- {"label": "GENDER", "pattern": [{"LOWER": "woman"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "feminine"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "female"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "lady"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "girl"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "she"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "her"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "hers"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "herself"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "mother"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "grandmother"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "grandma"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "momma"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "mommy"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "babe"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "daughter"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "sister"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "niece"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "aunt"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "girlfriend"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "wife"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "mistress"}],"id":"female-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "man"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "masculine"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "male"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "dude"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "boy"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "he"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "his"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "him"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "himself"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "father"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "grandfather"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "grandpa"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "poppa"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "daddy"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "lad"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "son"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "brother"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "nephew"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "uncle"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "boyfriend"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "husband"}],"id":"male-bias"}
- {"label": "GENDER", "pattern": [{"LOWER": "gentleman"}],"id":"male-bias"}
NER-tweaks/gender-test.jsonl DELETED
@@ -1,59 +0,0 @@
1
- {"label": "SOGI", "pattern": [{"LOWER": "woman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
2
- {"label": "SOGI", "pattern": [{"LOWER": "feminine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
3
- {"label": "SOGI", "pattern": [{"LOWER": "female", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
4
- {"label": "SOGI", "pattern": [{"LOWER": "lady", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
5
- {"label": "SOGI", "pattern": [{"LOWER": "girl", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
6
- {"label": "SOGI", "pattern": [{"LOWER": "she", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
7
- {"label": "SOGI", "pattern": [{"LOWER": "hers", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
8
- {"label": "SOGI", "pattern": [{"LOWER": "her", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
9
- {"label": "SOGI", "pattern": [{"LOWER": "herself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
10
- {"label": "SOGI", "pattern": [{"LOWER": "mother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
11
- {"label": "SOGI", "pattern": [{"LOWER": "grandmother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
12
- {"label": "SOGI", "pattern": [{"LOWER": "grandma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
13
- {"label": "SOGI", "pattern": [{"LOWER": "momma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
14
- {"label": "SOGI", "pattern": [{"LOWER": "mommy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
15
- {"label": "SOGI", "pattern": [{"LOWER": "babe", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
16
- {"label": "SOGI", "pattern": [{"LOWER": "daughter", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
17
- {"label": "SOGI", "pattern": [{"LOWER": "sister", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
18
- {"label": "SOGI", "pattern": [{"LOWER": "niece", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
19
- {"label": "SOGI", "pattern": [{"LOWER": "aunt", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
20
- {"label": "SOGI", "pattern": [{"LOWER": "girlfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
21
- {"label": "SOGI", "pattern": [{"LOWER": "wife", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
22
- {"label": "SOGI", "pattern": [{"LOWER": "mistress", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
23
- {"label": "SOGI", "pattern": [{"LOWER": "man", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
24
- {"label": "SOGI", "pattern": [{"LOWER": "masculine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
25
- {"label": "SOGI", "pattern": [{"LOWER": "male", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
26
- {"label": "SOGI", "pattern": [{"LOWER": "dude", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
27
- {"label": "SOGI", "pattern": [{"LOWER": "boy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
28
- {"label": "SOGI", "pattern": [{"LOWER": "he", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
29
- {"label": "SOGI", "pattern": [{"LOWER": "his", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
30
- {"label": "SOGI", "pattern": [{"LOWER": "him", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
31
- {"label": "SOGI", "pattern": [{"LOWER": "himself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
32
- {"label": "SOGI", "pattern": [{"LOWER": "father", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
33
- {"label": "SOGI", "pattern": [{"LOWER": "grandfather", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
34
- {"label": "SOGI", "pattern": [{"LOWER": "grandpa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
35
- {"label": "SOGI", "pattern": [{"LOWER": "poppa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
36
- {"label": "SOGI", "pattern": [{"LOWER": "daddy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
37
- {"label": "SOGI", "pattern": [{"LOWER": "lad", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
38
- {"label": "SOGI", "pattern": [{"LOWER": "son", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
39
- {"label": "SOGI", "pattern": [{"LOWER": "brother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
40
- {"label": "SOGI", "pattern": [{"LOWER": "nephew", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
41
- {"label": "SOGI", "pattern": [{"LOWER": "uncle", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
42
- {"label": "SOGI", "pattern": [{"LOWER": "boyfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
43
- {"label": "SOGI", "pattern": [{"LOWER": "husband", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
44
- {"label": "SOGI", "pattern": [{"LOWER": "gentleman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
45
- {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"woman"}],"id":"lbgtq-bias"}
46
- {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"man"}],"id":"lbgtq-bias"}
47
- {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
48
- {"label": "SOGI", "pattern": [{"LOWER": "bisexual"}],"id":"lbgtq-bias"}
49
- {"label": "SOGI", "pattern": [{"LOWER": "gay"}],"id":"lbgtq-bias"}
50
- {"label": "SOGI", "pattern": [{"LOWER": "gender-fluid"}],"id":"lbgtq-bias"}
51
- {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
52
- {"label": "SOGI", "pattern": [{"LOWER": "genderqueer"}],"id":"lbgtq-bias"}
53
- {"label": "SOGI", "pattern": [{"LOWER": "lesbian"}],"id":"lbgtq-bias"}
54
- {"label": "SOGI", "pattern": [{"LOWER": "non-binary"}],"id":"lbgtq-bias"}
55
- {"label": "SOGI", "pattern": [{"LOWER": "queer"}],"id":"lbgtq-bias"}
56
- {"label": "SOGI", "pattern": [{"LOWER": "pansexual"}],"id":"lbgtq-bias"}
57
- {"label": "SOGI", "pattern": [{"LOWER": "transgender"}],"id":"lbgtq-bias"}
58
- {"label": "SOGI", "pattern": [{"LOWER": "transwoman"}],"id":"lbgtq-bias"}
59
- {"label": "SOGI", "pattern": [{"LOWER": "transman"}],"id":"lbgtq-bias"}
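A note on format: the entries above and below are spaCy EntityRuler patterns in JSONL form, one per line, each with a "label", a token "pattern", and an "id" naming the bias group. A minimal sketch of how such a file is loaded, assuming spaCy v3.x with the en_core_web_lg package installed (the path is the ruler file this commit removes; the test sentence is illustrative only):

    import spacy

    # Insert an EntityRuler ahead of the statistical NER so the JSONL
    # patterns take precedence over the model's own predictions. The
    # tagger runs earlier in the pipeline, so the POS-constrained
    # patterns above still work.
    nlp = spacy.load("en_core_web_lg")
    ruler = nlp.add_pipe("entity_ruler", before="ner")
    ruler.from_disk("NER-tweaks/main-ruler-bias.jsonl")

    doc = nlp("His sister is a quick learner.")
    # Each match carries the pattern's label and its "id" (e.g. "female-bias").
    print([(ent.text, ent.label_, ent.ent_id_) for ent in doc.ents])
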
NER-tweaks/main-ruler-bias.jsonl DELETED
@@ -1,862 +0,0 @@
1
- {"label": "SOGI", "pattern": [{"LOWER": "woman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
2
- {"label": "SOGI", "pattern": [{"LOWER": "feminine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
3
- {"label": "SOGI", "pattern": [{"LOWER": "female", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
4
- {"label": "SOGI", "pattern": [{"LOWER": "lady", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
5
- {"label": "SOGI", "pattern": [{"LOWER": "girl", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
6
- {"label": "SOGI", "pattern": [{"LOWER": "she", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
7
- {"label": "SOGI", "pattern": [{"LOWER": "hers", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
8
- {"label": "SOGI", "pattern": [{"LOWER": "her", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
9
- {"label": "SOGI", "pattern": [{"LOWER": "herself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
10
- {"label": "SOGI", "pattern": [{"LOWER": "mother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
11
- {"label": "SOGI", "pattern": [{"LOWER": "grandmother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
12
- {"label": "SOGI", "pattern": [{"LOWER": "grandma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
13
- {"label": "SOGI", "pattern": [{"LOWER": "momma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
14
- {"label": "SOGI", "pattern": [{"LOWER": "mommy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
15
- {"label": "SOGI", "pattern": [{"LOWER": "babe", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
16
- {"label": "SOGI", "pattern": [{"LOWER": "daughter", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
17
- {"label": "SOGI", "pattern": [{"LOWER": "sister", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
18
- {"label": "SOGI", "pattern": [{"LOWER": "niece", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
19
- {"label": "SOGI", "pattern": [{"LOWER": "aunt", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
20
- {"label": "SOGI", "pattern": [{"LOWER": "girlfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
21
- {"label": "SOGI", "pattern": [{"LOWER": "wife", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
22
- {"label": "SOGI", "pattern": [{"LOWER": "mistress", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
23
- {"label": "SOGI", "pattern": [{"LOWER": "man", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
24
- {"label": "SOGI", "pattern": [{"LOWER": "masculine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
25
- {"label": "SOGI", "pattern": [{"LOWER": "male", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
26
- {"label": "SOGI", "pattern": [{"LOWER": "dude", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
27
- {"label": "SOGI", "pattern": [{"LOWER": "boy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
28
- {"label": "SOGI", "pattern": [{"LOWER": "he", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
29
- {"label": "SOGI", "pattern": [{"LOWER": "his", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
30
- {"label": "SOGI", "pattern": [{"LOWER": "him", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
31
- {"label": "SOGI", "pattern": [{"LOWER": "himself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
32
- {"label": "SOGI", "pattern": [{"LOWER": "father", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
33
- {"label": "SOGI", "pattern": [{"LOWER": "grandfather", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
34
- {"label": "SOGI", "pattern": [{"LOWER": "grandpa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
35
- {"label": "SOGI", "pattern": [{"LOWER": "poppa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
36
- {"label": "SOGI", "pattern": [{"LOWER": "daddy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
37
- {"label": "SOGI", "pattern": [{"LOWER": "lad", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
38
- {"label": "SOGI", "pattern": [{"LOWER": "son", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
39
- {"label": "SOGI", "pattern": [{"LOWER": "brother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
40
- {"label": "SOGI", "pattern": [{"LOWER": "nephew", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
41
- {"label": "SOGI", "pattern": [{"LOWER": "uncle", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
42
- {"label": "SOGI", "pattern": [{"LOWER": "boyfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
43
- {"label": "SOGI", "pattern": [{"LOWER": "husband", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
44
- {"label": "SOGI", "pattern": [{"LOWER": "gentleman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
45
- {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"woman"}],"id":"lbgtq-bias"}
46
- {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"man"}],"id":"lbgtq-bias"}
47
- {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
48
- {"label": "SOGI", "pattern": [{"LOWER": "bisexual"}],"id":"lbgtq-bias"}
49
- {"label": "SOGI", "pattern": [{"LOWER": "gay"}],"id":"lbgtq-bias"}
50
- {"label": "SOGI", "pattern": [{"LOWER": "gender-fluid"}],"id":"lbgtq-bias"}
51
- {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
52
- {"label": "SOGI", "pattern": [{"LOWER": "genderqueer"}],"id":"lbgtq-bias"}
53
- {"label": "SOGI", "pattern": [{"LOWER": "lesbian"}],"id":"lbgtq-bias"}
54
- {"label": "SOGI", "pattern": [{"LOWER": "non-binary"}],"id":"lbgtq-bias"}
55
- {"label": "SOGI", "pattern": [{"LOWER": "queer"}],"id":"lbgtq-bias"}
56
- {"label": "SOGI", "pattern": [{"LOWER": "pansexual"}],"id":"lbgtq-bias"}
57
- {"label": "SOGI", "pattern": [{"LOWER": "transgender"}],"id":"lbgtq-bias"}
58
- {"label": "SOGI", "pattern": [{"LOWER": "transwoman"}],"id":"lbgtq-bias"}
59
- {"label": "SOGI", "pattern": [{"LOWER": "transman"}],"id":"lbgtq-bias"}
60
- {"label": "adjectives", "pattern": [{"LOWER": "agile"}], "id": "speed-bias"}
61
- {"label": "adjectives", "pattern": [{"LOWER": "express"}], "id": "speed-bias"}
62
- {"label": "adjectives", "pattern": [{"LOWER": "fast"}], "id": "speed-bias"}
63
- {"label": "adjectives", "pattern": [{"LOWER": "hasty"}], "id": "speed-bias"}
64
- {"label": "adjectives", "pattern": [{"LOWER": "immediate"}], "id": "speed-bias"}
65
- {"label": "adjectives", "pattern": [{"LOWER": "instant"}], "id": "speed-bias"}
66
- {"label": "adjectives", "pattern": [{"LOWER": "late"}], "id": "speed-bias"}
67
- {"label": "adjectives", "pattern": [{"LOWER": "lazy"}], "id": "speed-bias"}
68
- {"label": "adjectives", "pattern": [{"LOWER": "nimble"}], "id": "speed-bias"}
69
- {"label": "adjectives", "pattern": [{"LOWER": "poky"}], "id": "speed-bias"}
70
- {"label": "adjectives", "pattern": [{"LOWER": "prompt"}], "id": "speed-bias"}
71
- {"label": "adjectives", "pattern": [{"LOWER": "quick"}], "id": "speed-bias"}
72
- {"label": "adjectives", "pattern": [{"LOWER": "rapid"}], "id": "speed-bias"}
73
- {"label": "adjectives", "pattern": [{"LOWER": "slow"}], "id": "speed-bias"}
74
- {"label": "adjectives", "pattern": [{"LOWER": "sluggish"}], "id": "speed-bias"}
75
- {"label": "adjectives", "pattern": [{"LOWER": "speedy"}], "id": "speed-bias"}
76
- {"label": "adjectives", "pattern": [{"LOWER": "spry"}], "id": "speed-bias"}
77
- {"label": "adjectives", "pattern": [{"LOWER": "swift"}], "id": "speed-bias"}
78
- {"label": "adjectives", "pattern": [{"LOWER": "arctic"}], "id": "weather-bias"}
79
- {"label": "adjectives", "pattern": [{"LOWER": "arid"}], "id": "weather-bias"}
80
- {"label": "adjectives", "pattern": [{"LOWER": "breezy"}], "id": "weather-bias"}
81
- {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "weather-bias"}
82
- {"label": "adjectives", "pattern": [{"LOWER": "chilly"}], "id": "weather-bias"}
83
- {"label": "adjectives", "pattern": [{"LOWER": "cloudy"}], "id": "weather-bias"}
84
- {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "weather-bias"}
85
- {"label": "adjectives", "pattern": [{"LOWER": "cool"}], "id": "weather-bias"}
86
- {"label": "adjectives", "pattern": [{"LOWER": "damp"}], "id": "weather-bias"}
87
- {"label": "adjectives", "pattern": [{"LOWER": "dark"}], "id": "weather-bias"}
88
- {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "weather-bias"}
89
- {"label": "adjectives", "pattern": [{"LOWER": "foggy"}], "id": "weather-bias"}
90
- {"label": "adjectives", "pattern": [{"LOWER": "freezing"}], "id": "weather-bias"}
91
- {"label": "adjectives", "pattern": [{"LOWER": "frosty"}], "id": "weather-bias"}
92
- {"label": "adjectives", "pattern": [{"LOWER": "great"}], "id": "weather-bias"}
93
- {"label": "adjectives", "pattern": [{"LOWER": "hot"}], "id": "weather-bias"}
94
- {"label": "adjectives", "pattern": [{"LOWER": "humid"}], "id": "weather-bias"}
95
- {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "weather-bias"}
96
- {"label": "adjectives", "pattern": [{"LOWER": "light"}], "id": "weather-bias"}
97
- {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "weather-bias"}
98
- {"label": "adjectives", "pattern": [{"LOWER": "nice"}], "id": "weather-bias"}
99
- {"label": "adjectives", "pattern": [{"LOWER": "overcast"}], "id": "weather-bias"}
100
- {"label": "adjectives", "pattern": [{"LOWER": "rainy"}], "id": "weather-bias"}
101
- {"label": "adjectives", "pattern": [{"LOWER": "smoggy"}], "id": "weather-bias"}
102
- {"label": "adjectives", "pattern": [{"LOWER": "snowy"}], "id": "weather-bias"}
103
- {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "weather-bias"}
104
- {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "weather-bias"}
105
- {"label": "adjectives", "pattern": [{"LOWER": "windy"}], "id": "weather-bias"}
106
- {"label": "adjectives", "pattern": [{"LOWER": "wintry"}], "id": "weather-bias"}
107
- {"label": "adjectives", "pattern": [{"LOWER": "bent"}], "id": "shape-bias"}
108
- {"label": "adjectives", "pattern": [{"LOWER": "blocky"}], "id": "shape-bias"}
109
- {"label": "adjectives", "pattern": [{"LOWER": "boxy"}], "id": "shape-bias"}
110
- {"label": "adjectives", "pattern": [{"LOWER": "broad"}], "id": "shape-bias"}
111
- {"label": "adjectives", "pattern": [{"LOWER": "chunky"}], "id": "shape-bias"}
112
- {"label": "adjectives", "pattern": [{"LOWER": "compact"}], "id": "shape-bias"}
113
- {"label": "adjectives", "pattern": [{"LOWER": "fat"}], "id": "shape-bias"}
114
- {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "shape-bias"}
115
- {"label": "adjectives", "pattern": [{"LOWER": "full"}], "id": "shape-bias"}
116
- {"label": "adjectives", "pattern": [{"LOWER": "narrow"}], "id": "shape-bias"}
117
- {"label": "adjectives", "pattern": [{"LOWER": "pointed"}], "id": "shape-bias"}
118
- {"label": "adjectives", "pattern": [{"LOWER": "round"}], "id": "shape-bias"}
119
- {"label": "adjectives", "pattern": [{"LOWER": "rounded"}], "id": "shape-bias"}
120
- {"label": "adjectives", "pattern": [{"LOWER": "skinny"}], "id": "shape-bias"}
121
- {"label": "adjectives", "pattern": [{"LOWER": "slim"}], "id": "shape-bias"}
122
- {"label": "adjectives", "pattern": [{"LOWER": "solid"}], "id": "shape-bias"}
123
- {"label": "adjectives", "pattern": [{"LOWER": "straight"}], "id": "shape-bias"}
124
- {"label": "adjectives", "pattern": [{"LOWER": "thick"}], "id": "shape-bias"}
125
- {"label": "adjectives", "pattern": [{"LOWER": "thin"}], "id": "shape-bias"}
126
- {"label": "adjectives", "pattern": [{"LOWER": "wide"}], "id": "shape-bias"}
127
- {"label": "adjectives", "pattern": [{"LOWER": "blaring"}], "id": "sound-bias"}
128
- {"label": "adjectives", "pattern": [{"LOWER": "booming"}], "id": "sound-bias"}
129
- {"label": "adjectives", "pattern": [{"LOWER": "deafening"}], "id": "sound-bias"}
130
- {"label": "adjectives", "pattern": [{"LOWER": "faint"}], "id": "sound-bias"}
131
- {"label": "adjectives", "pattern": [{"LOWER": "gentle"}], "id": "sound-bias"}
132
- {"label": "adjectives", "pattern": [{"LOWER": "grating"}], "id": "sound-bias"}
133
- {"label": "adjectives", "pattern": [{"LOWER": "hushed"}], "id": "sound-bias"}
134
- {"label": "adjectives", "pattern": [{"LOWER": "loud"}], "id": "sound-bias"}
135
- {"label": "adjectives", "pattern": [{"LOWER": "muffled"}], "id": "sound-bias"}
136
- {"label": "adjectives", "pattern": [{"LOWER": "mute"}], "id": "sound-bias"}
137
- {"label": "adjectives", "pattern": [{"LOWER": "noisy"}], "id": "sound-bias"}
138
- {"label": "adjectives", "pattern": [{"LOWER": "piercing"}], "id": "sound-bias"}
139
- {"label": "adjectives", "pattern": [{"LOWER": "quiet"}], "id": "sound-bias"}
140
- {"label": "adjectives", "pattern": [{"LOWER": "roaring"}], "id": "sound-bias"}
141
- {"label": "adjectives", "pattern": [{"LOWER": "rowdy"}], "id": "sound-bias"}
142
- {"label": "adjectives", "pattern": [{"LOWER": "silent"}], "id": "sound-bias"}
143
- {"label": "adjectives", "pattern": [{"LOWER": "soft"}], "id": "sound-bias"}
144
- {"label": "adjectives", "pattern": [{"LOWER": "thundering"}], "id": "sound-bias"}
145
- {"label": "adjectives", "pattern": [{"LOWER": "absolute"}], "id": "physics-bias"}
146
- {"label": "adjectives", "pattern": [{"LOWER": "achromatic"}], "id": "physics-bias"}
147
- {"label": "adjectives", "pattern": [{"LOWER": "acoustic"}], "id": "physics-bias"}
148
- {"label": "adjectives", "pattern": [{"LOWER": "adiabatic"}], "id": "physics-bias"}
149
- {"label": "adjectives", "pattern": [{"LOWER": "alternating"}], "id": "physics-bias"}
150
- {"label": "adjectives", "pattern": [{"LOWER": "atomic"}], "id": "physics-bias"}
151
- {"label": "adjectives", "pattern": [{"LOWER": "binding"}], "id": "physics-bias"}
152
- {"label": "adjectives", "pattern": [{"LOWER": "brownian"}], "id": "physics-bias"}
153
- {"label": "adjectives", "pattern": [{"LOWER": "buoyant"}], "id": "physics-bias"}
154
- {"label": "adjectives", "pattern": [{"LOWER": "chromatic"}], "id": "physics-bias"}
155
- {"label": "adjectives", "pattern": [{"LOWER": "closed"}], "id": "physics-bias"}
156
- {"label": "adjectives", "pattern": [{"LOWER": "coherent"}], "id": "physics-bias"}
157
- {"label": "adjectives", "pattern": [{"LOWER": "critical"}], "id": "physics-bias"}
158
- {"label": "adjectives", "pattern": [{"LOWER": "dense"}], "id": "physics-bias"}
159
- {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "physics-bias"}
160
- {"label": "adjectives", "pattern": [{"LOWER": "electric"}], "id": "physics-bias"}
161
- {"label": "adjectives", "pattern": [{"LOWER": "electrical"}], "id": "physics-bias"}
162
- {"label": "adjectives", "pattern": [{"LOWER": "endothermic"}], "id": "physics-bias"}
163
- {"label": "adjectives", "pattern": [{"LOWER": "exothermic"}], "id": "physics-bias"}
164
- {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "physics-bias"}
165
- {"label": "adjectives", "pattern": [{"LOWER": "fundamental"}], "id": "physics-bias"}
166
- {"label": "adjectives", "pattern": [{"LOWER": "gravitational"}], "id": "physics-bias"}
167
- {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "physics-bias"}
168
- {"label": "adjectives", "pattern": [{"LOWER": "isobaric"}], "id": "physics-bias"}
169
- {"label": "adjectives", "pattern": [{"LOWER": "isochoric"}], "id": "physics-bias"}
170
- {"label": "adjectives", "pattern": [{"LOWER": "isothermal"}], "id": "physics-bias"}
171
- {"label": "adjectives", "pattern": [{"LOWER": "kinetic"}], "id": "physics-bias"}
172
- {"label": "adjectives", "pattern": [{"LOWER": "latent"}], "id": "physics-bias"}
173
- {"label": "adjectives", "pattern": [{"LOWER": "magnetic"}], "id": "physics-bias"}
174
- {"label": "adjectives", "pattern": [{"LOWER": "mechanical"}], "id": "physics-bias"}
175
- {"label": "adjectives", "pattern": [{"LOWER": "natural"}], "id": "physics-bias"}
176
- {"label": "adjectives", "pattern": [{"LOWER": "nuclear"}], "id": "physics-bias"}
177
- {"label": "adjectives", "pattern": [{"LOWER": "open"}], "id": "physics-bias"}
178
- {"label": "adjectives", "pattern": [{"LOWER": "optical"}], "id": "physics-bias"}
179
- {"label": "adjectives", "pattern": [{"LOWER": "potential"}], "id": "physics-bias"}
180
- {"label": "adjectives", "pattern": [{"LOWER": "primary"}], "id": "physics-bias"}
181
- {"label": "adjectives", "pattern": [{"LOWER": "progressive"}], "id": "physics-bias"}
182
- {"label": "adjectives", "pattern": [{"LOWER": "quantum"}], "id": "physics-bias"}
183
- {"label": "adjectives", "pattern": [{"LOWER": "radiant"}], "id": "physics-bias"}
184
- {"label": "adjectives", "pattern": [{"LOWER": "radioactive"}], "id": "physics-bias"}
185
- {"label": "adjectives", "pattern": [{"LOWER": "rectilinear"}], "id": "physics-bias"}
186
- {"label": "adjectives", "pattern": [{"LOWER": "relative"}], "id": "physics-bias"}
187
- {"label": "adjectives", "pattern": [{"LOWER": "resolving"}], "id": "physics-bias"}
188
- {"label": "adjectives", "pattern": [{"LOWER": "resonnt"}], "id": "physics-bias"}
189
- {"label": "adjectives", "pattern": [{"LOWER": "resultant"}], "id": "physics-bias"}
190
- {"label": "adjectives", "pattern": [{"LOWER": "rigid"}], "id": "physics-bias"}
191
- {"label": "adjectives", "pattern": [{"LOWER": "volumetric"}], "id": "physics-bias"}
192
- {"label": "adjectives", "pattern": [{"LOWER": ""}], "id": "temperature-bias"}
193
- {"label": "adjectives", "pattern": [{"LOWER": "blistering"}], "id": "temperature-bias"}
194
- {"label": "adjectives", "pattern": [{"LOWER": "burning"}], "id": "temperature-bias"}
195
- {"label": "adjectives", "pattern": [{"LOWER": "chill"}], "id": "temperature-bias"}
196
- {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "temperature-bias"}
197
- {"label": "adjectives", "pattern": [{"LOWER": "cool"}], "id": "temperature-bias"}
198
- {"label": "adjectives", "pattern": [{"LOWER": "freezing"}], "id": "temperature-bias"}
199
- {"label": "adjectives", "pattern": [{"LOWER": "frigid"}], "id": "temperature-bias"}
200
- {"label": "adjectives", "pattern": [{"LOWER": "frosty"}], "id": "temperature-bias"}
201
- {"label": "adjectives", "pattern": [{"LOWER": "hot"}], "id": "temperature-bias"}
202
- {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "temperature-bias"}
203
- {"label": "adjectives", "pattern": [{"LOWER": "molten"}], "id": "temperature-bias"}
204
- {"label": "adjectives", "pattern": [{"LOWER": "nippy"}], "id": "temperature-bias"}
205
- {"label": "adjectives", "pattern": [{"LOWER": "scalding"}], "id": "temperature-bias"}
206
- {"label": "adjectives", "pattern": [{"LOWER": "searing"}], "id": "temperature-bias"}
207
- {"label": "adjectives", "pattern": [{"LOWER": "sizzling"}], "id": "temperature-bias"}
208
- {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "temperature-bias"}
209
- {"label": "adjectives", "pattern": [{"LOWER": "central"}], "id": "corporate_prefixes-bias"}
210
- {"label": "adjectives", "pattern": [{"LOWER": "chief"}], "id": "corporate_prefixes-bias"}
211
- {"label": "adjectives", "pattern": [{"LOWER": "corporate"}], "id": "corporate_prefixes-bias"}
212
- {"label": "adjectives", "pattern": [{"LOWER": "customer"}], "id": "corporate_prefixes-bias"}
213
- {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "corporate_prefixes-bias"}
214
- {"label": "adjectives", "pattern": [{"LOWER": "district"}], "id": "corporate_prefixes-bias"}
215
- {"label": "adjectives", "pattern": [{"LOWER": "dynamic"}], "id": "corporate_prefixes-bias"}
216
- {"label": "adjectives", "pattern": [{"LOWER": "forward"}], "id": "corporate_prefixes-bias"}
217
- {"label": "adjectives", "pattern": [{"LOWER": "future"}], "id": "corporate_prefixes-bias"}
218
- {"label": "adjectives", "pattern": [{"LOWER": "global"}], "id": "corporate_prefixes-bias"}
219
- {"label": "adjectives", "pattern": [{"LOWER": "human"}], "id": "corporate_prefixes-bias"}
220
- {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "corporate_prefixes-bias"}
221
- {"label": "adjectives", "pattern": [{"LOWER": "international"}], "id": "corporate_prefixes-bias"}
222
- {"label": "adjectives", "pattern": [{"LOWER": "investor"}], "id": "corporate_prefixes-bias"}
223
- {"label": "adjectives", "pattern": [{"LOWER": "lead"}], "id": "corporate_prefixes-bias"}
224
- {"label": "adjectives", "pattern": [{"LOWER": "legacy"}], "id": "corporate_prefixes-bias"}
225
- {"label": "adjectives", "pattern": [{"LOWER": "national"}], "id": "corporate_prefixes-bias"}
226
- {"label": "adjectives", "pattern": [{"LOWER": "principal"}], "id": "corporate_prefixes-bias"}
227
- {"label": "adjectives", "pattern": [{"LOWER": "product"}], "id": "corporate_prefixes-bias"}
228
- {"label": "adjectives", "pattern": [{"LOWER": "regional"}], "id": "corporate_prefixes-bias"}
229
- {"label": "adjectives", "pattern": [{"LOWER": "senior"}], "id": "corporate_prefixes-bias"}
230
- {"label": "adjectives", "pattern": [{"LOWER": "staff"}], "id": "corporate_prefixes-bias"}
231
- {"label": "adjectives", "pattern": [{"LOWER": "bare"}], "id": "complexity-bias"}
232
- {"label": "adjectives", "pattern": [{"LOWER": "basic"}], "id": "complexity-bias"}
233
- {"label": "adjectives", "pattern": [{"LOWER": "clear"}], "id": "complexity-bias"}
234
- {"label": "adjectives", "pattern": [{"LOWER": "complex"}], "id": "complexity-bias"}
235
- {"label": "adjectives", "pattern": [{"LOWER": "complicated"}], "id": "complexity-bias"}
236
- {"label": "adjectives", "pattern": [{"LOWER": "convoluted"}], "id": "complexity-bias"}
237
- {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "complexity-bias"}
238
- {"label": "adjectives", "pattern": [{"LOWER": "easy"}], "id": "complexity-bias"}
239
- {"label": "adjectives", "pattern": [{"LOWER": "elaborate"}], "id": "complexity-bias"}
240
- {"label": "adjectives", "pattern": [{"LOWER": "fancy"}], "id": "complexity-bias"}
241
- {"label": "adjectives", "pattern": [{"LOWER": "hard"}], "id": "complexity-bias"}
242
- {"label": "adjectives", "pattern": [{"LOWER": "intricate"}], "id": "complexity-bias"}
243
- {"label": "adjectives", "pattern": [{"LOWER": "obvious"}], "id": "complexity-bias"}
244
- {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "complexity-bias"}
245
- {"label": "adjectives", "pattern": [{"LOWER": "pure"}], "id": "complexity-bias"}
246
- {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "complexity-bias"}
247
- {"label": "adjectives", "pattern": [{"LOWER": "amber"}], "id": "colors-bias"}
248
- {"label": "adjectives", "pattern": [{"LOWER": "ash"}], "id": "colors-bias"}
249
- {"label": "adjectives", "pattern": [{"LOWER": "asphalt"}], "id": "colors-bias"}
250
- {"label": "adjectives", "pattern": [{"LOWER": "auburn"}], "id": "colors-bias"}
251
- {"label": "adjectives", "pattern": [{"LOWER": "avocado"}], "id": "colors-bias"}
252
- {"label": "adjectives", "pattern": [{"LOWER": "aquamarine"}], "id": "colors-bias"}
253
- {"label": "adjectives", "pattern": [{"LOWER": "azure"}], "id": "colors-bias"}
254
- {"label": "adjectives", "pattern": [{"LOWER": "beige"}], "id": "colors-bias"}
255
- {"label": "adjectives", "pattern": [{"LOWER": "bisque"}], "id": "colors-bias"}
256
- {"label": "adjectives", "pattern": [{"LOWER": "black"}], "id": "colors-bias"}
257
- {"label": "adjectives", "pattern": [{"LOWER": "blue"}], "id": "colors-bias"}
258
- {"label": "adjectives", "pattern": [{"LOWER": "bone"}], "id": "colors-bias"}
259
- {"label": "adjectives", "pattern": [{"LOWER": "bordeaux"}], "id": "colors-bias"}
260
- {"label": "adjectives", "pattern": [{"LOWER": "brass"}], "id": "colors-bias"}
261
- {"label": "adjectives", "pattern": [{"LOWER": "bronze"}], "id": "colors-bias"}
262
- {"label": "adjectives", "pattern": [{"LOWER": "brown"}], "id": "colors-bias"}
263
- {"label": "adjectives", "pattern": [{"LOWER": "burgundy"}], "id": "colors-bias"}
264
- {"label": "adjectives", "pattern": [{"LOWER": "camel"}], "id": "colors-bias"}
265
- {"label": "adjectives", "pattern": [{"LOWER": "caramel"}], "id": "colors-bias"}
266
- {"label": "adjectives", "pattern": [{"LOWER": "canary"}], "id": "colors-bias"}
267
- {"label": "adjectives", "pattern": [{"LOWER": "celeste"}], "id": "colors-bias"}
268
- {"label": "adjectives", "pattern": [{"LOWER": "cerulean"}], "id": "colors-bias"}
269
- {"label": "adjectives", "pattern": [{"LOWER": "champagne"}], "id": "colors-bias"}
270
- {"label": "adjectives", "pattern": [{"LOWER": "charcoal"}], "id": "colors-bias"}
271
- {"label": "adjectives", "pattern": [{"LOWER": "chartreuse"}], "id": "colors-bias"}
272
- {"label": "adjectives", "pattern": [{"LOWER": "chestnut"}], "id": "colors-bias"}
273
- {"label": "adjectives", "pattern": [{"LOWER": "chocolate"}], "id": "colors-bias"}
274
- {"label": "adjectives", "pattern": [{"LOWER": "citron"}], "id": "colors-bias"}
275
- {"label": "adjectives", "pattern": [{"LOWER": "claret"}], "id": "colors-bias"}
276
- {"label": "adjectives", "pattern": [{"LOWER": "coal"}], "id": "colors-bias"}
277
- {"label": "adjectives", "pattern": [{"LOWER": "cobalt"}], "id": "colors-bias"}
278
- {"label": "adjectives", "pattern": [{"LOWER": "coffee"}], "id": "colors-bias"}
279
- {"label": "adjectives", "pattern": [{"LOWER": "coral"}], "id": "colors-bias"}
280
- {"label": "adjectives", "pattern": [{"LOWER": "corn"}], "id": "colors-bias"}
281
- {"label": "adjectives", "pattern": [{"LOWER": "cream"}], "id": "colors-bias"}
282
- {"label": "adjectives", "pattern": [{"LOWER": "crimson"}], "id": "colors-bias"}
283
- {"label": "adjectives", "pattern": [{"LOWER": "cyan"}], "id": "colors-bias"}
284
- {"label": "adjectives", "pattern": [{"LOWER": "denim"}], "id": "colors-bias"}
285
- {"label": "adjectives", "pattern": [{"LOWER": "desert"}], "id": "colors-bias"}
286
- {"label": "adjectives", "pattern": [{"LOWER": "ebony"}], "id": "colors-bias"}
287
- {"label": "adjectives", "pattern": [{"LOWER": "ecru"}], "id": "colors-bias"}
288
- {"label": "adjectives", "pattern": [{"LOWER": "emerald"}], "id": "colors-bias"}
289
- {"label": "adjectives", "pattern": [{"LOWER": "feldspar"}], "id": "colors-bias"}
290
- {"label": "adjectives", "pattern": [{"LOWER": "fuchsia"}], "id": "colors-bias"}
291
- {"label": "adjectives", "pattern": [{"LOWER": "gold"}], "id": "colors-bias"}
292
- {"label": "adjectives", "pattern": [{"LOWER": "gray"}], "id": "colors-bias"}
293
- {"label": "adjectives", "pattern": [{"LOWER": "green"}], "id": "colors-bias"}
294
- {"label": "adjectives", "pattern": [{"LOWER": "heather"}], "id": "colors-bias"}
295
- {"label": "adjectives", "pattern": [{"LOWER": "indigo"}], "id": "colors-bias"}
296
- {"label": "adjectives", "pattern": [{"LOWER": "ivory"}], "id": "colors-bias"}
297
- {"label": "adjectives", "pattern": [{"LOWER": "jet"}], "id": "colors-bias"}
298
- {"label": "adjectives", "pattern": [{"LOWER": "khaki"}], "id": "colors-bias"}
299
- {"label": "adjectives", "pattern": [{"LOWER": "lime"}], "id": "colors-bias"}
300
- {"label": "adjectives", "pattern": [{"LOWER": "magenta"}], "id": "colors-bias"}
301
- {"label": "adjectives", "pattern": [{"LOWER": "maroon"}], "id": "colors-bias"}
302
- {"label": "adjectives", "pattern": [{"LOWER": "mint"}], "id": "colors-bias"}
303
- {"label": "adjectives", "pattern": [{"LOWER": "navy"}], "id": "colors-bias"}
304
- {"label": "adjectives", "pattern": [{"LOWER": "olive"}], "id": "colors-bias"}
305
- {"label": "adjectives", "pattern": [{"LOWER": "orange"}], "id": "colors-bias"}
306
- {"label": "adjectives", "pattern": [{"LOWER": "pink"}], "id": "colors-bias"}
307
- {"label": "adjectives", "pattern": [{"LOWER": "plum"}], "id": "colors-bias"}
308
- {"label": "adjectives", "pattern": [{"LOWER": "purple"}], "id": "colors-bias"}
309
- {"label": "adjectives", "pattern": [{"LOWER": "red"}], "id": "colors-bias"}
310
- {"label": "adjectives", "pattern": [{"LOWER": "rust"}], "id": "colors-bias"}
311
- {"label": "adjectives", "pattern": [{"LOWER": "salmon"}], "id": "colors-bias"}
312
- {"label": "adjectives", "pattern": [{"LOWER": "sienna"}], "id": "colors-bias"}
313
- {"label": "adjectives", "pattern": [{"LOWER": "silver"}], "id": "colors-bias"}
314
- {"label": "adjectives", "pattern": [{"LOWER": "snow"}], "id": "colors-bias"}
315
- {"label": "adjectives", "pattern": [{"LOWER": "steel"}], "id": "colors-bias"}
316
- {"label": "adjectives", "pattern": [{"LOWER": "tan"}], "id": "colors-bias"}
317
- {"label": "adjectives", "pattern": [{"LOWER": "teal"}], "id": "colors-bias"}
318
- {"label": "adjectives", "pattern": [{"LOWER": "tomato"}], "id": "colors-bias"}
319
- {"label": "adjectives", "pattern": [{"LOWER": "violet"}], "id": "colors-bias"}
320
- {"label": "adjectives", "pattern": [{"LOWER": "white"}], "id": "colors-bias"}
321
- {"label": "adjectives", "pattern": [{"LOWER": "yellow"}], "id": "colors-bias"}
322
- {"label": "adjectives", "pattern": [{"LOWER": "bitter"}], "id": "taste-bias"}
323
- {"label": "adjectives", "pattern": [{"LOWER": "chalky"}], "id": "taste-bias"}
324
- {"label": "adjectives", "pattern": [{"LOWER": "chewy"}], "id": "taste-bias"}
325
- {"label": "adjectives", "pattern": [{"LOWER": "creamy"}], "id": "taste-bias"}
326
- {"label": "adjectives", "pattern": [{"LOWER": "crispy"}], "id": "taste-bias"}
327
- {"label": "adjectives", "pattern": [{"LOWER": "crunchy"}], "id": "taste-bias"}
328
- {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "taste-bias"}
329
- {"label": "adjectives", "pattern": [{"LOWER": "greasy"}], "id": "taste-bias"}
330
- {"label": "adjectives", "pattern": [{"LOWER": "gritty"}], "id": "taste-bias"}
331
- {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "taste-bias"}
332
- {"label": "adjectives", "pattern": [{"LOWER": "moist"}], "id": "taste-bias"}
333
- {"label": "adjectives", "pattern": [{"LOWER": "oily"}], "id": "taste-bias"}
334
- {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "taste-bias"}
335
- {"label": "adjectives", "pattern": [{"LOWER": "salty"}], "id": "taste-bias"}
336
- {"label": "adjectives", "pattern": [{"LOWER": "savory"}], "id": "taste-bias"}
337
- {"label": "adjectives", "pattern": [{"LOWER": "sour"}], "id": "taste-bias"}
338
- {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "taste-bias"}
339
- {"label": "adjectives", "pattern": [{"LOWER": "sweet"}], "id": "taste-bias"}
340
- {"label": "adjectives", "pattern": [{"LOWER": "tangy"}], "id": "taste-bias"}
341
- {"label": "adjectives", "pattern": [{"LOWER": "tart"}], "id": "taste-bias"}
342
- {"label": "adjectives", "pattern": [{"LOWER": "zesty"}], "id": "taste-bias"}
343
- {"label": "adjectives", "pattern": [{"LOWER": "all"}], "id": "quantity-bias"}
344
- {"label": "adjectives", "pattern": [{"LOWER": "another"}], "id": "quantity-bias"}
345
- {"label": "adjectives", "pattern": [{"LOWER": "each"}], "id": "quantity-bias"}
346
- {"label": "adjectives", "pattern": [{"LOWER": "either"}], "id": "quantity-bias"}
347
- {"label": "adjectives", "pattern": [{"LOWER": "every"}], "id": "quantity-bias"}
348
- {"label": "adjectives", "pattern": [{"LOWER": "few"}], "id": "quantity-bias"}
349
- {"label": "adjectives", "pattern": [{"LOWER": "many"}], "id": "quantity-bias"}
350
- {"label": "adjectives", "pattern": [{"LOWER": "numerous"}], "id": "quantity-bias"}
351
- {"label": "adjectives", "pattern": [{"LOWER": "one"}], "id": "quantity-bias"}
352
- {"label": "adjectives", "pattern": [{"LOWER": "other"}], "id": "quantity-bias"}
353
- {"label": "adjectives", "pattern": [{"LOWER": "several"}], "id": "quantity-bias"}
354
- {"label": "adjectives", "pattern": [{"LOWER": "some"}], "id": "quantity-bias"}
355
- {"label": "adjectives", "pattern": [{"LOWER": "average"}], "id": "size-bias"}
356
- {"label": "adjectives", "pattern": [{"LOWER": "big"}], "id": "size-bias"}
357
- {"label": "adjectives", "pattern": [{"LOWER": "broad"}], "id": "size-bias"}
358
- {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "size-bias"}
359
- {"label": "adjectives", "pattern": [{"LOWER": "giant"}], "id": "size-bias"}
360
- {"label": "adjectives", "pattern": [{"LOWER": "huge"}], "id": "size-bias"}
361
- {"label": "adjectives", "pattern": [{"LOWER": "humongous"}], "id": "size-bias"}
362
- {"label": "adjectives", "pattern": [{"LOWER": "immense"}], "id": "size-bias"}
363
- {"label": "adjectives", "pattern": [{"LOWER": "large"}], "id": "size-bias"}
364
- {"label": "adjectives", "pattern": [{"LOWER": "little"}], "id": "size-bias"}
365
- {"label": "adjectives", "pattern": [{"LOWER": "long"}], "id": "size-bias"}
366
- {"label": "adjectives", "pattern": [{"LOWER": "massive"}], "id": "size-bias"}
367
- {"label": "adjectives", "pattern": [{"LOWER": "medium"}], "id": "size-bias"}
368
- {"label": "adjectives", "pattern": [{"LOWER": "miniature"}], "id": "size-bias"}
369
- {"label": "adjectives", "pattern": [{"LOWER": "short"}], "id": "size-bias"}
370
- {"label": "adjectives", "pattern": [{"LOWER": "small"}], "id": "size-bias"}
371
- {"label": "adjectives", "pattern": [{"LOWER": "tall"}], "id": "size-bias"}
372
- {"label": "adjectives", "pattern": [{"LOWER": "tiny"}], "id": "size-bias"}
373
- {"label": "adjectives", "pattern": [{"LOWER": "wide"}], "id": "size-bias"}
374
- {"label": "adjectives", "pattern": [{"LOWER": "absolute"}], "id": "algorithms-bias"}
375
- {"label": "adjectives", "pattern": [{"LOWER": "abstract"}], "id": "algorithms-bias"}
376
- {"label": "adjectives", "pattern": [{"LOWER": "active"}], "id": "algorithms-bias"}
377
- {"label": "adjectives", "pattern": [{"LOWER": "acyclic"}], "id": "algorithms-bias"}
378
- {"label": "adjectives", "pattern": [{"LOWER": "adaptive"}], "id": "algorithms-bias"}
379
- {"label": "adjectives", "pattern": [{"LOWER": "amortized"}], "id": "algorithms-bias"}
380
- {"label": "adjectives", "pattern": [{"LOWER": "approximate"}], "id": "algorithms-bias"}
381
- {"label": "adjectives", "pattern": [{"LOWER": "ascent"}], "id": "algorithms-bias"}
382
- {"label": "adjectives", "pattern": [{"LOWER": "associative"}], "id": "algorithms-bias"}
383
- {"label": "adjectives", "pattern": [{"LOWER": "asymptotic"}], "id": "algorithms-bias"}
384
- {"label": "adjectives", "pattern": [{"LOWER": "augmenting"}], "id": "algorithms-bias"}
385
- {"label": "adjectives", "pattern": [{"LOWER": "average"}], "id": "algorithms-bias"}
386
- {"label": "adjectives", "pattern": [{"LOWER": "balanced"}], "id": "algorithms-bias"}
387
- {"label": "adjectives", "pattern": [{"LOWER": "best"}], "id": "algorithms-bias"}
388
- {"label": "adjectives", "pattern": [{"LOWER": "binary"}], "id": "algorithms-bias"}
389
- {"label": "adjectives", "pattern": [{"LOWER": "bipartite"}], "id": "algorithms-bias"}
390
- {"label": "adjectives", "pattern": [{"LOWER": "blocking"}], "id": "algorithms-bias"}
391
- {"label": "adjectives", "pattern": [{"LOWER": "boolean"}], "id": "algorithms-bias"}
392
- {"label": "adjectives", "pattern": [{"LOWER": "bounded"}], "id": "algorithms-bias"}
393
- {"label": "adjectives", "pattern": [{"LOWER": "brute force"}], "id": "algorithms-bias"}
394
- {"label": "adjectives", "pattern": [{"LOWER": "commutative"}], "id": "algorithms-bias"}
395
- {"label": "adjectives", "pattern": [{"LOWER": "complete"}], "id": "algorithms-bias"}
396
- {"label": "adjectives", "pattern": [{"LOWER": "concave"}], "id": "algorithms-bias"}
397
- {"label": "adjectives", "pattern": [{"LOWER": "concurrent"}], "id": "algorithms-bias"}
398
- {"label": "adjectives", "pattern": [{"LOWER": "connected"}], "id": "algorithms-bias"}
399
- {"label": "adjectives", "pattern": [{"LOWER": "constant"}], "id": "algorithms-bias"}
400
- {"label": "adjectives", "pattern": [{"LOWER": "counting"}], "id": "algorithms-bias"}
401
- {"label": "adjectives", "pattern": [{"LOWER": "covering"}], "id": "algorithms-bias"}
402
- {"label": "adjectives", "pattern": [{"LOWER": "cyclic"}], "id": "algorithms-bias"}
403
- {"label": "adjectives", "pattern": [{"LOWER": "decidable"}], "id": "algorithms-bias"}
404
- {"label": "adjectives", "pattern": [{"LOWER": "descent"}], "id": "algorithms-bias"}
405
- {"label": "adjectives", "pattern": [{"LOWER": "deterministic"}], "id": "algorithms-bias"}
406
- {"label": "adjectives", "pattern": [{"LOWER": "dichotomic"}], "id": "algorithms-bias"}
407
- {"label": "adjectives", "pattern": [{"LOWER": "dyadic"}], "id": "algorithms-bias"}
408
- {"label": "adjectives", "pattern": [{"LOWER": "dynamic"}], "id": "algorithms-bias"}
409
- {"label": "adjectives", "pattern": [{"LOWER": "exact"}], "id": "algorithms-bias"}
410
- {"label": "adjectives", "pattern": [{"LOWER": "exhaustive"}], "id": "algorithms-bias"}
411
- {"label": "adjectives", "pattern": [{"LOWER": "exponential"}], "id": "algorithms-bias"}
412
- {"label": "adjectives", "pattern": [{"LOWER": "extended"}], "id": "algorithms-bias"}
413
- {"label": "adjectives", "pattern": [{"LOWER": "external"}], "id": "algorithms-bias"}
414
- {"label": "adjectives", "pattern": [{"LOWER": "extremal"}], "id": "algorithms-bias"}
415
- {"label": "adjectives", "pattern": [{"LOWER": "factorial"}], "id": "algorithms-bias"}
416
- {"label": "adjectives", "pattern": [{"LOWER": "feasible"}], "id": "algorithms-bias"}
417
- {"label": "adjectives", "pattern": [{"LOWER": "finite"}], "id": "algorithms-bias"}
418
- {"label": "adjectives", "pattern": [{"LOWER": "fixed"}], "id": "algorithms-bias"}
419
- {"label": "adjectives", "pattern": [{"LOWER": "formal"}], "id": "algorithms-bias"}
420
- {"label": "adjectives", "pattern": [{"LOWER": "forward"}], "id": "algorithms-bias"}
421
- {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "algorithms-bias"}
422
- {"label": "adjectives", "pattern": [{"LOWER": "greedy"}], "id": "algorithms-bias"}
423
- {"label": "adjectives", "pattern": [{"LOWER": "hidden"}], "id": "algorithms-bias"}
424
- {"label": "adjectives", "pattern": [{"LOWER": "inclusive"}], "id": "algorithms-bias"}
425
- {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "algorithms-bias"}
426
- {"label": "adjectives", "pattern": [{"LOWER": "intractable"}], "id": "algorithms-bias"}
427
- {"label": "adjectives", "pattern": [{"LOWER": "inverse"}], "id": "algorithms-bias"}
428
- {"label": "adjectives", "pattern": [{"LOWER": "inverted"}], "id": "algorithms-bias"}
429
- {"label": "adjectives", "pattern": [{"LOWER": "isomorphic"}], "id": "algorithms-bias"}
430
- {"label": "adjectives", "pattern": [{"LOWER": "linear"}], "id": "algorithms-bias"}
431
- {"label": "adjectives", "pattern": [{"LOWER": "local"}], "id": "algorithms-bias"}
432
- {"label": "adjectives", "pattern": [{"LOWER": "lower"}], "id": "algorithms-bias"}
433
- {"label": "adjectives", "pattern": [{"LOWER": "matching"}], "id": "algorithms-bias"}
434
- {"label": "adjectives", "pattern": [{"LOWER": "maximum"}], "id": "algorithms-bias"}
435
- {"label": "adjectives", "pattern": [{"LOWER": "mean"}], "id": "algorithms-bias"}
436
- {"label": "adjectives", "pattern": [{"LOWER": "median"}], "id": "algorithms-bias"}
437
- {"label": "adjectives", "pattern": [{"LOWER": "minimum"}], "id": "algorithms-bias"}
438
- {"label": "adjectives", "pattern": [{"LOWER": "mode"}], "id": "algorithms-bias"}
439
- {"label": "adjectives", "pattern": [{"LOWER": "naive"}], "id": "algorithms-bias"}
440
- {"label": "adjectives", "pattern": [{"LOWER": "nearest"}], "id": "algorithms-bias"}
441
- {"label": "adjectives", "pattern": [{"LOWER": "nondeterministic"}], "id": "algorithms-bias"}
442
- {"label": "adjectives", "pattern": [{"LOWER": "null"}], "id": "algorithms-bias"}
443
- {"label": "adjectives", "pattern": [{"LOWER": "nullary"}], "id": "algorithms-bias"}
444
- {"label": "adjectives", "pattern": [{"LOWER": "objective"}], "id": "algorithms-bias"}
445
- {"label": "adjectives", "pattern": [{"LOWER": "offline"}], "id": "algorithms-bias"}
446
- {"label": "adjectives", "pattern": [{"LOWER": "online"}], "id": "algorithms-bias"}
447
- {"label": "adjectives", "pattern": [{"LOWER": "optimal"}], "id": "algorithms-bias"}
448
- {"label": "adjectives", "pattern": [{"LOWER": "ordered"}], "id": "algorithms-bias"}
449
- {"label": "adjectives", "pattern": [{"LOWER": "oriented"}], "id": "algorithms-bias"}
450
- {"label": "adjectives", "pattern": [{"LOWER": "orthogonal"}], "id": "algorithms-bias"}
451
- {"label": "adjectives", "pattern": [{"LOWER": "oscillating"}], "id": "algorithms-bias"}
452
- {"label": "adjectives", "pattern": [{"LOWER": "parallel"}], "id": "algorithms-bias"}
453
- {"label": "adjectives", "pattern": [{"LOWER": "partial"}], "id": "algorithms-bias"}
454
- {"label": "adjectives", "pattern": [{"LOWER": "perfect"}], "id": "algorithms-bias"}
455
- {"label": "adjectives", "pattern": [{"LOWER": "persistent"}], "id": "algorithms-bias"}
456
- {"label": "adjectives", "pattern": [{"LOWER": "planar"}], "id": "algorithms-bias"}
457
- {"label": "adjectives", "pattern": [{"LOWER": "polynomial"}], "id": "algorithms-bias"}
458
- {"label": "adjectives", "pattern": [{"LOWER": "proper"}], "id": "algorithms-bias"}
459
- {"label": "adjectives", "pattern": [{"LOWER": "quadratic"}], "id": "algorithms-bias"}
460
- {"label": "adjectives", "pattern": [{"LOWER": "ragged"}], "id": "algorithms-bias"}
461
- {"label": "adjectives", "pattern": [{"LOWER": "random"}], "id": "algorithms-bias"}
462
- {"label": "adjectives", "pattern": [{"LOWER": "randomized"}], "id": "algorithms-bias"}
463
- {"label": "adjectives", "pattern": [{"LOWER": "rectilinear"}], "id": "algorithms-bias"}
464
- {"label": "adjectives", "pattern": [{"LOWER": "recursive"}], "id": "algorithms-bias"}
465
- {"label": "adjectives", "pattern": [{"LOWER": "reduced"}], "id": "algorithms-bias"}
466
- {"label": "adjectives", "pattern": [{"LOWER": "relaxed"}], "id": "algorithms-bias"}
467
- {"label": "adjectives", "pattern": [{"LOWER": "shortest"}], "id": "algorithms-bias"}
468
- {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "algorithms-bias"}
469
- {"label": "adjectives", "pattern": [{"LOWER": "sparse"}], "id": "algorithms-bias"}
470
- {"label": "adjectives", "pattern": [{"LOWER": "spatial"}], "id": "algorithms-bias"}
471
- {"label": "adjectives", "pattern": [{"LOWER": "square"}], "id": "algorithms-bias"}
472
- {"label": "adjectives", "pattern": [{"LOWER": "stable"}], "id": "algorithms-bias"}
473
- {"label": "adjectives", "pattern": [{"LOWER": "swarm"}], "id": "algorithms-bias"}
474
- {"label": "adjectives", "pattern": [{"LOWER": "symmetric"}], "id": "algorithms-bias"}
475
- {"label": "adjectives", "pattern": [{"LOWER": "terminal"}], "id": "algorithms-bias"}
476
- {"label": "adjectives", "pattern": [{"LOWER": "ternary"}], "id": "algorithms-bias"}
477
- {"label": "adjectives", "pattern": [{"LOWER": "threaded"}], "id": "algorithms-bias"}
478
- {"label": "adjectives", "pattern": [{"LOWER": "tractable"}], "id": "algorithms-bias"}
479
- {"label": "adjectives", "pattern": [{"LOWER": "unary"}], "id": "algorithms-bias"}
480
- {"label": "adjectives", "pattern": [{"LOWER": "undecidable"}], "id": "algorithms-bias"}
481
- {"label": "adjectives", "pattern": [{"LOWER": "undirected"}], "id": "algorithms-bias"}
482
- {"label": "adjectives", "pattern": [{"LOWER": "uniform"}], "id": "algorithms-bias"}
483
- {"label": "adjectives", "pattern": [{"LOWER": "universal"}], "id": "algorithms-bias"}
484
- {"label": "adjectives", "pattern": [{"LOWER": "unsolvable"}], "id": "algorithms-bias"}
485
- {"label": "adjectives", "pattern": [{"LOWER": "unsorted"}], "id": "algorithms-bias"}
486
- {"label": "adjectives", "pattern": [{"LOWER": "visible"}], "id": "algorithms-bias"}
487
- {"label": "adjectives", "pattern": [{"LOWER": "weighted"}], "id": "algorithms-bias"}
488
- {"label": "adjectives", "pattern": [{"LOWER": "acute"}], "id": "geometry-bias"}
489
- {"label": "adjectives", "pattern": [{"LOWER": "adjacent"}], "id": "geometry-bias"}
490
- {"label": "adjectives", "pattern": [{"LOWER": "alternate"}], "id": "geometry-bias"}
491
- {"label": "adjectives", "pattern": [{"LOWER": "central"}], "id": "geometry-bias"}
492
- {"label": "adjectives", "pattern": [{"LOWER": "coincident"}], "id": "geometry-bias"}
493
- {"label": "adjectives", "pattern": [{"LOWER": "collinear"}], "id": "geometry-bias"}
494
- {"label": "adjectives", "pattern": [{"LOWER": "composite"}], "id": "geometry-bias"}
495
- {"label": "adjectives", "pattern": [{"LOWER": "concave"}], "id": "geometry-bias"}
496
- {"label": "adjectives", "pattern": [{"LOWER": "concentric"}], "id": "geometry-bias"}
497
- {"label": "adjectives", "pattern": [{"LOWER": "congruent"}], "id": "geometry-bias"}
498
- {"label": "adjectives", "pattern": [{"LOWER": "convex"}], "id": "geometry-bias"}
499
- {"label": "adjectives", "pattern": [{"LOWER": "coplanar"}], "id": "geometry-bias"}
500
- {"label": "adjectives", "pattern": [{"LOWER": "diagonal"}], "id": "geometry-bias"}
501
- {"label": "adjectives", "pattern": [{"LOWER": "distinct"}], "id": "geometry-bias"}
502
- {"label": "adjectives", "pattern": [{"LOWER": "equidistant"}], "id": "geometry-bias"}
503
- {"label": "adjectives", "pattern": [{"LOWER": "equilateral"}], "id": "geometry-bias"}
504
- {"label": "adjectives", "pattern": [{"LOWER": "fixed"}], "id": "geometry-bias"}
505
- {"label": "adjectives", "pattern": [{"LOWER": "horizontal"}], "id": "geometry-bias"}
506
- {"label": "adjectives", "pattern": [{"LOWER": "inscribed"}], "id": "geometry-bias"}
507
- {"label": "adjectives", "pattern": [{"LOWER": "interior"}], "id": "geometry-bias"}
508
- {"label": "adjectives", "pattern": [{"LOWER": "irregular"}], "id": "geometry-bias"}
509
- {"label": "adjectives", "pattern": [{"LOWER": "linear"}], "id": "geometry-bias"}
510
- {"label": "adjectives", "pattern": [{"LOWER": "oblique"}], "id": "geometry-bias"}
511
- {"label": "adjectives", "pattern": [{"LOWER": "obtuse"}], "id": "geometry-bias"}
512
- {"label": "adjectives", "pattern": [{"LOWER": "parallel"}], "id": "geometry-bias"}
513
- {"label": "adjectives", "pattern": [{"LOWER": "perpendicular"}], "id": "geometry-bias"}
514
- {"label": "adjectives", "pattern": [{"LOWER": "regular"}], "id": "geometry-bias"}
515
- {"label": "adjectives", "pattern": [{"LOWER": "right"}], "id": "geometry-bias"}
516
- {"label": "adjectives", "pattern": [{"LOWER": "similar"}], "id": "geometry-bias"}
517
- {"label": "adjectives", "pattern": [{"LOWER": "vertical"}], "id": "geometry-bias"}
518
- {"label": "adjectives", "pattern": [{"LOWER": "brass"}], "id": "materials-bias"}
519
- {"label": "adjectives", "pattern": [{"LOWER": "chalky"}], "id": "materials-bias"}
520
- {"label": "adjectives", "pattern": [{"LOWER": "concrete"}], "id": "materials-bias"}
521
- {"label": "adjectives", "pattern": [{"LOWER": "felt"}], "id": "materials-bias"}
522
- {"label": "adjectives", "pattern": [{"LOWER": "gilded"}], "id": "materials-bias"}
523
- {"label": "adjectives", "pattern": [{"LOWER": "glass"}], "id": "materials-bias"}
524
- {"label": "adjectives", "pattern": [{"LOWER": "golden"}], "id": "materials-bias"}
525
- {"label": "adjectives", "pattern": [{"LOWER": "iron"}], "id": "materials-bias"}
526
- {"label": "adjectives", "pattern": [{"LOWER": "leather"}], "id": "materials-bias"}
527
- {"label": "adjectives", "pattern": [{"LOWER": "metal"}], "id": "materials-bias"}
528
- {"label": "adjectives", "pattern": [{"LOWER": "metallic"}], "id": "materials-bias"}
529
- {"label": "adjectives", "pattern": [{"LOWER": "oily"}], "id": "materials-bias"}
530
- {"label": "adjectives", "pattern": [{"LOWER": "paper"}], "id": "materials-bias"}
531
- {"label": "adjectives", "pattern": [{"LOWER": "plastic"}], "id": "materials-bias"}
532
- {"label": "adjectives", "pattern": [{"LOWER": "silver"}], "id": "materials-bias"}
533
- {"label": "adjectives", "pattern": [{"LOWER": "steel"}], "id": "materials-bias"}
534
- {"label": "adjectives", "pattern": [{"LOWER": "stone"}], "id": "materials-bias"}
535
- {"label": "adjectives", "pattern": [{"LOWER": "watery"}], "id": "materials-bias"}
536
- {"label": "adjectives", "pattern": [{"LOWER": "wicker"}], "id": "materials-bias"}
537
- {"label": "adjectives", "pattern": [{"LOWER": "wood"}], "id": "materials-bias"}
538
- {"label": "adjectives", "pattern": [{"LOWER": "wooden"}], "id": "materials-bias"}
539
- {"label": "adjectives", "pattern": [{"LOWER": "woolen"}], "id": "materials-bias"}
540
- {"label": "adjectives", "pattern": [{"LOWER": "beveled"}], "id": "construction-bias"}
541
- {"label": "adjectives", "pattern": [{"LOWER": "chamfered"}], "id": "construction-bias"}
542
- {"label": "adjectives", "pattern": [{"LOWER": "coped"}], "id": "construction-bias"}
543
- {"label": "adjectives", "pattern": [{"LOWER": "flashed"}], "id": "construction-bias"}
544
- {"label": "adjectives", "pattern": [{"LOWER": "flush"}], "id": "construction-bias"}
545
- {"label": "adjectives", "pattern": [{"LOWER": "inflammable"}], "id": "construction-bias"}
546
- {"label": "adjectives", "pattern": [{"LOWER": "insulated"}], "id": "construction-bias"}
547
- {"label": "adjectives", "pattern": [{"LOWER": "isometric"}], "id": "construction-bias"}
548
- {"label": "adjectives", "pattern": [{"LOWER": "joint"}], "id": "construction-bias"}
549
- {"label": "adjectives", "pattern": [{"LOWER": "knurled"}], "id": "construction-bias"}
550
- {"label": "adjectives", "pattern": [{"LOWER": "laminated"}], "id": "construction-bias"}
551
- {"label": "adjectives", "pattern": [{"LOWER": "level"}], "id": "construction-bias"}
552
- {"label": "adjectives", "pattern": [{"LOWER": "plumb"}], "id": "construction-bias"}
553
- {"label": "adjectives", "pattern": [{"LOWER": "radial"}], "id": "construction-bias"}
554
- {"label": "adjectives", "pattern": [{"LOWER": "rigid"}], "id": "construction-bias"}
555
- {"label": "adjectives", "pattern": [{"LOWER": "soluble"}], "id": "construction-bias"}
556
- {"label": "adjectives", "pattern": [{"LOWER": "tempered"}], "id": "construction-bias"}
557
- {"label": "adjectives", "pattern": [{"LOWER": "warped"}], "id": "construction-bias"}
558
- {"label": "adjectives", "pattern": [{"LOWER": "adagio"}], "id": "music_theory-bias"}
559
- {"label": "adjectives", "pattern": [{"LOWER": "allegro"}], "id": "music_theory-bias"}
560
- {"label": "adjectives", "pattern": [{"LOWER": "andante"}], "id": "music_theory-bias"}
561
- {"label": "adjectives", "pattern": [{"LOWER": "animato"}], "id": "music_theory-bias"}
562
- {"label": "adjectives", "pattern": [{"LOWER": "espressivo"}], "id": "music_theory-bias"}
563
- {"label": "adjectives", "pattern": [{"LOWER": "grandioso"}], "id": "music_theory-bias"}
564
- {"label": "adjectives", "pattern": [{"LOWER": "grave"}], "id": "music_theory-bias"}
565
- {"label": "adjectives", "pattern": [{"LOWER": "largo"}], "id": "music_theory-bias"}
566
- {"label": "adjectives", "pattern": [{"LOWER": "legato"}], "id": "music_theory-bias"}
567
- {"label": "adjectives", "pattern": [{"LOWER": "libretto"}], "id": "music_theory-bias"}
568
- {"label": "adjectives", "pattern": [{"LOWER": "moderato"}], "id": "music_theory-bias"}
569
- {"label": "adjectives", "pattern": [{"LOWER": "molto"}], "id": "music_theory-bias"}
570
- {"label": "adjectives", "pattern": [{"LOWER": "pizzicato"}], "id": "music_theory-bias"}
571
- {"label": "adjectives", "pattern": [{"LOWER": "presto"}], "id": "music_theory-bias"}
572
- {"label": "adjectives", "pattern": [{"LOWER": "staccato"}], "id": "music_theory-bias"}
573
- {"label": "adjectives", "pattern": [{"LOWER": "vibrato"}], "id": "music_theory-bias"}
574
- {"label": "adjectives", "pattern": [{"LOWER": "blazing"}], "id": "appearance-bias"}
575
- {"label": "adjectives", "pattern": [{"LOWER": "bright"}], "id": "appearance-bias"}
576
- {"label": "adjectives", "pattern": [{"LOWER": "brilliant"}], "id": "appearance-bias"}
577
- {"label": "adjectives", "pattern": [{"LOWER": "burning"}], "id": "appearance-bias"}
578
- {"label": "adjectives", "pattern": [{"LOWER": "clean"}], "id": "appearance-bias"}
579
- {"label": "adjectives", "pattern": [{"LOWER": "colorful"}], "id": "appearance-bias"}
580
- {"label": "adjectives", "pattern": [{"LOWER": "dark"}], "id": "appearance-bias"}
581
- {"label": "adjectives", "pattern": [{"LOWER": "drab"}], "id": "appearance-bias"}
582
- {"label": "adjectives", "pattern": [{"LOWER": "dull"}], "id": "appearance-bias"}
583
- {"label": "adjectives", "pattern": [{"LOWER": "faded"}], "id": "appearance-bias"}
584
- {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "appearance-bias"}
585
- {"label": "adjectives", "pattern": [{"LOWER": "glossy"}], "id": "appearance-bias"}
586
- {"label": "adjectives", "pattern": [{"LOWER": "glowing"}], "id": "appearance-bias"}
587
- {"label": "adjectives", "pattern": [{"LOWER": "light"}], "id": "appearance-bias"}
588
- {"label": "adjectives", "pattern": [{"LOWER": "matte"}], "id": "appearance-bias"}
589
- {"label": "adjectives", "pattern": [{"LOWER": "muted"}], "id": "appearance-bias"}
590
- {"label": "adjectives", "pattern": [{"LOWER": "pale"}], "id": "appearance-bias"}
591
- {"label": "adjectives", "pattern": [{"LOWER": "pallid"}], "id": "appearance-bias"}
592
- {"label": "adjectives", "pattern": [{"LOWER": "radiant"}], "id": "appearance-bias"}
593
- {"label": "adjectives", "pattern": [{"LOWER": "shiny"}], "id": "appearance-bias"}
594
- {"label": "adjectives", "pattern": [{"LOWER": "sleek"}], "id": "appearance-bias"}
595
- {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "appearance-bias"}
596
- {"label": "adjectives", "pattern": [{"LOWER": "vibrant"}], "id": "appearance-bias"}
597
- {"label": "adjectives", "pattern": [{"LOWER": "vivid"}], "id": "appearance-bias"}
598
- {"label": "adjectives", "pattern": [{"LOWER": "wan"}], "id": "appearance-bias"}
599
- {"label": "adjectives", "pattern": [{"LOWER": "weathered"}], "id": "appearance-bias"}
600
- {"label": "adjectives", "pattern": [{"LOWER": "worn"}], "id": "appearance-bias"}
601
- {"label": "adjectives", "pattern": [{"LOWER": "descriptive"}], "id": "linguistics-bias"}
602
- {"label": "adjectives", "pattern": [{"LOWER": "diachronic"}], "id": "linguistics-bias"}
603
- {"label": "adjectives", "pattern": [{"LOWER": "figurative"}], "id": "linguistics-bias"}
604
- {"label": "adjectives", "pattern": [{"LOWER": "generative"}], "id": "linguistics-bias"}
605
- {"label": "adjectives", "pattern": [{"LOWER": "marked"}], "id": "linguistics-bias"}
606
- {"label": "adjectives", "pattern": [{"LOWER": "regular"}], "id": "linguistics-bias"}
607
- {"label": "adjectives", "pattern": [{"LOWER": "synchronic"}], "id": "linguistics-bias"}
608
- {"label": "adjectives", "pattern": [{"LOWER": "taxonomic"}], "id": "linguistics-bias"}
609
- {"label": "adjectives", "pattern": [{"LOWER": "unproductive"}], "id": "linguistics-bias"}
610
- {"label": "adjectives", "pattern": [{"LOWER": "afraid"}], "id": "emotions-bias"}
611
- {"label": "adjectives", "pattern": [{"LOWER": "angry"}], "id": "emotions-bias"}
612
- {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "emotions-bias"}
613
- {"label": "adjectives", "pattern": [{"LOWER": "cheerful"}], "id": "emotions-bias"}
614
- {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "emotions-bias"}
615
- {"label": "adjectives", "pattern": [{"LOWER": "crabby"}], "id": "emotions-bias"}
616
- {"label": "adjectives", "pattern": [{"LOWER": "crazy"}], "id": "emotions-bias"}
617
- {"label": "adjectives", "pattern": [{"LOWER": "cross"}], "id": "emotions-bias"}
618
- {"label": "adjectives", "pattern": [{"LOWER": "excited"}], "id": "emotions-bias"}
619
- {"label": "adjectives", "pattern": [{"LOWER": "frigid"}], "id": "emotions-bias"}
620
- {"label": "adjectives", "pattern": [{"LOWER": "furious"}], "id": "emotions-bias"}
621
- {"label": "adjectives", "pattern": [{"LOWER": "glad"}], "id": "emotions-bias"}
622
- {"label": "adjectives", "pattern": [{"LOWER": "glum"}], "id": "emotions-bias"}
623
- {"label": "adjectives", "pattern": [{"LOWER": "happy"}], "id": "emotions-bias"}
624
- {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "emotions-bias"}
625
- {"label": "adjectives", "pattern": [{"LOWER": "jolly"}], "id": "emotions-bias"}
626
- {"label": "adjectives", "pattern": [{"LOWER": "jovial"}], "id": "emotions-bias"}
627
- {"label": "adjectives", "pattern": [{"LOWER": "kind"}], "id": "emotions-bias"}
628
- {"label": "adjectives", "pattern": [{"LOWER": "lively"}], "id": "emotions-bias"}
629
- {"label": "adjectives", "pattern": [{"LOWER": "livid"}], "id": "emotions-bias"}
630
- {"label": "adjectives", "pattern": [{"LOWER": "mad"}], "id": "emotions-bias"}
631
- {"label": "adjectives", "pattern": [{"LOWER": "ornery"}], "id": "emotions-bias"}
632
- {"label": "adjectives", "pattern": [{"LOWER": "rosy"}], "id": "emotions-bias"}
633
- {"label": "adjectives", "pattern": [{"LOWER": "sad"}], "id": "emotions-bias"}
634
- {"label": "adjectives", "pattern": [{"LOWER": "scared"}], "id": "emotions-bias"}
635
- {"label": "adjectives", "pattern": [{"LOWER": "seething"}], "id": "emotions-bias"}
636
- {"label": "adjectives", "pattern": [{"LOWER": "shy"}], "id": "emotions-bias"}
637
- {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "emotions-bias"}
638
- {"label": "adjectives", "pattern": [{"LOWER": "tense"}], "id": "emotions-bias"}
639
- {"label": "adjectives", "pattern": [{"LOWER": "tranquil"}], "id": "emotions-bias"}
640
- {"label": "adjectives", "pattern": [{"LOWER": "upbeat"}], "id": "emotions-bias"}
641
- {"label": "adjectives", "pattern": [{"LOWER": "wary"}], "id": "emotions-bias"}
642
- {"label": "adjectives", "pattern": [{"LOWER": "weary"}], "id": "emotions-bias"}
643
- {"label": "adjectives", "pattern": [{"LOWER": "worried"}], "id": "emotions-bias"}
644
- {"label": "adjectives", "pattern": [{"LOWER": "advanced"}], "id": "age-bias"}
645
- {"label": "adjectives", "pattern": [{"LOWER": "aged"}], "id": "age-bias"}
646
- {"label": "adjectives", "pattern": [{"LOWER": "ancient"}], "id": "age-bias"}
647
- {"label": "adjectives", "pattern": [{"LOWER": "antique"}], "id": "age-bias"}
648
- {"label": "adjectives", "pattern": [{"LOWER": "archaic"}], "id": "age-bias"}
649
- {"label": "adjectives", "pattern": [{"LOWER": "contemporary"}], "id": "age-bias"}
650
- {"label": "adjectives", "pattern": [{"LOWER": "current"}], "id": "age-bias"}
651
- {"label": "adjectives", "pattern": [{"LOWER": "frayed"}], "id": "age-bias"}
652
- {"label": "adjectives", "pattern": [{"LOWER": "fresh"}], "id": "age-bias"}
653
- {"label": "adjectives", "pattern": [{"LOWER": "grizzled"}], "id": "age-bias"}
654
- {"label": "adjectives", "pattern": [{"LOWER": "hoary"}], "id": "age-bias"}
655
- {"label": "adjectives", "pattern": [{"LOWER": "immature"}], "id": "age-bias"}
656
- {"label": "adjectives", "pattern": [{"LOWER": "juvenile"}], "id": "age-bias"}
657
- {"label": "adjectives", "pattern": [{"LOWER": "mature"}], "id": "age-bias"}
658
- {"label": "adjectives", "pattern": [{"LOWER": "modern"}], "id": "age-bias"}
659
- {"label": "adjectives", "pattern": [{"LOWER": "new"}], "id": "age-bias"}
660
- {"label": "adjectives", "pattern": [{"LOWER": "novel"}], "id": "age-bias"}
661
- {"label": "adjectives", "pattern": [{"LOWER": "obsolete"}], "id": "age-bias"}
662
- {"label": "adjectives", "pattern": [{"LOWER": "old"}], "id": "age-bias"}
663
- {"label": "adjectives", "pattern": [{"LOWER": "primordial"}], "id": "age-bias"}
664
- {"label": "adjectives", "pattern": [{"LOWER": "ragged"}], "id": "age-bias"}
665
- {"label": "adjectives", "pattern": [{"LOWER": "raw"}], "id": "age-bias"}
666
- {"label": "adjectives", "pattern": [{"LOWER": "recent"}], "id": "age-bias"}
667
- {"label": "adjectives", "pattern": [{"LOWER": "senile"}], "id": "age-bias"}
668
- {"label": "adjectives", "pattern": [{"LOWER": "shabby"}], "id": "age-bias"}
669
- {"label": "adjectives", "pattern": [{"LOWER": "stale"}], "id": "age-bias"}
670
- {"label": "adjectives", "pattern": [{"LOWER": "tattered"}], "id": "age-bias"}
671
- {"label": "adjectives", "pattern": [{"LOWER": "threadbare"}], "id": "age-bias"}
672
- {"label": "adjectives", "pattern": [{"LOWER": "trite"}], "id": "age-bias"}
673
- {"label": "adjectives", "pattern": [{"LOWER": "vintage"}], "id": "age-bias"}
674
- {"label": "adjectives", "pattern": [{"LOWER": "worn"}], "id": "age-bias"}
675
- {"label": "adjectives", "pattern": [{"LOWER": "young"}], "id": "age-bias"}
676
- {"label": "adjectives", "pattern": [{"LOWER": "accepting"}], "id": "character-bias"}
677
- {"label": "adjectives", "pattern": [{"LOWER": "adventurous"}], "id": "character-bias"}
678
- {"label": "adjectives", "pattern": [{"LOWER": "affable"}], "id": "character-bias"}
679
- {"label": "adjectives", "pattern": [{"LOWER": "ambitious"}], "id": "character-bias"}
680
- {"label": "adjectives", "pattern": [{"LOWER": "amiable"}], "id": "character-bias"}
681
- {"label": "adjectives", "pattern": [{"LOWER": "amicable"}], "id": "character-bias"}
682
- {"label": "adjectives", "pattern": [{"LOWER": "annoying"}], "id": "character-bias"}
683
- {"label": "adjectives", "pattern": [{"LOWER": "bold"}], "id": "character-bias"}
684
- {"label": "adjectives", "pattern": [{"LOWER": "brave"}], "id": "character-bias"}
685
- {"label": "adjectives", "pattern": [{"LOWER": "bright"}], "id": "character-bias"}
686
- {"label": "adjectives", "pattern": [{"LOWER": "brutal"}], "id": "character-bias"}
687
- {"label": "adjectives", "pattern": [{"LOWER": "brute"}], "id": "character-bias"}
688
- {"label": "adjectives", "pattern": [{"LOWER": "callous"}], "id": "character-bias"}
689
- {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "character-bias"}
690
- {"label": "adjectives", "pattern": [{"LOWER": "careful"}], "id": "character-bias"}
691
- {"label": "adjectives", "pattern": [{"LOWER": "cautious"}], "id": "character-bias"}
692
- {"label": "adjectives", "pattern": [{"LOWER": "charitable"}], "id": "character-bias"}
693
- {"label": "adjectives", "pattern": [{"LOWER": "cheerful"}], "id": "character-bias"}
694
- {"label": "adjectives", "pattern": [{"LOWER": "clever"}], "id": "character-bias"}
695
- {"label": "adjectives", "pattern": [{"LOWER": "courtly"}], "id": "character-bias"}
696
- {"label": "adjectives", "pattern": [{"LOWER": "creative"}], "id": "character-bias"}
697
- {"label": "adjectives", "pattern": [{"LOWER": "cruel"}], "id": "character-bias"}
698
- {"label": "adjectives", "pattern": [{"LOWER": "curious"}], "id": "character-bias"}
699
- {"label": "adjectives", "pattern": [{"LOWER": "daring"}], "id": "character-bias"}
700
- {"label": "adjectives", "pattern": [{"LOWER": "devout"}], "id": "character-bias"}
701
- {"label": "adjectives", "pattern": [{"LOWER": "eager"}], "id": "character-bias"}
702
- {"label": "adjectives", "pattern": [{"LOWER": "elegant"}], "id": "character-bias"}
703
- {"label": "adjectives", "pattern": [{"LOWER": "energetic"}], "id": "character-bias"}
704
- {"label": "adjectives", "pattern": [{"LOWER": "excited"}], "id": "character-bias"}
705
- {"label": "adjectives", "pattern": [{"LOWER": "ferocious"}], "id": "character-bias"}
706
- {"label": "adjectives", "pattern": [{"LOWER": "forgiving"}], "id": "character-bias"}
707
- {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "character-bias"}
708
- {"label": "adjectives", "pattern": [{"LOWER": "friendly"}], "id": "character-bias"}
709
- {"label": "adjectives", "pattern": [{"LOWER": "funny"}], "id": "character-bias"}
710
- {"label": "adjectives", "pattern": [{"LOWER": "generous"}], "id": "character-bias"}
711
- {"label": "adjectives", "pattern": [{"LOWER": "genteel"}], "id": "character-bias"}
712
- {"label": "adjectives", "pattern": [{"LOWER": "gentle"}], "id": "character-bias"}
713
- {"label": "adjectives", "pattern": [{"LOWER": "graceful"}], "id": "character-bias"}
714
- {"label": "adjectives", "pattern": [{"LOWER": "grim"}], "id": "character-bias"}
715
- {"label": "adjectives", "pattern": [{"LOWER": "grouchy"}], "id": "character-bias"}
716
- {"label": "adjectives", "pattern": [{"LOWER": "happy"}], "id": "character-bias"}
717
- {"label": "adjectives", "pattern": [{"LOWER": "heartless"}], "id": "character-bias"}
718
- {"label": "adjectives", "pattern": [{"LOWER": "helpful"}], "id": "character-bias"}
719
- {"label": "adjectives", "pattern": [{"LOWER": "honest"}], "id": "character-bias"}
720
- {"label": "adjectives", "pattern": [{"LOWER": "humane"}], "id": "character-bias"}
721
- {"label": "adjectives", "pattern": [{"LOWER": "humble"}], "id": "character-bias"}
722
- {"label": "adjectives", "pattern": [{"LOWER": "impulsive"}], "id": "character-bias"}
723
- {"label": "adjectives", "pattern": [{"LOWER": "independent"}], "id": "character-bias"}
724
- {"label": "adjectives", "pattern": [{"LOWER": "indulgent"}], "id": "character-bias"}
725
- {"label": "adjectives", "pattern": [{"LOWER": "intense"}], "id": "character-bias"}
726
- {"label": "adjectives", "pattern": [{"LOWER": "inventive"}], "id": "character-bias"}
727
- {"label": "adjectives", "pattern": [{"LOWER": "kind"}], "id": "character-bias"}
728
- {"label": "adjectives", "pattern": [{"LOWER": "lazy"}], "id": "character-bias"}
729
- {"label": "adjectives", "pattern": [{"LOWER": "lenient"}], "id": "character-bias"}
730
- {"label": "adjectives", "pattern": [{"LOWER": "loyal"}], "id": "character-bias"}
731
- {"label": "adjectives", "pattern": [{"LOWER": "meek"}], "id": "character-bias"}
732
- {"label": "adjectives", "pattern": [{"LOWER": "merciless"}], "id": "character-bias"}
733
- {"label": "adjectives", "pattern": [{"LOWER": "merry"}], "id": "character-bias"}
734
- {"label": "adjectives", "pattern": [{"LOWER": "messy"}], "id": "character-bias"}
735
- {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "character-bias"}
736
- {"label": "adjectives", "pattern": [{"LOWER": "neat"}], "id": "character-bias"}
737
- {"label": "adjectives", "pattern": [{"LOWER": "nervous"}], "id": "character-bias"}
738
- {"label": "adjectives", "pattern": [{"LOWER": "obliging"}], "id": "character-bias"}
739
- {"label": "adjectives", "pattern": [{"LOWER": "obnoxious"}], "id": "character-bias"}
740
- {"label": "adjectives", "pattern": [{"LOWER": "odious"}], "id": "character-bias"}
741
- {"label": "adjectives", "pattern": [{"LOWER": "patient"}], "id": "character-bias"}
742
- {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "character-bias"}
743
- {"label": "adjectives", "pattern": [{"LOWER": "pleasant"}], "id": "character-bias"}
744
- {"label": "adjectives", "pattern": [{"LOWER": "polite"}], "id": "character-bias"}
745
- {"label": "adjectives", "pattern": [{"LOWER": "proper"}], "id": "character-bias"}
746
- {"label": "adjectives", "pattern": [{"LOWER": "proud"}], "id": "character-bias"}
747
- {"label": "adjectives", "pattern": [{"LOWER": "quick"}], "id": "character-bias"}
748
- {"label": "adjectives", "pattern": [{"LOWER": "quiet"}], "id": "character-bias"}
749
- {"label": "adjectives", "pattern": [{"LOWER": "refined"}], "id": "character-bias"}
750
- {"label": "adjectives", "pattern": [{"LOWER": "relaxed"}], "id": "character-bias"}
751
- {"label": "adjectives", "pattern": [{"LOWER": "religious"}], "id": "character-bias"}
752
- {"label": "adjectives", "pattern": [{"LOWER": "respectful"}], "id": "character-bias"}
753
- {"label": "adjectives", "pattern": [{"LOWER": "rude"}], "id": "character-bias"}
754
- {"label": "adjectives", "pattern": [{"LOWER": "savage"}], "id": "character-bias"}
755
- {"label": "adjectives", "pattern": [{"LOWER": "selfish"}], "id": "character-bias"}
756
- {"label": "adjectives", "pattern": [{"LOWER": "sensitive"}], "id": "character-bias"}
757
- {"label": "adjectives", "pattern": [{"LOWER": "serious"}], "id": "character-bias"}
758
- {"label": "adjectives", "pattern": [{"LOWER": "shrewd"}], "id": "character-bias"}
759
- {"label": "adjectives", "pattern": [{"LOWER": "silly"}], "id": "character-bias"}
760
- {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "character-bias"}
761
- {"label": "adjectives", "pattern": [{"LOWER": "smart"}], "id": "character-bias"}
762
- {"label": "adjectives", "pattern": [{"LOWER": "soft"}], "id": "character-bias"}
763
- {"label": "adjectives", "pattern": [{"LOWER": "sophisticated"}], "id": "character-bias"}
764
- {"label": "adjectives", "pattern": [{"LOWER": "stern"}], "id": "character-bias"}
765
- {"label": "adjectives", "pattern": [{"LOWER": "strong"}], "id": "character-bias"}
766
- {"label": "adjectives", "pattern": [{"LOWER": "stubborn"}], "id": "character-bias"}
767
- {"label": "adjectives", "pattern": [{"LOWER": "tender"}], "id": "character-bias"}
768
- {"label": "adjectives", "pattern": [{"LOWER": "tense"}], "id": "character-bias"}
769
- {"label": "adjectives", "pattern": [{"LOWER": "timid"}], "id": "character-bias"}
770
- {"label": "adjectives", "pattern": [{"LOWER": "tough"}], "id": "character-bias"}
771
- {"label": "adjectives", "pattern": [{"LOWER": "trusting"}], "id": "character-bias"}
772
- {"label": "adjectives", "pattern": [{"LOWER": "urbane"}], "id": "character-bias"}
773
- {"label": "adjectives", "pattern": [{"LOWER": "vain"}], "id": "character-bias"}
774
- {"label": "adjectives", "pattern": [{"LOWER": "vicious"}], "id": "character-bias"}
775
- {"label": "adjectives", "pattern": [{"LOWER": "violent"}], "id": "character-bias"}
776
- {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "character-bias"}
777
- {"label": "adjectives", "pattern": [{"LOWER": "wise"}], "id": "character-bias"}
778
- {"label": "adjectives", "pattern": [{"LOWER": "witty"}], "id": "character-bias"}
779
- {"label": "adjectives", "pattern": [{"LOWER": "acidic"}], "id": "food-bias"}
780
- {"label": "adjectives", "pattern": [{"LOWER": "baked"}], "id": "food-bias"}
781
- {"label": "adjectives", "pattern": [{"LOWER": "bitter"}], "id": "food-bias"}
782
- {"label": "adjectives", "pattern": [{"LOWER": "bland"}], "id": "food-bias"}
783
- {"label": "adjectives", "pattern": [{"LOWER": "blended"}], "id": "food-bias"}
784
- {"label": "adjectives", "pattern": [{"LOWER": "briny"}], "id": "food-bias"}
785
- {"label": "adjectives", "pattern": [{"LOWER": "buttery"}], "id": "food-bias"}
786
- {"label": "adjectives", "pattern": [{"LOWER": "candied"}], "id": "food-bias"}
787
- {"label": "adjectives", "pattern": [{"LOWER": "cheesy"}], "id": "food-bias"}
788
- {"label": "adjectives", "pattern": [{"LOWER": "chewy"}], "id": "food-bias"}
789
- {"label": "adjectives", "pattern": [{"LOWER": "chocolaty"}], "id": "food-bias"}
790
- {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "food-bias"}
791
- {"label": "adjectives", "pattern": [{"LOWER": "creamy"}], "id": "food-bias"}
792
- {"label": "adjectives", "pattern": [{"LOWER": "crispy"}], "id": "food-bias"}
793
- {"label": "adjectives", "pattern": [{"LOWER": "crunchy"}], "id": "food-bias"}
794
- {"label": "adjectives", "pattern": [{"LOWER": "delicious"}], "id": "food-bias"}
795
- {"label": "adjectives", "pattern": [{"LOWER": "doughy"}], "id": "food-bias"}
796
- {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "food-bias"}
797
- {"label": "adjectives", "pattern": [{"LOWER": "flavorful"}], "id": "food-bias"}
798
- {"label": "adjectives", "pattern": [{"LOWER": "frozen"}], "id": "food-bias"}
799
- {"label": "adjectives", "pattern": [{"LOWER": "golden"}], "id": "food-bias"}
800
- {"label": "adjectives", "pattern": [{"LOWER": "gourmet"}], "id": "food-bias"}
801
- {"label": "adjectives", "pattern": [{"LOWER": "greasy"}], "id": "food-bias"}
802
- {"label": "adjectives", "pattern": [{"LOWER": "grilled"}], "id": "food-bias"}
803
- {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "food-bias"}
804
- {"label": "adjectives", "pattern": [{"LOWER": "intense"}], "id": "food-bias"}
805
- {"label": "adjectives", "pattern": [{"LOWER": "jellied"}], "id": "food-bias"}
806
- {"label": "adjectives", "pattern": [{"LOWER": "juicy"}], "id": "food-bias"}
807
- {"label": "adjectives", "pattern": [{"LOWER": "jumbo"}], "id": "food-bias"}
808
- {"label": "adjectives", "pattern": [{"LOWER": "lean"}], "id": "food-bias"}
809
- {"label": "adjectives", "pattern": [{"LOWER": "marinated"}], "id": "food-bias"}
810
- {"label": "adjectives", "pattern": [{"LOWER": "mashed"}], "id": "food-bias"}
811
- {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "food-bias"}
812
- {"label": "adjectives", "pattern": [{"LOWER": "minty"}], "id": "food-bias"}
813
- {"label": "adjectives", "pattern": [{"LOWER": "nutty"}], "id": "food-bias"}
814
- {"label": "adjectives", "pattern": [{"LOWER": "organic"}], "id": "food-bias"}
815
- {"label": "adjectives", "pattern": [{"LOWER": "piquant"}], "id": "food-bias"}
816
- {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "food-bias"}
817
- {"label": "adjectives", "pattern": [{"LOWER": "poached"}], "id": "food-bias"}
818
- {"label": "adjectives", "pattern": [{"LOWER": "pounded"}], "id": "food-bias"}
819
- {"label": "adjectives", "pattern": [{"LOWER": "prepared"}], "id": "food-bias"}
820
- {"label": "adjectives", "pattern": [{"LOWER": "pureed"}], "id": "food-bias"}
821
- {"label": "adjectives", "pattern": [{"LOWER": "rancid"}], "id": "food-bias"}
822
- {"label": "adjectives", "pattern": [{"LOWER": "rank"}], "id": "food-bias"}
823
- {"label": "adjectives", "pattern": [{"LOWER": "rich"}], "id": "food-bias"}
824
- {"label": "adjectives", "pattern": [{"LOWER": "ripe"}], "id": "food-bias"}
825
- {"label": "adjectives", "pattern": [{"LOWER": "rubbery"}], "id": "food-bias"}
826
- {"label": "adjectives", "pattern": [{"LOWER": "salty"}], "id": "food-bias"}
827
- {"label": "adjectives", "pattern": [{"LOWER": "saucy"}], "id": "food-bias"}
828
- {"label": "adjectives", "pattern": [{"LOWER": "savory"}], "id": "food-bias"}
829
- {"label": "adjectives", "pattern": [{"LOWER": "seasoned"}], "id": "food-bias"}
830
- {"label": "adjectives", "pattern": [{"LOWER": "sharp"}], "id": "food-bias"}
831
- {"label": "adjectives", "pattern": [{"LOWER": "simmered"}], "id": "food-bias"}
832
- {"label": "adjectives", "pattern": [{"LOWER": "smoked"}], "id": "food-bias"}
833
- {"label": "adjectives", "pattern": [{"LOWER": "smoky"}], "id": "food-bias"}
834
- {"label": "adjectives", "pattern": [{"LOWER": "sour"}], "id": "food-bias"}
835
- {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "food-bias"}
836
- {"label": "adjectives", "pattern": [{"LOWER": "steamed"}], "id": "food-bias"}
837
- {"label": "adjectives", "pattern": [{"LOWER": "sticky"}], "id": "food-bias"}
838
- {"label": "adjectives", "pattern": [{"LOWER": "stringy"}], "id": "food-bias"}
839
- {"label": "adjectives", "pattern": [{"LOWER": "strong"}], "id": "food-bias"}
840
- {"label": "adjectives", "pattern": [{"LOWER": "succulent"}], "id": "food-bias"}
841
- {"label": "adjectives", "pattern": [{"LOWER": "sugary"}], "id": "food-bias"}
842
- {"label": "adjectives", "pattern": [{"LOWER": "sweet"}], "id": "food-bias"}
843
- {"label": "adjectives", "pattern": [{"LOWER": "syrupy"}], "id": "food-bias"}
844
- {"label": "adjectives", "pattern": [{"LOWER": "tangy"}], "id": "food-bias"}
845
- {"label": "adjectives", "pattern": [{"LOWER": "tart"}], "id": "food-bias"}
846
- {"label": "adjectives", "pattern": [{"LOWER": "tender"}], "id": "food-bias"}
847
- {"label": "adjectives", "pattern": [{"LOWER": "toasted"}], "id": "food-bias"}
848
- {"label": "adjectives", "pattern": [{"LOWER": "topped"}], "id": "food-bias"}
849
- {"label": "adjectives", "pattern": [{"LOWER": "tossed"}], "id": "food-bias"}
850
- {"label": "adjectives", "pattern": [{"LOWER": "yummy"}], "id": "food-bias"}
851
- {"label": "adjectives", "pattern": [{"LOWER": "zingy"}], "id": "food-bias"}
852
- {"label": "adjectives", "pattern": [{"LOWER": "braised"}], "id": "food-bias"}
853
- {"label": "adjectives", "pattern": [{"LOWER": "fried"}], "id": "food-bias"}
854
- {"label": "adjectives", "pattern": [{"LOWER": "fermented"}], "id": "food-bias"}
855
- {"label": "adjectives", "pattern": [{"LOWER": "milky"}], "id": "food-bias"}
856
- {"label": "adjectives", "pattern": [{"LOWER": "damaged"}], "id": "food-bias"}
857
- {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "food-bias"}
858
- {"label": "adjectives", "pattern": [{"LOWER": "edible"}], "id": "food-bias"}
859
- {"label": "adjectives", "pattern": [{"LOWER": "nutritious"}], "id": "food-bias"}
860
- {"label": "adjectives", "pattern": [{"LOWER": "citric"}], "id": "food-bias"}
861
- {"label": "adjectives", "pattern": [{"LOWER": "cloying"}], "id": "food-bias"}
862
- {"label": "adjectives", "pattern": [{"LOWER": "caramelized"}], "id": "food-bias"}
 
NLselector.py CHANGED
@@ -2,7 +2,7 @@
2
  import pandas as pd, spacy, nltk, numpy as np, re
3
  from spacy.matcher import Matcher
4
  #!python -m spacy download en_core_web_md #Not sure if we need this so I'm going to keep it just in case
5
- nlp = spacy.load("en_core_web_lg")
6
  import altair as alt
7
  import streamlit as st
8
  from annotated_text import annotated_text as ant
 
2
  import pandas as pd, spacy, nltk, numpy as np, re
3
  from spacy.matcher import Matcher
4
  #!python -m spacy download en_core_web_md #Not sure if we need this so I'm going to keep it just in case
5
+ nlp = spacy.load("Assets/Models/en_core_web_lg")
6
  import altair as alt
7
  import streamlit as st
8
  from annotated_text import annotated_text as ant
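For context on this change: `spacy.load` accepts a filesystem path to a pipeline directory (one containing `config.cfg`, `meta.json`, and so on) as well as an installed package name, which is what makes vendoring the model under `Assets/Models/` work without a separate `python -m spacy download` step. A minimal sketch of the pattern (the try/except fallback to the installed package is an illustrative addition, not part of this commit):

```python
import spacy

# spacy.load accepts either an installed package name or a path to a
# pipeline directory containing config.cfg, meta.json, etc.
try:
    nlp = spacy.load("Assets/Models/en_core_web_lg")  # vendored copy in the repo
except OSError:
    # Hypothetical fallback: use the installed package if the local copy is missing.
    nlp = spacy.load("en_core_web_lg")

doc = nlp("The movie was filmed in New Zealand.")
print([(ent.text, ent.label_) for ent in doc.ents])
```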
Pipfile DELETED
@@ -1,40 +0,0 @@
1
- [[source]]
2
- url = "https://pypi.org/simple"
3
- verify_ssl = true
4
- name = "pypi"
5
-
6
- [packages]
7
- streamlit = "*"
8
- pandas = "*"
9
- numpy = "*"
10
- altair = "*"
11
- sklearn = "*"
12
- streamlit-vega-lite = "*"
13
- plotly = "*"
14
- gensim = "*"
15
- nltk = "*"
16
- spacy = "*"
17
- lime = "*"
18
- xlrd = "*"
19
- colorama = "*"
20
- st-annotated-text = "*"
21
- shap = "*"
22
- transformers = "*"
23
- torch = "*"
24
- black = "==19.3b0"
25
- pylint = "*"
26
- watchdog = "*"
27
- jupyterlab = "*"
28
- jupyter = "*"
29
-
30
-
31
- [requires]
32
- python_version = "3.8"
33
-
34
- [scripts]
35
- format = "black ."
36
- format_check = "black --check ."
37
- lint = "pylint app.py"
38
- app= "streamlit run app.py"
39
- clear_cache = "streamlit cache clear"
40
- notebook = "jupyter notebook"
 
Pipfile.lock DELETED
The diff for this file is too large to render. See raw diff
 
README OG.md DELETED
@@ -1,34 +0,0 @@
1
- # NLC-Gen
2
- ### A Natural Language Counterfactual Generator for Exploring Bias in Sentiment Analysis Algorithms
3
-
4
- ##### Overview
5
- This project is an extension of [Interactive Model Cards](https://github.com/amcrisan/interactive-model-cards). It focuses on giving a person more ways to explore the bias of a model through the generation of alternatives (technically [counterfactuals](https://plato.stanford.edu/entries/counterfactuals/#WhatCoun)). We believe that by exploring alternatives, people can better understand the limitations of a model and develop productive skepticism around its usage and trustworthiness.
6
-
7
- ##### Set up
8
-
9
- Download the files from GitHub, then run the commands below in a terminal:
10
- ```sh
11
- cd NLC-Gen
12
- pipenv install
13
- pipenv shell
14
- python -m spacy download en_core_web_lg
15
- streamlit run NLC-app.py
16
- ```
17
-
18
- ##### Known Limitations
19
- * Words not in the spaCy vocab for `en_core_web_lg` won't have vectors, so similarity scores cannot be computed for them.
20
- * WordNet has many limitations due to its age and the lack of funding for ongoing maintenance. It covers a large portion of the English language, but certain words simply are not in it.
21
- * There are currently only 2 lists (Countries and Professions). We would like to find community-curated lists for race, sexual orientation and gender identity (SOGI), religion, age, and protected status.
22
-
23
-
24
- ##### Key Dependencies and Packages
25
-
26
- 1. [Hugging Face Transformers](https://huggingface.co/) - the model we've designed this iteration for is hosted on Hugging Face. It is: [distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
27
- 2. [Streamlit](https://streamlit.io) - This is the library we're using to build the prototype app because it is easy to stand up and quick to fix.
28
- 3. [spaCy](https://spacy.io) - This is the main NLP Library we're using and it runs most of the text manipulation we're doing as part of the project.
29
- 4. [NLTK + WordNet](https://www.nltk.org/howto/wordnet.html) - This is the initial lexical database we're using because it is accessible directly through Python and it is free. We will be considering a move to [ConceptNet](https://conceptnet.io/) for future iterations based on better lateral movement across edges.
30
- 5. [Lime](https://github.com/marcotcr/lime) - We chose Lime over Shap because Lime has more of the functionality we need. Shap appears to provide greater performance but is not as easily suited to our original designs.
31
- 6. [Altair](https://altair-viz.github.io/user_guide/encoding.html) - We're using Altair because it's well integrated into Streamlit.
32
-
33
-
34
-
 
README.md CHANGED
@@ -10,4 +10,23 @@ pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
 
10
  license: mit
11
  ---
12
 
13
+ # NLC-Explorer
14
+ ### A Natural Language Counterfactual Generator for Exploring Bias in Sentiment Analysis Algorithms
15
+
16
+ ##### Overview
17
+ This project is an extension of [Interactive Model Cards](https://github.com/amcrisan/interactive-model-cards). It focuses on giving a person more ways to explore the bias of a model through the generation of alternatives (technically [counterfactuals](https://plato.stanford.edu/entries/counterfactuals/#WhatCoun)). We believe that by exploring alternatives, people can better understand the limitations of a model and develop productive skepticism around its usage and trustworthiness.
18
+
19
+ ##### Known Limitations
20
+ * Words not in the spaCy vocab for `en_core_web_lg` won't have vectors, so similarity scores cannot be computed for them (see the sketch below the dependency list).
21
+ * WordNet has many limitations due to its age and the lack of funding for ongoing maintenance. It covers a large portion of the English language, but certain words simply are not in it.
22
+ * There are currently only 2 lists (Countries and Professions). We would like to find community-curated lists for race, sexual orientation and gender identity (SOGI), religion, age, and protected status.
23
+
24
+
25
+ ##### Key Dependencies and Packages
26
+
27
+ 1. [Hugging Face Transformers](https://huggingface.co/) - the model we've designed this iteration for is hosted on Hugging Face. It is: [distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
28
+ 2. [Streamlit](https://streamlit.io) - This is the library we're using to build the prototype app because it is easy to stand up and quick to fix.
29
+ 3. [spaCy](https://spacy.io) - This is the main NLP Library we're using and it runs most of the text manipulation we're doing as part of the project.
30
+ 4. [NLTK + WordNet](https://www.nltk.org/howto/wordnet.html) - This is the initial lexical database we're using because it is accessible directly through Python and it is free. We will be considering a move to [ConceptNet](https://conceptnet.io/) for future iterations based on better lateral movement across edges.
31
+ 5. [Lime](https://github.com/marcotcr/lime) - We chose Lime over Shap because Lime has more of the functionality we need. Shap appears to provide greater performance but is not as easily suited to our original designs.
32
+ 6. [Altair](https://altair-viz.github.io/user_guide/encoding.html) - We're using Altair because it's well integrated into Streamlit.
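A quick way to observe the first limitation above (a hypothetical snippet, not part of the app): spaCy marks tokens that have no vector, so you can check `has_vector` before trusting a similarity score.

```python
import spacy

nlp = spacy.load("en_core_web_lg")
doc = nlp("What a frabjous day")

for token in doc:
    # Tokens without a vector are out of vocabulary for en_core_web_lg;
    # similarity scores involving them are not meaningful (spaCy warns
    # and effectively returns 0.0).
    print(f"{token.text:<10} has_vector={token.has_vector} is_oov={token.is_oov}")
```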
VizNLC-duct-tape-pipeline.ipynb DELETED
@@ -1,934 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "8ea54fcd-ef4a-42cb-ae26-cbdc6f6ffc64",
6
- "metadata": {
7
- "tags": []
8
- },
9
- "source": [
10
- "# Duct Tape Pipeline\n",
11
- "To explore how users may interact with interactive visualizations of counterfactuals for evolving the Interactive Model Card, we will first need a way to generate counterfactuals from a given input. We want the user to be able to provide their input and direct the system to generate counterfactuals based on a part of speech that is significant to the model. The system should then provide a data frame of counterfactuals to be used in an interactive visualization. Below is an example wireframe of the experience based on previous research.\n",
12
- "\n",
13
- "![wireframe](Assets/VizNLC-Wireframe-example.png)\n",
14
- "\n",
15
- "## Goals of this notebook\n",
16
- "* Test which libraries (e.g. [spaCy](https://spacy.io/) and [NLTK](https://www.nltk.org/)) will work\n",
17
- "* Identify defaults to use\n",
18
- "* Build a rudimentary script for generating counterfactuals from user input\n",
19
- "* Ensure the counterfactuals are in a usable format for visualization"
20
- ]
21
- },
22
- {
23
- "cell_type": "markdown",
24
- "id": "736e6375-dd6d-4188-b8b1-92bded2bcd02",
25
- "metadata": {},
26
- "source": [
27
- "## Loading the libraries and models"
28
- ]
29
- },
30
- {
31
- "cell_type": "code",
32
- "execution_count": 3,
33
- "id": "7f581785-e642-4f74-9f67-06a63820eaf2",
34
- "metadata": {},
35
- "outputs": [],
36
- "source": [
37
- "#Import the libraries we know we'll need for the Generator.\n",
38
- "import pandas as pd, spacy, nltk, numpy as np\n",
39
- "from spacy import displacy\n",
40
- "from spacy.matcher import Matcher\n",
41
- "#!python -m spacy download en_core_web_sm\n",
42
- "nlp = spacy.load(\"en_core_web_sm\")\n",
43
- "lemmatizer = nlp.get_pipe(\"lemmatizer\")\n",
44
- "\n",
45
- "#Import the libraries to support the model, predictions, and LIME.\n",
46
- "from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline\n",
47
- "import lime\n",
48
- "import torch\n",
49
- "import torch.nn.functional as F\n",
50
- "from lime.lime_text import LimeTextExplainer\n",
51
- "\n",
52
- "#Import the libraries for generating interactive visualizations.\n",
53
- "import altair as alt"
54
- ]
55
- },
56
- {
57
- "cell_type": "code",
58
- "execution_count": null,
59
- "id": "cbe2b292-e33e-4915-8e61-bba5327fb643",
60
- "metadata": {},
61
- "outputs": [],
62
- "source": [
63
- "#Defining all necessary variables and instances.\n",
64
- "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n",
65
- "model = AutoModelForSequenceClassification.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n",
66
- "class_names = ['negative', 'positive']\n",
67
- "explainer = LimeTextExplainer(class_names=class_names)"
68
- ]
69
- },
70
- {
71
- "cell_type": "code",
72
- "execution_count": null,
73
- "id": "197c3e26-0fdf-49c6-9135-57f1fd55d3e3",
74
- "metadata": {},
75
- "outputs": [],
76
- "source": [
77
- "#Defining a Predictor required for LIME to function.\n",
78
- "def predictor(texts):\n",
79
- " outputs = model(**tokenizer(texts, return_tensors=\"pt\", padding=True))\n",
80
- " probas = F.softmax(outputs.logits, dim=1).detach().numpy()\n",
81
- " return probas"
82
- ]
83
- },
84
- {
85
- "cell_type": "markdown",
86
- "id": "e731dcbb-4fcf-41c6-9493-edef02fdb1b6",
87
- "metadata": {},
88
- "source": [
89
- "## Exploring concepts to see what might work\n",
90
- "To begin building the pipeline I started by identifying whether I needed to build my own matcher or whether spaCy has something built in that would make this easier. Building our own matcher, to account for each of the possible patterns, would be exceptionally cumbersome with all of the variations we need to look out for. Instead, I found that using the built-in `noun_chunks` attribute simplifies handling of the parts of speech we most care about. \n",
91
- "* I built a few helper functions from tutorials to explore the parts-of-speech within given sentences and the way `noun_chunks` work\n",
92
- "* I explored using `displacy` as a means of visualizing sentences to call out what the pre-trained models already understand"
93
- ]
94
- },
95
- {
96
- "cell_type": "code",
97
- "execution_count": null,
98
- "id": "1f2eca3c-525c-4e29-8cc1-c87e89a3fadf",
99
- "metadata": {},
100
- "outputs": [],
101
- "source": [
102
- "#A quick test of Noun Chunks\n",
103
- "text = \"The movie was filmed in New Zealand.\"\n",
104
- "doc = nlp(text)\n",
105
- "def n_chunk(doc):\n",
106
- " for chunk in doc.noun_chunks:\n",
107
- " print(f\"Text: {chunk.text:<12}| Root:{chunk.root.text:<12}| Root Dependency: {chunk.root.dep_:<12}| Root Head: {chunk.root.head.text:<12}\")\n",
108
- "n_chunk(doc)"
109
- ]
110
- },
111
- {
112
- "cell_type": "code",
113
- "execution_count": null,
114
- "id": "98978c29-a39c-48e3-bdbb-b74388ded6bc",
115
- "metadata": {},
116
- "outputs": [],
117
- "source": [
118
- "#The user will need to enter text. For now, we're going to provide a series of sentences generated to have things we care about. For clarity \"upt\" means \"user-provided text\".\n",
119
- "upt1 = \"I like movies starring black actors.\"\n",
120
- "upt2 = \"I am a black trans-woman.\"\n",
121
- "upt3 = \"Native Americans deserve to have their land back.\"\n",
122
- "upt4 = \"This movie was filmed in Iraq.\"\n",
123
- "\n",
124
- "#Here I provide a larger text with mixed messages one sentence per line.\n",
125
- "text1 = (\n",
126
- "\"I like movies starring black actors.\"\n",
127
- "\"I am a black trans-woman.\"\n",
128
- "\"Native Americans deserve to have their land back.\"\n",
129
- "\"This movie was filmed in Iraq.\"\n",
130
- "\"The Chinese cat and the African bat walked into a Jamaican bar.\"\n",
131
- "\"There once was a flexible pole that met an immovable object.\"\n",
132
- "\"A Catholic nun, a Buddhist monk, a satanic cultist, and a Wiccan walk into your garage.\")\n",
133
- "\n",
134
- "doc1 = nlp(upt1)\n",
135
- "doc2 = nlp(upt2)\n",
136
- "doc3 = nlp(upt3)\n",
137
- "doc4 = nlp(upt4)\n",
138
- "doct = nlp(text1)"
139
- ]
140
- },
141
- {
142
- "cell_type": "code",
143
- "execution_count": null,
144
- "id": "38023eca-b224-412d-aa71-02bd694530e0",
145
- "metadata": {},
146
- "outputs": [],
147
- "source": [
148
- "#Using displacy to explore how the NLP model views sentences.\n",
149
- "displacy.render(doc, style=\"ent\")"
150
- ]
151
- },
152
- {
153
- "cell_type": "code",
154
- "execution_count": null,
155
- "id": "c28edec8-dc30-4ef9-8c1e-131b0e1b1a45",
156
- "metadata": {},
157
- "outputs": [],
158
- "source": [
159
- "#Another visual for understanding how the model views sentences.\n",
160
- "displacy.render(doc, style=\"dep\")"
161
- ]
162
- },
163
- {
164
- "cell_type": "code",
165
- "execution_count": 4,
166
- "id": "dd0d5f8e-ee80-48f7-be92-effa5f84c723",
167
- "metadata": {},
168
- "outputs": [],
169
- "source": [
170
- "#A simple helper to print each token's text, part of speech, dependency, entity type, and morphology.\n",
171
- "def text_pos(doc):\n",
172
- " for token in doc:\n",
173
- " # Get the token text, part-of-speech tag and dependency label\n",
174
- " token_text = token.text\n",
175
- " token_pos = token.pos_\n",
176
- " token_dep = token.dep_\n",
177
- " token_ent = token.ent_type_\n",
178
- " token_morph = token.morph\n",
179
- " # This is for formatting only\n",
180
- " print(f\"Text: {token_text:<12}| Part of Speech: {token_pos:<10}| Dependency: {token_dep:<10}| Entity: {token_ent:<10} | Morph: {token_morph}\")"
181
- ]
182
- },
183
- {
184
- "cell_type": "code",
185
- "execution_count": 6,
186
- "id": "5dfee095-3852-4dba-a7dc-5519e8ec6eaa",
187
- "metadata": {},
188
- "outputs": [
189
- {
190
- "name": "stdout",
191
- "output_type": "stream",
192
- "text": [
193
- "Text: Who | Part of Speech: PRON | Dependency: nsubj | Entity: | Morph: \n",
194
- "Text: put | Part of Speech: VERB | Dependency: ROOT | Entity: | Morph: Tense=Past|VerbForm=Fin\n",
195
- "Text: a | Part of Speech: DET | Dependency: det | Entity: | Morph: Definite=Ind|PronType=Art\n",
196
- "Text: tiny | Part of Speech: ADJ | Dependency: amod | Entity: | Morph: Degree=Pos\n",
197
- "Text: pickle | Part of Speech: NOUN | Dependency: dobj | Entity: | Morph: Number=Sing\n",
198
- "Text: in | Part of Speech: ADP | Dependency: prep | Entity: | Morph: \n",
199
- "Text: the | Part of Speech: DET | Dependency: det | Entity: | Morph: Definite=Def|PronType=Art\n",
200
- "Text: jar | Part of Speech: NOUN | Dependency: pobj | Entity: | Morph: Number=Sing\n"
201
- ]
202
- }
203
- ],
204
- "source": [
205
- "x = nlp(\"Who put a tiny pickle in the jar\")\n",
206
- "text_pos(x)"
207
- ]
208
- },
209
- {
210
- "cell_type": "code",
211
- "execution_count": 11,
212
- "id": "2485d88d-2dd4-4fa3-9d62-4dcbec4e9138",
213
- "metadata": {},
214
- "outputs": [
215
- {
216
- "data": {
217
- "text/plain": [
218
- "0"
219
- ]
220
- },
221
- "execution_count": 11,
222
- "metadata": {},
223
- "output_type": "execute_result"
224
- }
225
- ],
226
- "source": [
227
- "len(x[0].morph)"
228
- ]
229
- },
230
- {
231
- "cell_type": "code",
232
- "execution_count": null,
233
- "id": "013af6ac-f7d1-41d2-a601-b0f9a4870815",
234
- "metadata": {},
235
- "outputs": [],
236
- "source": [
237
- "#Instantiate a matcher and use it to test some patterns.\n",
238
- "matcher = Matcher(nlp.vocab)\n",
239
- "pattern = [{\"ENT_TYPE\": {\"IN\":[\"NORP\",\"GPE\"]}}]\n",
240
- "matcher.add(\"proper_noun\", [pattern])\n",
241
- "pattern_test = [{\"DEP\": \"amod\"},{\"DEP\":\"attr\"},{\"TEXT\":\"-\"},{\"DEP\":\"attr\",\"OP\":\"+\"}]\n",
242
- "matcher.add(\"amod_attr\",[pattern_test])\n",
243
- "pattern_an = [{\"DEP\": \"amod\"},{\"POS\":{\"IN\":[\"NOUN\",\"PROPN\"]}},{\"DEP\":{\"NOT_IN\":[\"attr\"]}}]\n",
244
- "matcher.add(\"amod_noun\", [pattern_an])"
245
- ]
246
- },
247
- {
248
- "cell_type": "code",
249
- "execution_count": null,
250
- "id": "f6ac821d-7b56-446e-b9ca-42a5f5afd198",
251
- "metadata": {},
252
- "outputs": [],
253
- "source": [
254
- "def match_this(matcher, doc):\n",
255
- " matches = matcher(doc)\n",
256
- " for match_id, start, end in matches:\n",
257
- " matched_span = doc[start:end]\n",
258
- "        print(f\"Matched {matched_span.text} by the rule {nlp.vocab.strings[match_id]}.\")\n",
259
- " return matches"
260
- ]
261
- },
262
- {
263
- "cell_type": "code",
264
- "execution_count": null,
265
- "id": "958e4dc8-6652-4f32-b7ae-6aa5ee287cf7",
266
- "metadata": {},
267
- "outputs": [],
268
- "source": [
269
- "match_this(matcher, doct)"
270
- ]
271
- },
272
- {
273
- "cell_type": "code",
274
- "execution_count": null,
275
- "id": "5bf40fa5-b636-47f7-98b2-e872c78e7114",
276
- "metadata": {},
277
- "outputs": [],
278
- "source": [
279
- "text_pos(doc3)"
280
- ]
281
- },
282
- {
283
- "cell_type": "code",
284
- "execution_count": null,
285
- "id": "c5365304-5edb-428d-abf5-d579dcfbc269",
286
- "metadata": {},
287
- "outputs": [],
288
- "source": [
289
- "n_chunk(doc3)"
290
- ]
291
- },
292
- {
293
- "cell_type": "code",
294
- "execution_count": null,
295
- "id": "b7f3d3c8-65a1-433f-a47c-adcaaa2353e2",
296
- "metadata": {},
297
- "outputs": [],
298
- "source": [
299
- "displacy.render(doct, style=\"ent\")"
300
- ]
301
- },
302
- {
303
- "cell_type": "code",
304
- "execution_count": null,
305
- "id": "84df8e30-d142-4e5b-b3a9-02e3133ceba9",
306
- "metadata": {},
307
- "outputs": [],
308
- "source": [
309
- "txt = \"Savannah is a city in Georgia, in the United States\"\n",
310
- "doc = nlp(txt)\n",
311
- "displacy.render(doc, style=\"ent\")"
312
- ]
313
- },
314
- {
315
- "cell_type": "code",
316
- "execution_count": null,
317
- "id": "4a85f713-92bc-48ba-851e-de627d7e8c77",
318
- "metadata": {},
319
- "outputs": [],
320
- "source": [
321
- "displacy.render(doc2, style='dep')"
322
- ]
323
- },
324
- {
325
- "cell_type": "code",
326
- "execution_count": null,
327
- "id": "032f1134-7560-400b-824b-bc0196058b66",
328
- "metadata": {},
329
- "outputs": [],
330
- "source": [
331
- "n_chunk(doct)"
332
- ]
333
- },
334
- {
335
- "cell_type": "markdown",
336
- "id": "188044a1-4cf4-4141-a520-c5f11198aed8",
337
- "metadata": {},
338
- "source": [
339
- "* The model does not recognize `wiccan` as a NORP entity, but it will recognize `Wiccan` as one\n",
340
- "* The model does not know what to do with `-` and makes a mess of `trans-woman` because of this (see the sketch after this cell)"
341
- ]
342
- },
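Both observations can be checked directly; a small sketch assuming the same `en_core_web_sm` pipeline used in this notebook (the NER behavior shown is the notebook's own observation, not a guarantee of the model):

```python
import spacy

nlp = spacy.load("en_core_web_sm")

# Case sensitivity of NER: per the note above, "Wiccan" is tagged but "wiccan" is not.
for text in ["A Wiccan walks into your garage.", "A wiccan walks into your garage."]:
    print(text, [(ent.text, ent.label_) for ent in nlp(text).ents])

# Hyphen handling: the default English tokenizer splits "trans-woman"
# into three tokens, so no single token carries the full term.
print([token.text for token in nlp("I am a black trans-woman.")])
# expected: ['I', 'am', 'a', 'black', 'trans', '-', 'woman', '.']
```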
343
- {
344
- "cell_type": "code",
345
- "execution_count": null,
346
- "id": "2dc82250-e26e-49d5-a7f2-d4eeda170e4e",
347
- "metadata": {},
348
- "outputs": [],
349
- "source": [
350
- "chunks = list(doc1.noun_chunks)\n",
351
- "print(chunks[-1][-2].pos_)"
352
- ]
353
- },
354
- {
355
- "cell_type": "markdown",
356
- "id": "c23d48c4-f5ab-4428-9244-0786e9903a8e",
357
- "metadata": {},
358
- "source": [
359
- "## Building the Duct-Tape Pipeline cell-by-cell"
360
- ]
361
- },
362
- {
363
- "cell_type": "code",
364
- "execution_count": null,
365
- "id": "7ed22421-4401-482e-b54a-ee70d3187037",
366
- "metadata": {},
367
- "outputs": [],
368
- "source": [
369
- "#Lists of important words\n",
370
- "gender = [\"man\", \"woman\",\"girl\",\"boy\",\"male\",\"female\",\"husband\",\"wife\",\"girlfriend\",\"boyfriend\",\"brother\",\"sister\",\"aunt\",\"uncle\",\"grandma\",\"grandpa\",\"granny\",\"gramps\",\"grandmother\",\"grandfather\",\"mama\",\"dada\",\"ma\",\"pa\",\"lady\",\"gentleman\"]\n",
371
- "#consider pulling ethnicities from https://github.com/cgio/global-ethnicities"
372
- ]
373
- },
374
- {
375
- "cell_type": "code",
376
- "execution_count": null,
377
- "id": "8b02a5d4-8a6b-4e5e-8f15-4f9182fe341f",
378
- "metadata": {},
379
- "outputs": [],
380
- "source": [
381
- "def select_crit(document, options=False, limelist=False):\n",
382
- "    '''This function is meant to select the critical part of a sentence. Critical, in this context, means\n",
383
- " the part of the sentence that is either: A) a PROPN from the correct entity group; B) an ADJ associated with a NOUN;\n",
384
- " C) a NOUN that represents gender. It also checks this against what the model thinks is important if the user defines \"options\" as \"LIME\" or True.'''\n",
385
- " chunks = list(document.noun_chunks)\n",
386
- " pos_options = []\n",
387
- " lime_options = []\n",
388
- " \n",
389
- " #Identify what the model cares about.\n",
390
- " if options:\n",
391
- " exp = explainer.explain_instance(document.text, predictor, num_features=20, num_samples=2000)\n",
392
- " results = exp.as_list()[:10]\n",
393
- " #prints the results from lime for QA.\n",
394
- " if limelist == True:\n",
395
- " print(results)\n",
396
- " for feature in results:\n",
397
- " lime_options.append(feature[0])\n",
398
- " \n",
399
- " #Identify what we care about \"parts of speech\"\n",
400
- " for chunk in chunks:\n",
401
- " #The use of chunk[-1] is due to testing that it appears to always match the root\n",
402
- " root = chunk[-1]\n",
403
- " #This currently matches to a list I've created. I don't know the best way to deal with this so I'm leaving it as is for the moment.\n",
404
- " if root.text.lower() in gender:\n",
405
- " cur_values = [token.text for token in chunk if token.pos_ in [\"NOUN\",\"ADJ\"]]\n",
406
- " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
407
- " pos_options.extend(cur_values)\n",
408
- " #print(f\"From {chunk.text}, {cur_values} added to pos_options due to gender.\") #for QA\n",
409
- " #This is currently set to pick up entities in a particular set of groups (which I recently expanded). Should it just pick up all named entities?\n",
410
- " elif root.ent_type_ in [\"GPE\",\"NORP\",\"DATE\",\"EVENT\"]:\n",
411
- " cur_values = []\n",
412
- " if (len(chunk) > 1) and (chunk[-2].dep_ == \"compound\"):\n",
413
- " #creates the compound element of the noun\n",
414
- " compound = [x.text for x in chunk if x.dep_ == \"compound\"]\n",
415
- " print(f\"This is the contents of {compound} and it is {all(elem in lime_options for elem in compound)} that all elements are present in {lime_options}.\") #for QA\n",
416
- " #checks to see all elements in the compound are important to the model or use the compound if not checking importance.\n",
417
- " if (all(elem in lime_options for elem in compound) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
418
- " #creates a span for the entirety of the compound noun and adds it to the list.\n",
419
- " span = -1 * (1 + len(compound))\n",
420
- " pos_options.append(chunk[span:].text)\n",
421
- "                cur_values += [token.text for token in chunk if token.pos_ == \"ADJ\"]\n",
422
- " else: \n",
423
- " cur_values = [token.text for token in chunk if (token.ent_type_ in [\"GPE\",\"NORP\",\"DATE\",\"EVENT\"]) or (token.pos_ == \"ADJ\")]\n",
424
- " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
425
- " pos_options.extend(cur_values)\n",
426
- " print(f\"From {chunk.text}, {cur_values} and {pos_options} added to pos_options due to entity recognition.\") #for QA\n",
427
- " elif len(chunk) > 1:\n",
428
- " cur_values = [token.text for token in chunk if token.pos_ in [\"NOUN\",\"ADJ\"]]\n",
429
- " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
430
- " pos_options.extend(cur_values)\n",
431
- " print(f\"From {chunk.text}, {cur_values} added to pos_options due to wildcard.\") #for QA\n",
432
- " else:\n",
433
- " print(f\"No options added for \\'{chunk.text}\\' \")\n",
434
- " \n",
435
- " #Return the correct set of options based on user input, defaults to POS for simplicity.\n",
436
- " if options == \"LIME\":\n",
437
- " return lime_options\n",
438
- " else:\n",
439
- " return pos_options"
440
- ]
441
- },
442
- {
443
- "cell_type": "code",
444
- "execution_count": null,
445
- "id": "fa95e9fe-36ea-4b95-ab51-6bb82f745c23",
446
- "metadata": {},
447
- "outputs": [],
448
- "source": [
449
- "#Testing a method to make sure I had the ability to match one list inside the other. Now incorporated in the above function's logic.\n",
450
- "one = ['a','b','c']\n",
451
- "two = ['a','c']\n",
452
- "all(elem in one for elem in two)"
453
- ]
454
- },
455
- {
456
- "cell_type": "code",
457
- "execution_count": null,
458
- "id": "d43e202e-64b9-4cea-b117-82492c9ee5f4",
459
- "metadata": {},
460
- "outputs": [],
461
- "source": [
462
- "#Test to make sure all three options work\n",
463
- "pos4 = select_crit(doc4)\n",
464
- "lime4 = select_crit(doc4,options=\"LIME\")\n",
465
- "final4 = select_crit(doc4,options=True,limelist=True)\n",
466
- "print(pos4, lime4, final4)"
467
- ]
468
- },
469
- {
470
- "cell_type": "code",
471
- "execution_count": null,
472
- "id": "5623015e-fdb2-44f0-b5ac-812203b639b3",
473
- "metadata": {},
474
- "outputs": [],
475
- "source": [
476
- "#This is a test to make sure compounds of any length are captured. \n",
477
- "txt = \"I went to Papua New Guinea for Christmas Eve and New Years.\"\n",
478
- "doc_t = nlp(txt)\n",
479
- "select_crit(doc_t)"
480
- ]
481
- },
482
- {
483
- "cell_type": "code",
484
- "execution_count": null,
485
- "id": "58be22eb-a5c3-4a01-820b-45d190fce52d",
486
- "metadata": {},
487
- "outputs": [],
488
- "source": [
489
- "#Test to make sure all three options work. A known issue is that if we combine the compounds then they will not end up in the final_options...\n",
490
- "pos_t = select_crit(doc_t)\n",
491
- "lime_t = select_crit(doc_t,options=\"LIME\")\n",
492
- "final_t = select_crit(doc_t,options=True,limelist=True)\n",
493
- "print(pos_t, lime_t, final_t)"
494
- ]
495
- },
496
- {
497
- "cell_type": "code",
498
- "execution_count": null,
499
- "id": "1158de94-1472-4001-b3a1-42a488bcb20f",
500
- "metadata": {},
501
- "outputs": [],
502
- "source": [
503
- "select_crit(doc_t,options=True)"
504
- ]
505
- },
506
- {
507
- "cell_type": "markdown",
508
- "id": "05063ede-422f-4536-8408-ceb5441adbe8",
509
- "metadata": {},
510
- "source": [
511
- "> Note `Papua` and `Eve` have such low impact on the model that they do not always appear... so there will always be limitations to matching."
512
- ]
513
- },
514
- {
515
- "cell_type": "code",
516
- "execution_count": null,
517
- "id": "2c7c1ca9-4962-4fbe-b18b-1e20a223aff9",
518
- "metadata": {},
519
- "outputs": [],
520
- "source": [
521
- "select_crit(doc_t,options=\"LIME\")"
522
- ]
523
- },
524
- {
525
- "cell_type": "code",
526
- "execution_count": null,
527
- "id": "c70387a5-c431-43a5-a3b8-7533268a94e3",
528
- "metadata": {},
529
- "outputs": [],
530
- "source": [
531
- "displacy.render(doc_t, style=\"ent\")"
532
- ]
533
- },
534
- {
535
- "cell_type": "code",
536
- "execution_count": null,
537
- "id": "4b92d276-7d67-4c1c-940b-d3b2dcc756b9",
538
- "metadata": {},
539
- "outputs": [],
540
- "source": [
541
- "#This run clearly indicates that this pipeline from spaCy does not know what to do with hyphens (\"-\") and that we need to be aware of that.\n",
542
- "choices = select_crit(doct)\n",
543
- "choices"
544
- ]
545
- },
546
- {
547
- "cell_type": "code",
548
- "execution_count": null,
549
- "id": "ea6b29d0-d0fa-4eb3-af9c-970759124145",
550
- "metadata": {},
551
- "outputs": [],
552
- "source": [
553
- "user_choice = choices[2]\n",
554
- "matcher2 = Matcher(nlp.vocab)\n",
555
- "pattern = [{\"TEXT\": user_choice}]\n",
556
- "matcher2.add(\"user choice\", [pattern])"
557
- ]
558
- },
559
- {
560
- "cell_type": "code",
561
- "execution_count": null,
562
- "id": "d32754b8-f1fa-4781-a6b0-829ad7ec2e50",
563
- "metadata": {},
564
- "outputs": [],
565
- "source": [
566
- "#consider using https://github.com/writerai/replaCy instead\n",
567
- "match_id, start, end = match_this(matcher2,doc2)[0]"
568
- ]
569
- },
570
- {
571
- "cell_type": "code",
572
- "execution_count": null,
573
- "id": "a0362734-020b-49ad-b566-fdc7196e705c",
574
- "metadata": {},
575
- "outputs": [],
576
- "source": [
577
- "docx = doc2.text.replace(user_choice,\"man\")\n",
578
- "docx"
579
- ]
580
- },
581
- {
582
- "cell_type": "markdown",
583
- "id": "bf0512b6-336e-4842-9bde-34e03a1ca7c6",
584
- "metadata": {},
585
- "source": [
586
- "### Testing predictions and visualization\n",
587
- "Here I will attempt to import the model from Hugging Face, generate predictions for each of the sentences, and then visualize those predictions in a dot plot. If I can get this to work then I will move on to testing a full pipeline for letting the user pick which part of the sentence they wish to generate counterfactuals for."
588
- ]
589
- },
590
- {
591
- "cell_type": "code",
592
- "execution_count": null,
593
- "id": "e0bd4134-3b22-4ae8-870c-3a66c1cf8b23",
594
- "metadata": {},
595
- "outputs": [],
596
- "source": [
597
- "#Testing to see how to get predictions from the model. Ultimately, this did not work.\n",
598
- "token = tokenizer(upt4, return_tensors=\"pt\")\n",
599
- "labels = torch.tensor([1]).unsqueeze(0) # Batch size 1\n",
600
- "outputs = model(**token, labels=labels)"
601
- ]
602
- },
603
- {
604
- "cell_type": "code",
605
- "execution_count": null,
606
- "id": "74c639bb-e74a-4a46-8047-3552265ae6a4",
607
- "metadata": {},
608
- "outputs": [],
609
- "source": [
610
- "#Discovering that there's a pipeline specifically to provide scores. \n",
611
- "#I used it to get a list of lists of dictionaries that I can then manipulate to calculate the proper prediction score.\n",
612
- "pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)"
613
- ]
614
- },
615
- {
616
- "cell_type": "code",
617
- "execution_count": null,
618
- "id": "8e1ff15d-0fb9-475b-bd24-4548c0782343",
619
- "metadata": {},
620
- "outputs": [],
621
- "source": [
622
- "preds = pipe(upt4)\n",
623
- "print(preds[0][0])"
624
- ]
625
- },
626
- {
627
- "cell_type": "code",
628
- "execution_count": null,
629
- "id": "d8abb9ca-36cf-441a-9236-1f7e44331b53",
630
- "metadata": {},
631
- "outputs": [],
632
- "source": [
633
- "score_1 = preds[0][0]['score']\n",
634
- "score_2 = (score_1 - .5) * 2\n",
635
- "print(score_1, score_2)"
636
- ]
637
- },
638
- {
639
- "cell_type": "code",
640
- "execution_count": null,
641
- "id": "8726a284-99bd-47f1-9756-1c3ae603db10",
642
- "metadata": {},
643
- "outputs": [],
644
- "source": [
645
- "def eval_pred(text):\n",
646
- " '''A basic function for evaluating the prediction from the model and turning it into a visualization friendly number.'''\n",
647
- " preds = pipe(text)\n",
648
- " neg_score = preds[0][0]['score']\n",
649
- " pos_score = preds[0][1]['score']\n",
650
- " if pos_score >= neg_score:\n",
651
- " return pos_score\n",
652
- " if neg_score >= pos_score:\n",
653
- " return -1 * neg_score"
654
- ]
655
- },
656
- {
657
- "cell_type": "code",
658
- "execution_count": null,
659
- "id": "f38f5061-f30a-4c81-9465-37951c3ad9f4",
660
- "metadata": {},
661
- "outputs": [],
662
- "source": [
663
- "def eval_pred_test(text, return_all = False):\n",
664
- " '''A basic function for evaluating the prediction from the model and turning it into a visualization friendly number.'''\n",
665
- " preds = pipe(text)\n",
666
- " neg_score = -1 * preds[0][0]['score']\n",
667
- " sent_neg = preds[0][0]['label']\n",
668
- " pos_score = preds[0][1]['score']\n",
669
- " sent_pos = preds[0][1]['label']\n",
670
- " prediction = 0\n",
671
- " sentiment = ''\n",
672
- " if pos_score > abs(neg_score):\n",
673
- " prediction = pos_score\n",
674
- " sentiment = sent_pos\n",
675
- " elif abs(neg_score) > pos_score:\n",
676
- " prediction = neg_score\n",
677
- " sentiment = sent_neg\n",
678
- " \n",
679
- " if return_all:\n",
680
- " return prediction, sentiment\n",
681
- " else:\n",
682
- " return prediction"
683
- ]
684
- },
685
- {
686
- "cell_type": "code",
687
- "execution_count": null,
688
- "id": "abd5dd8c-8cff-4865-abf1-f5a744f2203b",
689
- "metadata": {},
690
- "outputs": [],
691
- "source": [
692
- "score = eval_pred(upt4)\n",
693
- "og_data = {'Country': ['Iraq'], 'Continent': ['Asia'], 'text':[upt4], 'pred':[score]}\n",
694
- "og_df = pd.DataFrame(og_data)\n",
695
- "og_df"
696
- ]
697
- },
698
- {
699
- "cell_type": "markdown",
700
- "id": "8b349a87-fe83-4045-a63a-d054489bb461",
701
- "metadata": {},
702
- "source": [
703
- "## Load the dummy countries I created to test generating counterfactuals\n",
704
- "I decided to test the pipeline with a known problem space. Taking the text from Aurélien Géron's observations on Twitter, I built a small-scale test using what I had learned, to prove that we can identify a particular part of speech, use it to generate counterfactuals, and then build a visualization from it."
705
- ]
706
- },
707
- {
708
- "cell_type": "code",
709
- "execution_count": null,
710
- "id": "46ab3332-964c-449f-8cef-a9ff7df397a4",
711
- "metadata": {},
712
- "outputs": [],
713
- "source": [
714
- "#load my test data from https://github.com/dbouquin/IS_608/blob/master/NanosatDB_munging/Countries-Continents.csv\n",
715
- "df = pd.read_csv(\"Assets/Countries/countries.csv\")\n",
716
- "df.head()"
717
- ]
718
- },
719
- {
720
- "cell_type": "code",
721
- "execution_count": null,
722
- "id": "51c75894-80af-4625-8ce8-660e500b496b",
723
- "metadata": {},
724
- "outputs": [],
725
- "source": [
726
- "#Note: we will need to build the function that lets the user choose from the options available. For now I have hard coded it as \"selection\", from \"user_options\".\n",
727
- "user_options = select_crit(doc4)\n",
728
- "print(user_options)\n",
729
- "selection = user_options[1]\n",
730
- "selection"
731
- ]
732
- },
733
- {
734
- "cell_type": "code",
735
- "execution_count": null,
736
- "id": "3d6419f1-bf7d-44bc-afb8-ac26ef9002df",
737
- "metadata": {},
738
- "outputs": [],
739
- "source": [
740
- "#Create a function that generates the counterfactuals within a data frame.\n",
741
- "def gen_cf_country(df,document,selection):\n",
742
- " df['text'] = df.Country.apply(lambda x: document.text.replace(selection,x))\n",
743
- " df['prediction'] = df.text.apply(eval_pred_test)\n",
744
- " #added this because I think it will make the end results better if we ensure the seed is in the data we generate counterfactuals from.\n",
745
- " df['seed'] = df.Country.apply(lambda x: 'seed' if x == selection else 'alternative')\n",
746
- " return df\n",
747
- "\n",
748
- "df = gen_cf_country(df,doc4,selection)\n",
749
- "df.head()"
750
- ]
751
- },
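One caveat about `gen_cf_country`: `str.replace` performs plain substring substitution, so a selection that also occurs inside another word would be replaced there too. A more conservative, token-aware variant might look like this sketch (an assumption on my part, not part of the original design):

```python
import re

def swap_whole_word(text, selection, alternative):
    # \b word boundaries keep the substitution to whole-word matches only.
    return re.sub(rf"\b{re.escape(selection)}\b", alternative, text)

print(swap_whole_word("This film was filmed in Iraq.", "Iraq", "Chad"))
```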
752
- {
753
- "cell_type": "code",
754
- "execution_count": null,
755
- "id": "aec241a6-48c3-48c6-9e7f-d22612eaedff",
756
- "metadata": {},
757
- "outputs": [],
758
- "source": [
759
- "#Display Counterfactuals and Original in a layered chart. I couldn't get this to provide a legend.\n",
760
- "og = alt.Chart(og_df).encode(\n",
761
- " x='Continent:N',\n",
762
- " y='pred:Q'\n",
763
- ").mark_square(color='green', size = 200, opacity=.5)\n",
764
- "\n",
765
- "cf = alt.Chart(df).encode(\n",
766
- " x='Continent:N', # specify nominal data\n",
767
- " y='prediction:Q', # specify quantitative data\n",
768
- ").mark_circle(color='blue', size=50, opacity =.25)\n",
769
- "\n",
770
- "alt_plot = alt.LayerChart(layer=[cf,og], width = 300)\n",
771
- "alt_plot"
772
- ]
773
- },
774
- {
775
- "cell_type": "code",
776
- "execution_count": null,
777
- "id": "ecb9dd41-2fab-49bd-bae5-30300ce39e41",
778
- "metadata": {},
779
- "outputs": [],
780
- "source": [
781
- "single_nearest = alt.selection_single(on='mouseover', nearest=True)\n",
782
- "full = alt.Chart(df).encode(\n",
783
- " alt.X('Continent:N'), # specify nominal data\n",
784
- " alt.Y('prediction:Q'), # specify quantitative data\n",
785
- " color=alt.Color('seed:N', legend=alt.Legend(title=\"Seed or Alternative\")),\n",
786
- " size=alt.Size('seed:N', alt.scale(domain=[50,100])),\n",
787
- " tooltip=('Country','prediction')\n",
788
- ").mark_circle(opacity=.5).properties(width=300).add_selection(single_nearest)\n",
789
- "\n",
790
- "full"
791
- ]
792
- },
793
- {
794
- "cell_type": "code",
795
- "execution_count": null,
796
- "id": "56bc30d7-03a5-43ff-9dfe-878197628305",
797
- "metadata": {},
798
- "outputs": [],
799
- "source": [
800
- "df2 = df.nlargest(5, 'prediction')\n",
801
- "df3 = df.nsmallest(5, 'prediction')\n",
802
- "frames = [df2,df3]\n",
803
- "results = pd.concat(frames)"
804
- ]
805
- },
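Note that taking only the 5 largest and 5 smallest predictions can silently drop the seed row when its prediction falls in the middle of the range. A sketch that keeps it explicitly (my assumption about the intended behavior):

```python
# Keep the top and bottom 5 plus the seed row, de-duplicated.
results = pd.concat([
    df.nlargest(5, 'prediction'),
    df.nsmallest(5, 'prediction'),
    df[df.seed == 'seed'],
]).drop_duplicates()
```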
806
- {
807
- "cell_type": "code",
808
- "execution_count": null,
809
- "id": "1610bb48-c9b9-4bee-bcb5-999886acb9e3",
810
- "metadata": {},
811
- "outputs": [],
812
- "source": [
813
- "bar = alt.Chart(results).encode( \n",
814
- " alt.X('prediction:Q'), \n",
815
- " alt.Y('Country:N', sort=\"-x\"),\n",
816
- " color=alt.Color('seed:N', legend=alt.Legend(title=\"Seed or Alternative\")),\n",
817
- " size='seed:N',\n",
818
- " tooltip=('Country','prediction')\n",
819
- ").mark_circle().properties(width=300).add_selection(single_nearest)\n",
820
- "\n",
821
- "bar"
822
- ]
823
- },
824
- {
825
- "cell_type": "markdown",
826
- "id": "84c40b74-95be-4c19-bd57-74e6004b950c",
827
- "metadata": {},
828
- "source": [
829
- "#### QA"
830
- ]
831
- },
832
- {
833
- "cell_type": "code",
834
- "execution_count": null,
835
- "id": "7d15c7d8-9fdb-4c5b-84fa-599839cbceac",
836
- "metadata": {},
837
- "outputs": [],
838
- "source": [
839
- "qa_txt = \"They serve halal food in Iraq and Egypt.\"\n",
840
- "qa_doc = nlp(qa_txt)"
841
- ]
842
- },
843
- {
844
- "cell_type": "code",
845
- "execution_count": null,
846
- "id": "d6956ddf-9287-419a-bb08-a3618f77700a",
847
- "metadata": {},
848
- "outputs": [],
849
- "source": [
850
- "displacy.render(qa_doc, style=\"dep\")"
851
- ]
852
- },
853
- {
854
- "cell_type": "code",
855
- "execution_count": null,
856
- "id": "88768d68-fe44-49ab-ac12-d41e6716b3b3",
857
- "metadata": {},
858
- "outputs": [],
859
- "source": [
860
- "select_crit(qa_doc)"
861
- ]
862
- },
863
- {
864
- "cell_type": "markdown",
865
- "id": "7bbc6c2e-df5d-4076-8532-8648fd818be4",
866
- "metadata": {},
867
- "source": [
868
- "# NLC-Gen\n",
869
- "### A Natural Language Counterfactual Generator for Exploring Bias in Sentiment Analysis Algorithms\n",
870
- "\n",
871
- "##### Overview\n",
872
- "This project is an extension of [Interactive Model Cards](https://github.com/amcrisan/interactive-model-cards). It focuses on providing a person more ways to explore the bias of a model through the generation of alternatives (technically [counterfactuals](https://plato.stanford.edu/entries/counterfactuals/#WhatCoun)). We believe the use of alternatives people can better understand the limitations of a model and develop productive skepticism around its usage and trustworthiness.\n",
873
- "\n",
874
- "##### Set up\n",
875
- "\n",
876
- "Download the files from Github then perform the commands below in \n",
877
- "```sh\n",
878
- "cd NLC-Gen\n",
879
- "pipenv install\n",
880
- "pipenv shell\n",
881
- "python -m spacy download en_core_web_lg\n",
882
- "streamlit run NLC-app.py\n",
883
- "```\n",
884
- "\n",
885
- "##### Known Limitations\n",
886
- "* Words not in the spaCy vocab for `en_core_web_lg` won't have vectors and so won't have the ability to create similarity scores.\n",
887
- "* WordNet provides many limitations due to its age and lack of funding for ongoing maintenance. It provides access to a large variety of the English language but certain words simply do not exist.\n",
888
- "* There are currently only 2 lists (Countries and Professions). We would like to find community curated lists for: Race, Sexual Orientation and Gender Identity (SOGI), Religion, age, and protected status.\n",
889
- "\n",
890
- "\n",
891
- "##### Key Dependencies and Packages\n",
892
- "\n",
893
- "1. [Hugging Face Transformers](https://huggingface.co/) - the model we've designed this iteration for is hosted on hugging face. It is: [distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).\n",
894
- "2. [Streamlit](https://streamlit.io) - This is the library we're using to build the prototype app because it is easy to stand up and quick to fix.\n",
895
- "3. [spaCy](https://spacy.io) - This is the main NLP Library we're using and it runs most of the text manipulation we're doing as part of the project.\n",
896
- "4. [NLTK + WordNet](https://www.nltk.org/howto/wordnet.html) - This is the initial lexical database we're using because it is accessible directly through Python and it is free. We will be considering a move to [ConceptNet](https://conceptnet.io/) for future iterations based on better lateral movement across edges.\n",
897
- "5. [Lime](https://github.com/marcotcr/lime) - We chose Lime over Shap because Lime has more of the functionality we need. Shap appears to provide greater performance but is not as easily suited to our original designs.\n",
898
- "6. [Altair](https://altair-viz.github.io/user_guide/encoding.html) - We're using Altair because it's well integrated into Streamlit.\n",
899
- "\n",
900
- "\n",
901
- "\n"
902
- ]
903
- },
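A quick way to check the first limitation above: tokens outside the `en_core_web_lg` vocabulary report `has_vector == False`, so similarity scores involving them are unreliable. A minimal sketch (the nonsense token is just an illustration):

```python
import spacy

nlp = spacy.load("en_core_web_lg")
for tok in nlp("Iraq frobnitz"):
    # Out-of-vocabulary tokens have no vector; similarity falls back to 0.
    print(tok.text, tok.has_vector, tok.is_oov)
```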
904
- {
905
- "cell_type": "code",
906
- "execution_count": null,
907
- "id": "fa224bed-3630-4485-8dbc-670aaf5e6b0a",
908
- "metadata": {},
909
- "outputs": [],
910
- "source": []
911
- }
912
- ],
913
- "metadata": {
914
- "kernelspec": {
915
- "display_name": "Python 3 (ipykernel)",
916
- "language": "python",
917
- "name": "python3"
918
- },
919
- "language_info": {
920
- "codemirror_mode": {
921
- "name": "ipython",
922
- "version": 3
923
- },
924
- "file_extension": ".py",
925
- "mimetype": "text/x-python",
926
- "name": "python",
927
- "nbconvert_exporter": "python",
928
- "pygments_lexer": "ipython3",
929
- "version": "3.8.8"
930
- }
931
- },
932
- "nbformat": 4,
933
- "nbformat_minor": 5
934
- }
VizNLC-gen-pipeline.ipynb DELETED
@@ -1,1175 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "8ea54fcd-ef4a-42cb-ae26-cbdc6f6ffc64",
6
- "metadata": {
7
- "tags": []
8
- },
9
- "source": [
10
- "# Duct Tape Pipeline\n",
11
- "To explore how users may interact with interactive visualizations of counterfactuals for evolving the Interactive Model Card, we will need to first find a way to generate counterfactuals based on a given input. We want the user to be able to provide their input and direct the system to generate counterfactuals based on a part of speech that is significant to the model. The system should then provide a data frame of counterfactuals to be used in an interactive visualization. Below is an example wireframe of the experience based on previous research.\n",
12
- "\n",
13
- "![wireframe](Assets/VizNLC-Wireframe-example.png)\n",
14
- "\n",
15
- "## Goals of this notebook\n",
16
- "* Clean up the flow in the \"duct tape pipeline\".\n",
17
- "* See if I can extract the LIME list for visualization"
18
- ]
19
- },
20
- {
21
- "cell_type": "markdown",
22
- "id": "736e6375-dd6d-4188-b8b1-92bded2bcd02",
23
- "metadata": {},
24
- "source": [
25
- "## Loading the libraries and models"
26
- ]
27
- },
28
- {
29
- "cell_type": "code",
30
- "execution_count": 1,
31
- "id": "7f581785-e642-4f74-9f67-06a63820eaf2",
32
- "metadata": {},
33
- "outputs": [],
34
- "source": [
35
- "#Import the libraries we know we'll need for the Generator.\n",
36
- "import pandas as pd, spacy, nltk, numpy as np\n",
37
- "from spacy import displacy\n",
38
- "from spacy.matcher import Matcher\n",
39
- "#!python -m spacy download en_core_web_sm\n",
40
- "nlp = spacy.load(\"en_core_web_md\")\n",
41
- "lemmatizer = nlp.get_pipe(\"lemmatizer\")\n",
42
- "\n",
43
- "#Import the libraries to support the model, predictions, and LIME.\n",
44
- "from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline\n",
45
- "import lime\n",
46
- "import torch\n",
47
- "import torch.nn.functional as F\n",
48
- "from lime.lime_text import LimeTextExplainer\n",
49
- "\n",
50
- "#Import the libraries for generating interactive visualizations.\n",
51
- "import altair as alt"
52
- ]
53
- },
54
- {
55
- "cell_type": "code",
56
- "execution_count": 2,
57
- "id": "cbe2b292-e33e-4915-8e61-bba5327fb643",
58
- "metadata": {},
59
- "outputs": [],
60
- "source": [
61
- "#Defining all necessary variables and instances.\n",
62
- "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n",
63
- "model = AutoModelForSequenceClassification.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n",
64
- "class_names = ['negative', 'positive']\n",
65
- "explainer = LimeTextExplainer(class_names=class_names)"
66
- ]
67
- },
68
- {
69
- "cell_type": "code",
70
- "execution_count": 3,
71
- "id": "197c3e26-0fdf-49c6-9135-57f1fd55d3e3",
72
- "metadata": {},
73
- "outputs": [],
74
- "source": [
75
- "#Defining a Predictor required for LIME to function.\n",
76
- "def predictor(texts):\n",
77
- " outputs = model(**tokenizer(texts, return_tensors=\"pt\", padding=True))\n",
78
- " probas = F.softmax(outputs.logits, dim=1).detach().numpy()\n",
79
- " return probas"
80
- ]
81
- },
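LIME's `LimeTextExplainer` expects the predictor to map a list of strings to an `(n_samples, n_classes)` array of probabilities. A quick sanity check of the function above (assuming the model and tokenizer are already loaded):

```python
probas = predictor(["This film was filmed in Iraq.", "I loved every minute of it."])
print(probas.shape)        # -> (2, 2); columns ordered [negative, positive]
print(probas.sum(axis=1))  # each row sums to ~1.0 after the softmax
```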
82
- {
83
- "cell_type": "code",
84
- "execution_count": 4,
85
- "id": "013af6ac-f7d1-41d2-a601-b0f9a4870815",
86
- "metadata": {},
87
- "outputs": [],
88
- "source": [
89
- "#Instantiate a matcher and use it to test some patterns.\n",
90
- "matcher = Matcher(nlp.vocab)\n",
91
- "pattern = [{\"ENT_TYPE\": {\"IN\":[\"NORP\",\"GPE\"]}}]\n",
92
- "matcher.add(\"proper_noun\", [pattern])\n",
93
- "pattern_test = [{\"DEP\": \"amod\"},{\"DEP\":\"attr\"},{\"TEXT\":\"-\"},{\"DEP\":\"attr\",\"OP\":\"+\"}]\n",
94
- "matcher.add(\"amod_attr\",[pattern_test])\n",
95
- "pattern_an = [{\"DEP\": \"amod\"},{\"POS\":{\"IN\":[\"NOUN\",\"PROPN\"]}},{\"DEP\":{\"NOT_IN\":[\"attr\"]}}]\n",
96
- "matcher.add(\"amod_noun\", [pattern_an])"
97
- ]
98
- },
99
- {
100
- "cell_type": "code",
101
- "execution_count": 5,
102
- "id": "f6ac821d-7b56-446e-b9ca-42a5f5afd198",
103
- "metadata": {},
104
- "outputs": [],
105
- "source": [
106
- "def match_this(matcher, doc):\n",
107
- " matches = matcher(doc)\n",
108
- " for match_id, start, end in matches:\n",
109
- " matched_span = doc[start:end]\n",
110
- " print(f\"Mached {matched_span.text} by the rule {nlp.vocab.strings[match_id]}.\")\n",
111
- " return matches"
112
- ]
113
- },
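A minimal sketch exercising the matcher and `match_this` defined above; a sentence with a NORP entity and an adjective-noun pair should typically fire the `proper_noun` and `amod_noun` rules:

```python
doc = nlp("The talented French director made a beautiful film yesterday.")
match_this(matcher, doc)  # prints each matched span and the rule that fired
```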
114
- {
115
- "cell_type": "markdown",
116
- "id": "c23d48c4-f5ab-4428-9244-0786e9903a8e",
117
- "metadata": {
118
- "tags": []
119
- },
120
- "source": [
121
- "## Building the Duct-Tape Pipeline cell-by-cell"
122
- ]
123
- },
124
- {
125
- "cell_type": "code",
126
- "execution_count": 6,
127
- "id": "a373fc00-401a-4def-9f09-de73d485ac13",
128
- "metadata": {},
129
- "outputs": [],
130
- "source": [
131
- "gender = [\"man\", \"woman\",\"girl\",\"boy\",\"male\",\"female\",\"husband\",\"wife\",\"girlfriend\",\"boyfriend\",\"brother\",\"sister\",\"aunt\",\"uncle\",\"grandma\",\"grandpa\",\"granny\",\"granps\",\"grandmother\",\"grandfather\",\"mama\",\"dada\",\"Ma\",\"Pa\",\"lady\",\"gentleman\"]"
132
- ]
133
- },
134
- {
135
- "cell_type": "code",
136
- "execution_count": 7,
137
- "id": "8b02a5d4-8a6b-4e5e-8f15-4f9182fe341f",
138
- "metadata": {},
139
- "outputs": [],
140
- "source": [
141
- "def select_crit(document, options=False, limelist=False):\n",
142
- " '''This function is meant to select the critical part of a sentence. Critical, in this context means\n",
143
- " the part of the sentence that is either: A) a PROPN from the correct entity group; B) an ADJ associated with a NOUN;\n",
144
- " C) a NOUN that represents gender. It also checks this against what the model thinks is important if the user defines \"options\" as \"LIME\" or True.'''\n",
145
- " chunks = list(document.noun_chunks)\n",
146
- " pos_options = []\n",
147
- " lime_options = []\n",
148
- " \n",
149
- " #Identify what the model cares about.\n",
150
- " if options:\n",
151
- " exp = explainer.explain_instance(document.text, predictor, num_features=15, num_samples=2000)\n",
152
- " lime_results = exp.as_list()\n",
153
- " #prints the results from lime for QA.\n",
154
- " if limelist == True:\n",
155
- " print(lime_results)\n",
156
- " for feature in lime_results:\n",
157
- " lime_options.append(feature[0])\n",
158
- " lime_results = pd.DataFrame(lime_results, columns=[\"Word\",\"Weight\"])\n",
159
- " \n",
160
- " #Identify what we care about \"parts of speech\"\n",
161
- " for chunk in chunks:\n",
162
- " #The use of chunk[-1] is due to testing that it appears to always match the root\n",
163
- " root = chunk[-1]\n",
164
- " #This currently matches to a list I've created. I don't know the best way to deal with this so I'm leaving it as is for the moment.\n",
165
- " if root.text.lower() in gender:\n",
166
- " cur_values = [token.text for token in chunk if token.pos_ in [\"NOUN\",\"ADJ\"]]\n",
167
- " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
168
- " pos_options.extend(cur_values)\n",
169
- " #print(f\"From {chunk.text}, {cur_values} added to pos_options due to gender.\") #for QA\n",
170
- " #This is currently set to pick up entities in a particular set of groups (which I recently expanded). Should it just pick up all named entities?\n",
171
- " elif root.ent_type_ in [\"GPE\",\"NORP\",\"DATE\",\"EVENT\"]:\n",
172
- " cur_values = []\n",
173
- " if (len(chunk) > 1) and (chunk[-2].dep_ == \"compound\"):\n",
174
- " #creates the compound element of the noun\n",
175
- " compound = [x.text for x in chunk if x.dep_ == \"compound\"]\n",
176
- " print(f\"This is the contents of {compound} and it is {all(elem in lime_options for elem in compound)} that all elements are present in {lime_options}.\") #for QA\n",
177
- " #checks to see all elements in the compound are important to the model or use the compound if not checking importance.\n",
178
- " if (all(elem in lime_options for elem in compound) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
179
- " #creates a span for the entirety of the compound noun and adds it to the list.\n",
180
- " span = -1 * (1 + len(compound))\n",
181
- " pos_options.append(chunk[span:].text)\n",
182
- " cur_values + [token.text for token in chunk if token.pos_ == \"ADJ\"]\n",
183
- " else: \n",
184
- " cur_values = [token.text for token in chunk if (token.ent_type_ in [\"GPE\",\"NORP\",\"DATE\",\"EVENT\"]) or (token.pos_ == \"ADJ\")]\n",
185
- " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
186
- " pos_options.extend(cur_values)\n",
187
- " print(f\"From {chunk.text}, {cur_values} and {pos_options} added to pos_options due to entity recognition.\") #for QA\n",
188
- " elif len(chunk) > 1:\n",
189
- " cur_values = [token.text for token in chunk if token.pos_ in [\"NOUN\",\"ADJ\"]]\n",
190
- " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
191
- " pos_options.extend(cur_values)\n",
192
- " print(f\"From {chunk.text}, {cur_values} added to pos_options due to wildcard.\") #for QA\n",
193
- " else:\n",
194
- " print(f\"No options added for \\'{chunk.text}\\' \")\n",
195
- " \n",
196
- " \n",
197
- " #Return the correct set of options based on user input, defaults to POS for simplicity.\n",
198
- " if options == \"LIME\":\n",
199
- " return pos_options, lime_results\n",
200
- " else:\n",
201
- " return pos_options"
202
- ]
203
- },
204
- {
205
- "cell_type": "code",
206
- "execution_count": 8,
207
- "id": "d43e202e-64b9-4cea-b117-82492c9ee5f4",
208
- "metadata": {},
209
- "outputs": [
210
- {
211
- "name": "stdout",
212
- "output_type": "stream",
213
- "text": [
214
- "From This film, ['film'] added to pos_options due to wildcard.\n",
215
- "From Iraq, ['Iraq'] and ['film', 'Iraq'] added to pos_options due to entity recognition.\n"
216
- ]
217
- }
218
- ],
219
- "source": [
220
- "#Test to make sure all three options work\n",
221
- "text4 = \"This film was filmed in Iraq.\"\n",
222
- "doc4 = nlp(text4)\n",
223
- "lime4, limedf = select_crit(doc4,options=\"LIME\")"
224
- ]
225
- },
226
- {
227
- "cell_type": "code",
228
- "execution_count": 9,
229
- "id": "a0e55a24-65df-429e-a0cd-8daf91a5d242",
230
- "metadata": {},
231
- "outputs": [
232
- {
233
- "data": {
234
- "text/html": [
235
- "\n",
236
- "<div id=\"altair-viz-23e37c16acf34cbead4ebdbe2bddfdb5\"></div>\n",
237
- "<script type=\"text/javascript\">\n",
238
- " var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
239
- " (function(spec, embedOpt){\n",
240
- " let outputDiv = document.currentScript.previousElementSibling;\n",
241
- " if (outputDiv.id !== \"altair-viz-23e37c16acf34cbead4ebdbe2bddfdb5\") {\n",
242
- " outputDiv = document.getElementById(\"altair-viz-23e37c16acf34cbead4ebdbe2bddfdb5\");\n",
243
- " }\n",
244
- " const paths = {\n",
245
- " \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
246
- " \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
247
- " \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
248
- " \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
249
- " };\n",
250
- "\n",
251
- " function maybeLoadScript(lib, version) {\n",
252
- " var key = `${lib.replace(\"-\", \"\")}_version`;\n",
253
- " return (VEGA_DEBUG[key] == version) ?\n",
254
- " Promise.resolve(paths[lib]) :\n",
255
- " new Promise(function(resolve, reject) {\n",
256
- " var s = document.createElement('script');\n",
257
- " document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
258
- " s.async = true;\n",
259
- " s.onload = () => {\n",
260
- " VEGA_DEBUG[key] = version;\n",
261
- " return resolve(paths[lib]);\n",
262
- " };\n",
263
- " s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
264
- " s.src = paths[lib];\n",
265
- " });\n",
266
- " }\n",
267
- "\n",
268
- " function showError(err) {\n",
269
- " outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
270
- " throw err;\n",
271
- " }\n",
272
- "\n",
273
- " function displayChart(vegaEmbed) {\n",
274
- " vegaEmbed(outputDiv, spec, embedOpt)\n",
275
- " .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
276
- " }\n",
277
- "\n",
278
- " if(typeof define === \"function\" && define.amd) {\n",
279
- " requirejs.config({paths});\n",
280
- " require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
281
- " } else {\n",
282
- " maybeLoadScript(\"vega\", \"5\")\n",
283
- " .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
284
- " .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
285
- " .catch(showError)\n",
286
- " .then(() => displayChart(vegaEmbed));\n",
287
- " }\n",
288
- " })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300, \"strokeWidth\": 0}, \"axis\": {\"grid\": false}}, \"layer\": [{\"mark\": \"bar\", \"encoding\": {\"color\": {\"field\": \"Weight\", \"legend\": null, \"scale\": {\"domain\": [0], \"range\": \"diverging\", \"scheme\": \"blueorange\", \"type\": \"threshold\"}, \"type\": \"quantitative\"}, \"tooltip\": [{\"field\": \"Word\", \"type\": \"nominal\"}, {\"field\": \"Weight\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"Weight\", \"scale\": {\"domain\": [-1, 1]}, \"type\": \"quantitative\"}, \"y\": {\"axis\": null, \"field\": \"Word\", \"sort\": \"x\", \"type\": \"nominal\"}}, \"title\": \"Importance of individual words\"}, {\"mark\": {\"type\": \"text\", \"align\": \"right\", \"baseline\": \"middle\", \"fill\": \"black\"}, \"encoding\": {\"color\": {\"field\": \"Weight\", \"legend\": null, \"scale\": {\"domain\": [0], \"range\": \"diverging\", \"scheme\": \"blueorange\", \"type\": \"threshold\"}, \"type\": \"quantitative\"}, \"text\": {\"field\": \"Word\", \"type\": \"nominal\"}, \"tooltip\": [{\"field\": \"Word\", \"type\": \"nominal\"}, {\"field\": \"Weight\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"Weight\", \"scale\": {\"domain\": [-1, 1]}, \"type\": \"quantitative\"}, \"y\": {\"axis\": null, \"field\": \"Word\", \"sort\": \"x\", \"type\": \"nominal\"}}, \"title\": \"Importance of individual words\"}], \"data\": {\"name\": \"data-1b001587c028498e70538ed310063e51\"}, \"width\": 300, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-1b001587c028498e70538ed310063e51\": [{\"Word\": \"Iraq\", \"Weight\": -0.9358529031331603}, {\"Word\": \"was\", \"Weight\": -0.0358845002692577}, {\"Word\": \"in\", \"Weight\": -0.017416213388210394}, {\"Word\": \"filmed\", \"Weight\": 0.00802450706528586}, {\"Word\": \"film\", \"Weight\": 0.0077573875142285895}, {\"Word\": \"This\", \"Weight\": 0.0031263867499817305}]}}, {\"mode\": \"vega-lite\"});\n",
289
- "</script>"
290
- ],
291
- "text/plain": [
292
- "alt.LayerChart(...)"
293
- ]
294
- },
295
- "execution_count": 9,
296
- "metadata": {},
297
- "output_type": "execute_result"
298
- }
299
- ],
300
- "source": [
301
- "single_nearest = alt.selection_single(on='mouseover', nearest=True)\n",
302
- "viz = alt.Chart(limedf).encode(\n",
303
- " alt.X('Weight:Q', scale=alt.Scale(domain=(-1, 1))),\n",
304
- " alt.Y('Word:N', sort='x', axis=None),\n",
305
- " color=alt.Color(\"Weight\", scale=alt.Scale(scheme='blueorange', domain=[0], type=\"threshold\", range='diverging'), legend=None),\n",
306
- " tooltip = (\"Word\",\"Weight\")\n",
307
- ").mark_bar().properties(title =\"Importance of individual words\")\n",
308
- "\n",
309
- "text = viz.mark_text(\n",
310
- " fill=\"black\",\n",
311
- " align='right',\n",
312
- " baseline='middle'\n",
313
- ").encode(\n",
314
- " text='Word:N'\n",
315
- ")\n",
316
- "limeplot = alt.LayerChart(layer=[viz,text], width = 300).configure_axis(grid=False).configure_view(strokeWidth=0)\n",
317
- "limeplot"
318
- ]
319
- },
320
- {
321
- "cell_type": "markdown",
322
- "id": "bf0512b6-336e-4842-9bde-34e03a1ca7c6",
323
- "metadata": {},
324
- "source": [
325
- "### Testing predictions and visualization\n",
326
- "Here I will attempt to import the model from huggingface, generate predictions for each of the sentences, and then visualize those predictions into a dot plot. If I can get this to work then I will move on to testing a full pipeline for letting the user pick which part of the sentence they wish to generate counterfactuals for."
327
- ]
328
- },
329
- {
330
- "cell_type": "code",
331
- "execution_count": 10,
332
- "id": "74c639bb-e74a-4a46-8047-3552265ae6a4",
333
- "metadata": {},
334
- "outputs": [],
335
- "source": [
336
- "#Discovering that there's a pipeline specifically to provide scores. \n",
337
- "#I used it to get a list of lists of dictionaries that I can then manipulate to calculate the proper prediction score.\n",
338
- "pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)"
339
- ]
340
- },
341
- {
342
- "cell_type": "code",
343
- "execution_count": 11,
344
- "id": "8726a284-99bd-47f1-9756-1c3ae603db10",
345
- "metadata": {},
346
- "outputs": [],
347
- "source": [
348
- "def eval_pred(text):\n",
349
- " '''A basic function for evaluating the prediction from the model and turning it into a visualization friendly number.'''\n",
350
- " preds = pipe(text)\n",
351
- " neg_score = preds[0][0]['score']\n",
352
- " pos_score = preds[0][1]['score']\n",
353
- " if pos_score >= neg_score:\n",
354
- " return pos_score\n",
355
- " if neg_score >= pos_score:\n",
356
- " return -1 * neg_score"
357
- ]
358
- },
359
- {
360
- "cell_type": "code",
361
- "execution_count": 12,
362
- "id": "f38f5061-f30a-4c81-9465-37951c3ad9f4",
363
- "metadata": {},
364
- "outputs": [],
365
- "source": [
366
- "def eval_pred_test(text, return_all = False):\n",
367
- " '''A basic function for evaluating the prediction from the model and turning it into a visualization friendly number.'''\n",
368
- " preds = pipe(text)\n",
369
- " neg_score = -1 * preds[0][0]['score']\n",
370
- " sent_neg = preds[0][0]['label']\n",
371
- " pos_score = preds[0][1]['score']\n",
372
- " sent_pos = preds[0][1]['label']\n",
373
- " prediction = 0\n",
374
- " sentiment = ''\n",
375
- " if pos_score > abs(neg_score):\n",
376
- " prediction = pos_score\n",
377
- " sentiment = sent_pos\n",
378
- " elif abs(neg_score) > pos_score:\n",
379
- " prediction = neg_score\n",
380
- " sentiment = sent_neg\n",
381
- " \n",
382
- " if return_all:\n",
383
- " return prediction, sentiment\n",
384
- " else:\n",
385
- " return prediction"
386
- ]
387
- },
388
- {
389
- "cell_type": "markdown",
390
- "id": "8b349a87-fe83-4045-a63a-d054489bb461",
391
- "metadata": {},
392
- "source": [
393
- "## Load the dummy countries I created to test generating counterfactuals\n",
394
- "I decided to test the pipeline with a known problem space. Taking the text from Aurélien Géron's observations in twitter, I built a built a small scale test using the learnings I had to prove that we can identify a particular part of speech, use it to generate counterfactuals, and then build a visualization off it."
395
- ]
396
- },
397
- {
398
- "cell_type": "code",
399
- "execution_count": 13,
400
- "id": "46ab3332-964c-449f-8cef-a9ff7df397a4",
401
- "metadata": {},
402
- "outputs": [
403
- {
404
- "data": {
405
- "text/html": [
406
- "<div>\n",
407
- "<style scoped>\n",
408
- " .dataframe tbody tr th:only-of-type {\n",
409
- " vertical-align: middle;\n",
410
- " }\n",
411
- "\n",
412
- " .dataframe tbody tr th {\n",
413
- " vertical-align: top;\n",
414
- " }\n",
415
- "\n",
416
- " .dataframe thead th {\n",
417
- " text-align: right;\n",
418
- " }\n",
419
- "</style>\n",
420
- "<table border=\"1\" class=\"dataframe\">\n",
421
- " <thead>\n",
422
- " <tr style=\"text-align: right;\">\n",
423
- " <th></th>\n",
424
- " <th>Country</th>\n",
425
- " <th>Continent</th>\n",
426
- " </tr>\n",
427
- " </thead>\n",
428
- " <tbody>\n",
429
- " <tr>\n",
430
- " <th>0</th>\n",
431
- " <td>Algeria</td>\n",
432
- " <td>Africa</td>\n",
433
- " </tr>\n",
434
- " <tr>\n",
435
- " <th>1</th>\n",
436
- " <td>Angola</td>\n",
437
- " <td>Africa</td>\n",
438
- " </tr>\n",
439
- " <tr>\n",
440
- " <th>2</th>\n",
441
- " <td>Benin</td>\n",
442
- " <td>Africa</td>\n",
443
- " </tr>\n",
444
- " <tr>\n",
445
- " <th>3</th>\n",
446
- " <td>Botswana</td>\n",
447
- " <td>Africa</td>\n",
448
- " </tr>\n",
449
- " <tr>\n",
450
- " <th>4</th>\n",
451
- " <td>Burkina</td>\n",
452
- " <td>Africa</td>\n",
453
- " </tr>\n",
454
- " </tbody>\n",
455
- "</table>\n",
456
- "</div>"
457
- ],
458
- "text/plain": [
459
- " Country Continent\n",
460
- "0 Algeria Africa\n",
461
- "1 Angola Africa\n",
462
- "2 Benin Africa\n",
463
- "3 Botswana Africa\n",
464
- "4 Burkina Africa"
465
- ]
466
- },
467
- "execution_count": 13,
468
- "metadata": {},
469
- "output_type": "execute_result"
470
- }
471
- ],
472
- "source": [
473
- "#load my test data from https://github.com/dbouquin/IS_608/blob/master/NanosatDB_munging/Countries-Continents.csv\n",
474
- "df = pd.read_csv(\"Assets/Countries/countries.csv\")\n",
475
- "df.head()"
476
- ]
477
- },
478
- {
479
- "cell_type": "code",
480
- "execution_count": 14,
481
- "id": "51c75894-80af-4625-8ce8-660e500b496b",
482
- "metadata": {},
483
- "outputs": [
484
- {
485
- "name": "stdout",
486
- "output_type": "stream",
487
- "text": [
488
- "From This film, ['film'] added to pos_options due to wildcard.\n",
489
- "From Iraq, ['Iraq'] and ['film', 'Iraq'] added to pos_options due to entity recognition.\n",
490
- "['film', 'Iraq']\n"
491
- ]
492
- },
493
- {
494
- "data": {
495
- "text/plain": [
496
- "'Iraq'"
497
- ]
498
- },
499
- "execution_count": 14,
500
- "metadata": {},
501
- "output_type": "execute_result"
502
- }
503
- ],
504
- "source": [
505
- "#Note: we will need to build the function that lets the user choose from the options available. For now I have hard coded it as \"selection\", from \"user_options\".\n",
506
- "user_options = select_crit(doc4)\n",
507
- "print(user_options)\n",
508
- "selection = user_options[1]\n",
509
- "selection"
510
- ]
511
- },
512
- {
513
- "cell_type": "code",
514
- "execution_count": 15,
515
- "id": "3d6419f1-bf7d-44bc-afb8-ac26ef9002df",
516
- "metadata": {},
517
- "outputs": [
518
- {
519
- "data": {
520
- "text/html": [
521
- "<div>\n",
522
- "<style scoped>\n",
523
- " .dataframe tbody tr th:only-of-type {\n",
524
- " vertical-align: middle;\n",
525
- " }\n",
526
- "\n",
527
- " .dataframe tbody tr th {\n",
528
- " vertical-align: top;\n",
529
- " }\n",
530
- "\n",
531
- " .dataframe thead th {\n",
532
- " text-align: right;\n",
533
- " }\n",
534
- "</style>\n",
535
- "<table border=\"1\" class=\"dataframe\">\n",
536
- " <thead>\n",
537
- " <tr style=\"text-align: right;\">\n",
538
- " <th></th>\n",
539
- " <th>Country</th>\n",
540
- " <th>Continent</th>\n",
541
- " <th>text</th>\n",
542
- " <th>prediction</th>\n",
543
- " <th>seed</th>\n",
544
- " </tr>\n",
545
- " </thead>\n",
546
- " <tbody>\n",
547
- " <tr>\n",
548
- " <th>0</th>\n",
549
- " <td>Algeria</td>\n",
550
- " <td>Africa</td>\n",
551
- " <td>This film was filmed in Algeria.</td>\n",
552
- " <td>0.806454</td>\n",
553
- " <td>alternative</td>\n",
554
- " </tr>\n",
555
- " <tr>\n",
556
- " <th>1</th>\n",
557
- " <td>Angola</td>\n",
558
- " <td>Africa</td>\n",
559
- " <td>This film was filmed in Angola.</td>\n",
560
- " <td>-0.775854</td>\n",
561
- " <td>alternative</td>\n",
562
- " </tr>\n",
563
- " <tr>\n",
564
- " <th>2</th>\n",
565
- " <td>Benin</td>\n",
566
- " <td>Africa</td>\n",
567
- " <td>This film was filmed in Benin.</td>\n",
568
- " <td>0.962272</td>\n",
569
- " <td>alternative</td>\n",
570
- " </tr>\n",
571
- " <tr>\n",
572
- " <th>3</th>\n",
573
- " <td>Botswana</td>\n",
574
- " <td>Africa</td>\n",
575
- " <td>This film was filmed in Botswana.</td>\n",
576
- " <td>0.785837</td>\n",
577
- " <td>alternative</td>\n",
578
- " </tr>\n",
579
- " <tr>\n",
580
- " <th>4</th>\n",
581
- " <td>Burkina</td>\n",
582
- " <td>Africa</td>\n",
583
- " <td>This film was filmed in Burkina.</td>\n",
584
- " <td>0.872980</td>\n",
585
- " <td>alternative</td>\n",
586
- " </tr>\n",
587
- " </tbody>\n",
588
- "</table>\n",
589
- "</div>"
590
- ],
591
- "text/plain": [
592
- " Country Continent text prediction \\\n",
593
- "0 Algeria Africa This film was filmed in Algeria. 0.806454 \n",
594
- "1 Angola Africa This film was filmed in Angola. -0.775854 \n",
595
- "2 Benin Africa This film was filmed in Benin. 0.962272 \n",
596
- "3 Botswana Africa This film was filmed in Botswana. 0.785837 \n",
597
- "4 Burkina Africa This film was filmed in Burkina. 0.872980 \n",
598
- "\n",
599
- " seed \n",
600
- "0 alternative \n",
601
- "1 alternative \n",
602
- "2 alternative \n",
603
- "3 alternative \n",
604
- "4 alternative "
605
- ]
606
- },
607
- "execution_count": 15,
608
- "metadata": {},
609
- "output_type": "execute_result"
610
- }
611
- ],
612
- "source": [
613
- "#Create a function that generates the counterfactuals within a data frame.\n",
614
- "def gen_cf_country(df,document,selection):\n",
615
- " df['text'] = df.Country.apply(lambda x: document.text.replace(selection,x))\n",
616
- " df['prediction'] = df.text.apply(eval_pred_test)\n",
617
- " #added this because I think it will make the end results better if we ensure the seed is in the data we generate counterfactuals from.\n",
618
- " df['seed'] = df.Country.apply(lambda x: 'seed' if x == selection else 'alternative')\n",
619
- " return df\n",
620
- "\n",
621
- "df = gen_cf_country(df,doc4,selection)\n",
622
- "df.head()"
623
- ]
624
- },
625
- {
626
- "cell_type": "code",
627
- "execution_count": 16,
628
- "id": "ecb9dd41-2fab-49bd-bae5-30300ce39e41",
629
- "metadata": {},
630
- "outputs": [
631
- {
632
- "data": {
633
- "text/html": [
634
- "\n",
635
- "<div id=\"altair-viz-b04081e2f48148ebbc743fff61e76f2f\"></div>\n",
636
- "<script type=\"text/javascript\">\n",
637
- " var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
638
- " (function(spec, embedOpt){\n",
639
- " let outputDiv = document.currentScript.previousElementSibling;\n",
640
- " if (outputDiv.id !== \"altair-viz-b04081e2f48148ebbc743fff61e76f2f\") {\n",
641
- " outputDiv = document.getElementById(\"altair-viz-b04081e2f48148ebbc743fff61e76f2f\");\n",
642
- " }\n",
643
- " const paths = {\n",
644
- " \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
645
- " \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
646
- " \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
647
- " \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
648
- " };\n",
649
- "\n",
650
- " function maybeLoadScript(lib, version) {\n",
651
- " var key = `${lib.replace(\"-\", \"\")}_version`;\n",
652
- " return (VEGA_DEBUG[key] == version) ?\n",
653
- " Promise.resolve(paths[lib]) :\n",
654
- " new Promise(function(resolve, reject) {\n",
655
- " var s = document.createElement('script');\n",
656
- " document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
657
- " s.async = true;\n",
658
- " s.onload = () => {\n",
659
- " VEGA_DEBUG[key] = version;\n",
660
- " return resolve(paths[lib]);\n",
661
- " };\n",
662
- " s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
663
- " s.src = paths[lib];\n",
664
- " });\n",
665
- " }\n",
666
- "\n",
667
- " function showError(err) {\n",
668
- " outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
669
- " throw err;\n",
670
- " }\n",
671
- "\n",
672
- " function displayChart(vegaEmbed) {\n",
673
- " vegaEmbed(outputDiv, spec, embedOpt)\n",
674
- " .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
675
- " }\n",
676
- "\n",
677
- " if(typeof define === \"function\" && define.amd) {\n",
678
- " requirejs.config({paths});\n",
679
- " require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
680
- " } else {\n",
681
- " maybeLoadScript(\"vega\", \"5\")\n",
682
- " .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
683
- " .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
684
- " .catch(showError)\n",
685
- " .then(() => displayChart(vegaEmbed));\n",
686
- " }\n",
687
- " })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-d6144c20ed1c104065f226d393d7e424\"}, \"mark\": {\"type\": \"circle\", \"opacity\": 0.5}, \"encoding\": {\"color\": {\"field\": \"seed\", \"legend\": {\"title\": \"Seed or Alternative\"}, \"type\": \"nominal\"}, \"size\": {\"field\": \"seed\", \"type\": \"nominal\"}, \"tooltip\": [{\"field\": \"Country\", \"type\": \"nominal\"}, {\"field\": \"prediction\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"Continent\", \"type\": \"nominal\"}, \"y\": {\"field\": \"prediction\", \"type\": \"quantitative\"}}, \"selection\": {\"selector002\": {\"type\": \"single\", \"on\": \"mouseover\", \"nearest\": true}}, \"width\": 300, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-d6144c20ed1c104065f226d393d7e424\": [{\"Country\": \"Algeria\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Algeria.\", \"prediction\": 0.8064541816711426, \"seed\": \"alternative\"}, {\"Country\": \"Angola\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Angola.\", \"prediction\": -0.7758541703224182, \"seed\": \"alternative\"}, {\"Country\": \"Benin\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Benin.\", \"prediction\": 0.9622722268104553, \"seed\": \"alternative\"}, {\"Country\": \"Botswana\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Botswana.\", \"prediction\": 0.7858365774154663, \"seed\": \"alternative\"}, {\"Country\": \"Burkina\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Burkina.\", \"prediction\": 0.8729804754257202, \"seed\": \"alternative\"}, {\"Country\": \"Burundi\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Burundi.\", \"prediction\": -0.6306232810020447, \"seed\": \"alternative\"}, {\"Country\": \"Cameroon\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Cameroon.\", \"prediction\": 0.5283073782920837, \"seed\": \"alternative\"}, {\"Country\": \"Cape Verde\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Cape Verde.\", \"prediction\": 0.8932027220726013, \"seed\": \"alternative\"}, {\"Country\": \"Central African Republic\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Central African Republic.\", \"prediction\": 0.9326885342597961, \"seed\": \"alternative\"}, {\"Country\": \"Chad\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Chad.\", \"prediction\": 0.788737952709198, \"seed\": \"alternative\"}, {\"Country\": \"Comoros\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Comoros.\", \"prediction\": 0.9623100757598877, \"seed\": \"alternative\"}, {\"Country\": \"Congo\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Congo.\", \"prediction\": 0.6309685707092285, \"seed\": \"alternative\"}, {\"Country\": \"Congo, Democratic Republic of\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Congo, Democratic Republic of.\", \"prediction\": -0.54060298204422, \"seed\": \"alternative\"}, {\"Country\": \"Djibouti\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Djibouti.\", \"prediction\": 0.8894529938697815, \"seed\": \"alternative\"}, {\"Country\": \"Egypt\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Egypt.\", \"prediction\": 0.9648140072822571, \"seed\": \"alternative\"}, {\"Country\": \"Equatorial Guinea\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Equatorial Guinea.\", 
\"prediction\": 0.6021467447280884, \"seed\": \"alternative\"}, {\"Country\": \"Eritrea\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Eritrea.\", \"prediction\": 0.5404142141342163, \"seed\": \"alternative\"}, {\"Country\": \"Ethiopia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Ethiopia.\", \"prediction\": 0.7997546195983887, \"seed\": \"alternative\"}, {\"Country\": \"Gabon\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Gabon.\", \"prediction\": -0.8517823219299316, \"seed\": \"alternative\"}, {\"Country\": \"Gambia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Gambia.\", \"prediction\": -0.5401656031608582, \"seed\": \"alternative\"}, {\"Country\": \"Ghana\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Ghana.\", \"prediction\": 0.9684805870056152, \"seed\": \"alternative\"}, {\"Country\": \"Guinea\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Guinea.\", \"prediction\": 0.6188081502914429, \"seed\": \"alternative\"}, {\"Country\": \"Guinea-Bissau\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Guinea-Bissau.\", \"prediction\": -0.500963032245636, \"seed\": \"alternative\"}, {\"Country\": \"Ivory Coast\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Ivory Coast.\", \"prediction\": 0.9872506856918335, \"seed\": \"alternative\"}, {\"Country\": \"Kenya\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Kenya.\", \"prediction\": 0.9789031744003296, \"seed\": \"alternative\"}, {\"Country\": \"Lesotho\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Lesotho.\", \"prediction\": 0.6674107313156128, \"seed\": \"alternative\"}, {\"Country\": \"Liberia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Liberia.\", \"prediction\": -0.6720185279846191, \"seed\": \"alternative\"}, {\"Country\": \"Libya\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Libya.\", \"prediction\": 0.53217613697052, \"seed\": \"alternative\"}, {\"Country\": \"Madagascar\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Madagascar.\", \"prediction\": 0.9730344414710999, \"seed\": \"alternative\"}, {\"Country\": \"Malawi\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Malawi.\", \"prediction\": -0.7816339135169983, \"seed\": \"alternative\"}, {\"Country\": \"Mali\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Mali.\", \"prediction\": -0.6651991009712219, \"seed\": \"alternative\"}, {\"Country\": \"Mauritania\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Mauritania.\", \"prediction\": 0.6149344444274902, \"seed\": \"alternative\"}, {\"Country\": \"Mauritius\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Mauritius.\", \"prediction\": 0.9310740828514099, \"seed\": \"alternative\"}, {\"Country\": \"Morocco\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Morocco.\", \"prediction\": 0.9121577143669128, \"seed\": \"alternative\"}, {\"Country\": \"Mozambique\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Mozambique.\", \"prediction\": -0.7047757506370544, \"seed\": \"alternative\"}, {\"Country\": \"Namibia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Namibia.\", \"prediction\": -0.5836523175239563, \"seed\": \"alternative\"}, {\"Country\": \"Niger\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Niger.\", \"prediction\": -0.6313472390174866, \"seed\": 
\"alternative\"}, {\"Country\": \"Nigeria\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Nigeria.\", \"prediction\": 0.7361583113670349, \"seed\": \"alternative\"}, {\"Country\": \"Rwanda\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Rwanda.\", \"prediction\": -0.7642565965652466, \"seed\": \"alternative\"}, {\"Country\": \"Sao Tome and Principe\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Sao Tome and Principe.\", \"prediction\": 0.6587044596672058, \"seed\": \"alternative\"}, {\"Country\": \"Senegal\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Senegal.\", \"prediction\": 0.8155898451805115, \"seed\": \"alternative\"}, {\"Country\": \"Seychelles\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Seychelles.\", \"prediction\": 0.8802894949913025, \"seed\": \"alternative\"}, {\"Country\": \"Sierra Leone\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Sierra Leone.\", \"prediction\": 0.9483919143676758, \"seed\": \"alternative\"}, {\"Country\": \"Somalia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Somalia.\", \"prediction\": -0.6477505564689636, \"seed\": \"alternative\"}, {\"Country\": \"South Africa\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in South Africa.\", \"prediction\": 0.5048943161964417, \"seed\": \"alternative\"}, {\"Country\": \"South Sudan\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in South Sudan.\", \"prediction\": -0.8506219983100891, \"seed\": \"alternative\"}, {\"Country\": \"Sudan\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Sudan.\", \"prediction\": -0.8910807967185974, \"seed\": \"alternative\"}, {\"Country\": \"Swaziland\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Swaziland.\", \"prediction\": 0.7761040925979614, \"seed\": \"alternative\"}, {\"Country\": \"Tanzania\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Tanzania.\", \"prediction\": 0.669053316116333, \"seed\": \"alternative\"}, {\"Country\": \"Togo\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Togo.\", \"prediction\": 0.9404287934303284, \"seed\": \"alternative\"}, {\"Country\": \"Tunisia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Tunisia.\", \"prediction\": 0.8345948457717896, \"seed\": \"alternative\"}, {\"Country\": \"Uganda\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Uganda.\", \"prediction\": 0.7823328971862793, \"seed\": \"alternative\"}, {\"Country\": \"Zambia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Zambia.\", \"prediction\": -0.6479448080062866, \"seed\": \"alternative\"}, {\"Country\": \"Zimbabwe\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Zimbabwe.\", \"prediction\": 0.7163158059120178, \"seed\": \"alternative\"}, {\"Country\": \"Afghanistan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Afghanistan.\", \"prediction\": -0.8350331783294678, \"seed\": \"alternative\"}, {\"Country\": \"Bahrain\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Bahrain.\", \"prediction\": 0.9627965092658997, \"seed\": \"alternative\"}, {\"Country\": \"Bangladesh\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Bangladesh.\", \"prediction\": 0.6659616231918335, \"seed\": \"alternative\"}, {\"Country\": \"Bhutan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Bhutan.\", \"prediction\": 0.9108285307884216, \"seed\": 
- "[remainder of vega-lite spec truncated: interactive scatter of 'prediction' by 'Continent' for the template 'This film was filmed in {Country}.'; the seed Iraq scores -0.977, and the negative alternatives shown include East Timor -0.755, Korea North -0.693, Saudi Arabia -0.776, Syria -0.889, Vietnam -0.943, Yemen -0.857, CZ -0.962, Germany -0.772, Nicaragua -0.668, and Micronesia -0.570; all other countries listed here score positive]\n",
- "</script>"
689
- ],
690
- "text/plain": [
691
- "alt.Chart(...)"
692
- ]
693
- },
694
- "execution_count": 16,
695
- "metadata": {},
696
- "output_type": "execute_result"
697
- }
698
- ],
699
- "source": [
700
- "single_nearest = alt.selection_single(on='mouseover', nearest=True)\n",
701
- "full = alt.Chart(df).encode(\n",
702
- " alt.X('Continent:N'), # specify nominal data\n",
703
- " alt.Y('prediction:Q'), # specify quantitative data\n",
704
- " color=alt.Color('seed:N', legend=alt.Legend(title=\"Seed or Alternative\")),\n",
705
- " size='seed:N',\n",
706
- " tooltip=('Country','prediction')\n",
707
- ").mark_circle(opacity=.5).properties(width=300).add_selection(single_nearest)\n",
708
- "\n",
709
- "full"
710
- ]
711
- },
712
- {
713
- "cell_type": "code",
714
- "execution_count": 17,
715
- "id": "56bc30d7-03a5-43ff-9dfe-878197628305",
716
- "metadata": {},
717
- "outputs": [],
718
- "source": [
719
- "df2 = df.nlargest(5, 'prediction')\n",
720
- "df3 = df.nsmallest(5, 'prediction')\n",
721
- "frames = [df2,df3]\n",
722
- "results = pd.concat(frames)"
723
- ]
724
- },
725
- {
726
- "cell_type": "code",
727
- "execution_count": 18,
728
- "id": "1610bb48-c9b9-4bee-bcb5-999886acb9e3",
729
- "metadata": {},
730
- "outputs": [
731
- {
732
- "data": {
733
- "text/html": [
734
- "[vega-lite chart output: the five highest and five lowest predictions by Country. Highest: Monaco 0.997, Tuvalu 0.991, Philippines 0.989, Trinidad and Tobago 0.988, Albania 0.987. Lowest: Iraq -0.977 (seed), CZ -0.962, Vietnam -0.943, Sudan -0.891, Syria -0.889]"
789
- ],
790
- "text/plain": [
791
- "alt.Chart(...)"
792
- ]
793
- },
794
- "execution_count": 18,
795
- "metadata": {},
796
- "output_type": "execute_result"
797
- }
798
- ],
799
- "source": [
800
- "bar = alt.Chart(results).encode( \n",
801
- " alt.X('prediction:Q'), \n",
802
- " alt.Y('Country:N', sort=\"-x\"),\n",
803
- " color=alt.Color('seed:N', legend=alt.Legend(title=\"Seed or Alternative\")),\n",
804
- " size='seed:N',\n",
805
- " tooltip=('Country','prediction')\n",
806
- ").mark_circle().properties(width=300).add_selection(single_nearest)\n",
807
- "\n",
808
- "bar"
809
- ]
810
- },
811
- {
812
- "cell_type": "code",
813
- "execution_count": 34,
814
- "id": "96cd0798-5ac5-4ede-8373-e8ed71ab07b3",
815
- "metadata": {},
816
- "outputs": [],
817
- "source": [
818
- "def critical_words(document, options=False):\n",
819
- " '''This function is meant to select the critical part of a sentence. Critical, in this context, means\n",
820
- " the part of the sentence that is either: A) a PROPN from the correct entity group; B) an ADJ associated with a NOUN;\n",
821
- " C) a NOUN that represents gender. It also checks this against what the model thinks is important if the user defines \"options\" as \"LIME\" or True.'''\n",
822
- " if type(document) is not spacy.tokens.doc.Doc:\n",
823
- " document = nlp(document)\n",
824
- " chunks = list(document.noun_chunks)\n",
825
- " pos_options = []\n",
826
- " lime_options = []\n",
827
- " \n",
828
- " #Identify what the model cares about.\n",
829
- " if options:\n",
830
- " exp = explainer.explain_instance(document.text, predictor, num_features=15, num_samples=2000)\n",
831
- " lime_results = exp.as_list()\n",
832
- " for feature in lime_results:\n",
833
- " lime_options.append(feature[0])\n",
834
- " lime_results = pd.DataFrame(lime_results, columns=[\"Word\",\"Weight\"])\n",
835
- " \n",
836
- "  #Identify the parts of speech we care about. The first section focuses on NOUNs and their related ADJs.\n",
837
- " for chunk in chunks:\n",
838
- "  #In testing, chunk[-1] consistently matched the root of the chunk.\n",
839
- " root = chunk[-1]\n",
840
- " #This currently matches to a list I've created. I don't know the best way to deal with this so I'm leaving it as is for the moment.\n",
841
- " if root.ent_type_:\n",
842
- " cur_values = []\n",
843
- " if (len(chunk) > 1) and (chunk[-2].dep_ == \"compound\"):\n",
844
- " #creates the compound element of the noun\n",
845
- " compound = [x.text for x in chunk if x.dep_ == \"compound\"]\n",
846
- " print(f\"This is the contents of {compound} and it is {all(elem in lime_options for elem in compound)} that all elements are present in {lime_options}.\") #for QA\n",
847
- " #checks to see all elements in the compound are important to the model or use the compound if not checking importance.\n",
848
- "  if (all(elem in lime_options for elem in compound) and (options is True)) or ((options is False)):\n",
849
- " #creates a span for the entirety of the compound noun and adds it to the list.\n",
850
- " span = -1 * (1 + len(compound))\n",
851
- " pos_options.append(chunk[span:].text)\n",
852
- "  cur_values += [token.text for token in chunk if token.pos_ == \"ADJ\"]\n",
853
- " else:\n",
854
- "  print(f\"The elements in {compound} could not be added to the final list because they are not all relevant to the model.\")\n",
855
- " else: \n",
856
- " cur_values = [token.text for token in chunk if (token.ent_type_) or (token.pos_ == \"ADJ\")]\n",
857
- " if (all(elem in lime_options for elem in cur_values) and (options is True)) or ((options is False)):\n",
858
- " pos_options.extend(cur_values)\n",
859
- " print(f\"From {chunk.text}, {cur_values} added to pos_options due to entity recognition.\") #for QA\n",
860
- " elif len(chunk) >= 1:\n",
861
- " cur_values = [token.text for token in chunk if token.pos_ in [\"NOUN\",\"ADJ\"]]\n",
862
- " if (all(elem in lime_options for elem in cur_values) and (options is True)) or ((options is False)):\n",
863
- " pos_options.extend(cur_values)\n",
864
- " print(f\"From {chunk.text}, {cur_values} added to pos_options due to wildcard.\") #for QA\n",
865
- " else:\n",
866
- " print(f\"No options added for \\'{chunk.text}\\' \")\n",
867
- "  # Here I try to pick up personal pronouns and adjectival complements.\n",
868
- " for token in document:\n",
869
- " if (token.text not in pos_options) and ((token.text in lime_options) or (options == False)):\n",
870
- " #print(f\"executed {token.text} with {token.pos_} and {token.dep_}\") #QA\n",
871
- " if (token.pos_ == \"ADJ\") and (token.dep_ in [\"acomp\",\"conj\"]):\n",
872
- " pos_options.append(token.text) \n",
873
- " elif (token.pos_ == \"PRON\") and (token.morph.get(\"PronType\")[0] == \"Prs\"):\n",
874
- " pos_options.append(token.text)\n",
875
- " \n",
876
- " #Return the correct set of options based on user input, defaults to POS for simplicity.\n",
877
- " if options:\n",
878
- " return pos_options, lime_results\n",
879
- " else:\n",
880
- " return pos_options"
881
- ]
882
- },
883
- {
884
- "cell_type": "code",
885
- "execution_count": 20,
886
- "id": "b04e7783-e51b-49b0-8165-afe1d5a1c576",
887
- "metadata": {},
888
- "outputs": [],
889
- "source": [
890
- "#Testing new code\n",
891
- "a = \"People are fat and lazy.\"\n",
892
- "b = \"I think she is beautiful.\"\n",
893
- "doca = nlp(a)\n",
894
- "docb = nlp(b)"
895
- ]
896
- },
897
- {
898
- "cell_type": "code",
899
- "execution_count": 21,
900
- "id": "0a6bc521-9282-41ad-82c9-29e447d77635",
901
- "metadata": {},
902
- "outputs": [
903
- {
904
- "name": "stdout",
905
- "output_type": "stream",
906
- "text": [
907
- "No options added for 'People' \n"
908
- ]
909
- },
910
- {
911
- "data": {
912
- "text/plain": [
913
- "['fat', 'lazy']"
914
- ]
915
- },
916
- "execution_count": 21,
917
- "metadata": {},
918
- "output_type": "execute_result"
919
- }
920
- ],
921
- "source": [
922
- "optsa, limea = critical_words(doca, True)\n",
923
- "optsa"
924
- ]
925
- },
926
- {
927
- "cell_type": "code",
928
- "execution_count": 22,
929
- "id": "042e94d3-65a5-4a20-b69a-96ec3296d7d4",
930
- "metadata": {},
931
- "outputs": [],
932
- "source": [
933
- "def lime_viz(df):\n",
934
- " single_nearest = alt.selection_single(on='mouseover', nearest=True)\n",
935
- " viz = alt.Chart(df).encode(\n",
936
- " alt.X('Weight:Q', scale=alt.Scale(domain=(-1, 1))),\n",
937
- " alt.Y('Word:N', sort='x', axis=None),\n",
938
- " color=alt.Color(\"Weight\", scale=alt.Scale(scheme='blueorange', domain=[0], type=\"threshold\", range='diverging'), legend=None),\n",
939
- " tooltip = (\"Word\",\"Weight\")\n",
940
- " ).mark_bar().properties(title =\"Importance of individual words\")\n",
941
- "\n",
942
- " text = viz.mark_text(\n",
943
- " fill=\"black\",\n",
944
- " align='right',\n",
945
- " baseline='middle'\n",
946
- " ).encode(\n",
947
- " text='Word:N'\n",
948
- " )\n",
949
- " limeplot = alt.LayerChart(layer=[viz,text], width = 300).configure_axis(grid=False).configure_view(strokeWidth=0)\n",
950
- " return limeplot"
951
- ]
952
- },
953
- {
954
- "cell_type": "code",
955
- "execution_count": 23,
956
- "id": "924eeea8-1d5d-4fe7-8308-164521919269",
957
- "metadata": {},
958
- "outputs": [
959
- {
960
- "name": "stdout",
961
- "output_type": "stream",
962
- "text": [
963
- "No options added for 'I' \n",
964
- "From a white woman, ['white', 'woman'] added to pos_options due to wildcard.\n",
965
- "From the street, ['street'] added to pos_options due to wildcard.\n",
966
- "From an asian man, ['asian', 'man'] added to pos_options due to wildcard.\n"
967
- ]
968
- },
969
- {
970
- "data": {
971
- "text/plain": [
972
- "['white', 'woman', 'street', 'asian', 'man', 'I']"
973
- ]
974
- },
975
- "execution_count": 23,
976
- "metadata": {},
977
- "output_type": "execute_result"
978
- }
979
- ],
980
- "source": [
981
- "test8 = \"I saw a white woman walking down the street with an asian man.\"\n",
982
- "opts8, lime8 = critical_words(test8,True)\n",
983
- "opts8"
984
- ]
985
- },
986
- {
987
- "cell_type": "code",
988
- "execution_count": 24,
989
- "id": "734366df-ad99-4d80-87e1-51793e150681",
990
- "metadata": {},
991
- "outputs": [
992
- {
993
- "data": {
994
- "text/html": [
995
- "[vega-lite chart output: 'Importance of individual words', a LIME weight bar chart for test8: with 0.329, woman -0.261, asian 0.246, walking 0.192, white -0.149, down -0.145, the 0.141, I -0.087, street 0.067, a -0.032, an -0.007, saw 0.002, man -0.001]"
1050
- ],
1051
- "text/plain": [
1052
- "alt.LayerChart(...)"
1053
- ]
1054
- },
1055
- "execution_count": 24,
1056
- "metadata": {},
1057
- "output_type": "execute_result"
1058
- }
1059
- ],
1060
- "source": [
1061
- "lime_viz(lime8)"
1062
- ]
1063
- },
1064
- {
1065
- "cell_type": "code",
1066
- "execution_count": 25,
1067
- "id": "816e1c4b-7f02-41b1-b430-2f3750ae6c4a",
1068
- "metadata": {},
1069
- "outputs": [
1070
- {
1071
- "name": "stdout",
1072
- "output_type": "stream",
1073
- "text": [
1074
- "No options added for 'I' \n",
1075
- "From a white woman, ['white', 'woman'] added to pos_options due to wildcard.\n",
1076
- "From the street, ['street'] added to pos_options due to wildcard.\n",
1077
- "From an asian man, ['asian', 'man'] added to pos_options due to wildcard.\n"
1078
- ]
1079
- }
1080
- ],
1081
- "source": [
1082
- "probability, sentiment = eval_pred_test(test8, return_all=True)\n",
1083
- "options, lime = critical_words(test8,options=True)"
1084
- ]
1085
- },
1086
- {
1087
- "cell_type": "code",
1088
- "execution_count": 38,
1089
- "id": "a437a4eb-73b3-4b3c-a719-8dde2ad6dd3c",
1090
- "metadata": {},
1091
- "outputs": [
1092
- {
1093
- "name": "stdout",
1094
- "output_type": "stream",
1095
- "text": [
1096
- "From I, [] added to pos_options due to wildcard.\n",
1097
- "From men, ['men'] added to pos_options due to wildcard.\n",
1098
- "From women, ['women'] added to pos_options due to wildcard.\n",
1099
- "From the same respect, ['same', 'respect'] added to pos_options due to wildcard.\n"
1100
- ]
1101
- }
1102
- ],
1103
- "source": [
1104
- "bug = \"I find men and women deserve the same respect.\"\n",
1105
- "options = critical_words(bug)"
1106
- ]
1107
- },
1108
- {
1109
- "cell_type": "code",
1110
- "execution_count": 29,
1111
- "id": "8676defd-0908-4218-a1d6-218de3fb7119",
1112
- "metadata": {},
1113
- "outputs": [],
1114
- "source": [
1115
- "bug_doc = nlp(bug)"
1116
- ]
1117
- },
1118
- {
1119
- "cell_type": "code",
1120
- "execution_count": 35,
1121
- "id": "21b9e39b-2fcd-4c6f-8fe6-0d571cd79cca",
1122
- "metadata": {},
1123
- "outputs": [
1124
- {
1125
- "name": "stdout",
1126
- "output_type": "stream",
1127
- "text": [
1128
- "I\n",
1129
- "PRON\n",
1130
- "a man\n",
1131
- "NOUN\n",
1132
- "woman\n",
1133
- "NOUN\n",
1134
- "the same respect\n",
1135
- "NOUN\n"
1136
- ]
1137
- }
1138
- ],
1139
- "source": [
1140
- "for chunk in bug_doc.noun_chunks:\n",
1141
- " print(chunk.text)\n",
1142
- " print(chunk[-1].pos_)"
1143
- ]
1144
- },
1145
- {
1146
- "cell_type": "code",
1147
- "execution_count": null,
1148
- "id": "38279d2d-e763-4329-a65e-1a67d6f5ebb8",
1149
- "metadata": {},
1150
- "outputs": [],
1151
- "source": []
1152
- }
1153
- ],
1154
- "metadata": {
1155
- "kernelspec": {
1156
- "display_name": "Python 3 (ipykernel)",
1157
- "language": "python",
1158
- "name": "python3"
1159
- },
1160
- "language_info": {
1161
- "codemirror_mode": {
1162
- "name": "ipython",
1163
- "version": 3
1164
- },
1165
- "file_extension": ".py",
1166
- "mimetype": "text/x-python",
1167
- "name": "python",
1168
- "nbconvert_exporter": "python",
1169
- "pygments_lexer": "ipython3",
1170
- "version": "3.8.8"
1171
- }
1172
- },
1173
- "nbformat": 4,
1174
- "nbformat_minor": 5
1175
- }
WNgen.py CHANGED
@@ -2,7 +2,7 @@
2
  import re, nltk, pandas as pd, numpy as np, ssl, streamlit as st
3
  from nltk.corpus import wordnet
4
  import spacy
5
- nlp = spacy.load("en_core_web_lg")
6
 
7
  #Import necessary parts for predicting things.
8
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
@@ -13,14 +13,14 @@ model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-unca
13
  pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
14
 
15
  #If an error is thrown that the corpus "omw-1.4" isn't discoverable you can use this code. (https://stackoverflow.com/questions/38916452/nltk-download-ssl-certificate-verify-failed)
16
- '''try:
17
  _create_unverified_https_context = ssl._create_unverified_context
18
  except AttributeError:
19
  pass
20
  else:
21
  ssl._create_default_https_context = _create_unverified_https_context
22
 
23
- nltk.download('omw-1.4')'''
24
 
25
  # A simple function to pull synonyms and antonyms using spacy's POS
26
  def syn_ant(word,POS=False,human=True):
 
2
  import re, nltk, pandas as pd, numpy as np, ssl, streamlit as st
3
  from nltk.corpus import wordnet
4
  import spacy
5
+ nlp = spacy.load("Assets/Models/en_core_web_lg")
6
 
7
  #Import necessary parts for predicting things.
8
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
 
13
  pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
14
 
15
  #If an error is thrown that the corpus "omw-1.4" isn't discoverable you can use this code. (https://stackoverflow.com/questions/38916452/nltk-download-ssl-certificate-verify-failed)
16
+ try:
17
  _create_unverified_https_context = ssl._create_unverified_context
18
  except AttributeError:
19
  pass
20
  else:
21
  ssl._create_default_https_context = _create_unverified_https_context
22
 
23
+ nltk.download('omw-1.4')
24
 
25
  # A simple function to pull synonyms and antonyms using spacy's POS
26
  def syn_ant(word,POS=False,human=True):
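Note that un-commenting the SSL workaround leaves certificate verification disabled for every subsequent HTTPS request in the process, not just the one-time corpus fetch. A narrower variant (a sketch of an alternative, not what this commit does) skips the download when the corpus is already cached and restores verification afterwards:

```python
import ssl, nltk

try:
    nltk.data.find("corpora/omw-1.4")   # corpus already present, nothing to do
except LookupError:
    _default = ssl._create_default_https_context
    ssl._create_default_https_context = ssl._create_unverified_context
    try:
        nltk.download("omw-1.4")
    finally:
        ssl._create_default_https_context = _default   # re-enable certificate checks
```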
app.py CHANGED
@@ -1,7 +1,7 @@
1
  #Import the libraries we know we'll need for the Generator.
2
  import pandas as pd, spacy, nltk, numpy as np
3
  from spacy.matcher import Matcher
4
- nlp = spacy.load("en_core_web_lg")
5
 
6
  #Import the libraries to support the model and predictions.
7
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
 
1
  #Import the libraries we know we'll need for the Generator.
2
  import pandas as pd, spacy, nltk, numpy as np
3
  from spacy.matcher import Matcher
4
+ nlp = spacy.load("Assets/Models/en_core_web_lg")
5
 
6
  #Import the libraries to support the model and predictions.
7
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
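Both WNgen.py and app.py now load the vendored pipeline from `Assets/Models/en_core_web_lg` instead of the pip-installed package; `spacy.load` accepts a directory path as well as a package name. A minimal smoke test (a sketch; it assumes the repo root is the working directory, so the relative path resolves):

```python
import spacy

# Load the vendored pipeline by path rather than by installed package name.
nlp = spacy.load("Assets/Models/en_core_web_lg")
doc = nlp("This film was filmed in Iraq.")
print([(ent.text, ent.label_) for ent in doc.ents])  # typically [('Iraq', 'GPE')]
```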
cf-gen-pipeline.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
custom-named-entity-recognition.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
dynamic-word-list-generation.ipynb DELETED
@@ -1,1287 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "eeaa927c-b8ef-4ee5-ab03-8257899152fd",
6
- "metadata": {},
7
- "source": [
8
- "# Building Dynamic Wordlists from WordNet as a fallback\n",
9
- "\n",
10
- "I am using an article from [GeeksforGeeks](https://www.geeksforgeeks.org/get-synonymsantonyms-nltk-wordnet-python/amp/) to guide building lists using NLTK's `WordNet`. I am considering that this may be a way to avoid having to build custom lists and want to test it out.\n",
11
- "\n",
12
- "# Builds a dataframe dynamically from WordNet using NLTK.\n",
13
- "def wordnet_df(word,POS=False,seed_definition=None):\n",
14
- " pos_options = ['NOUN','VERB','ADJ','ADV']\n",
15
- " synonyms, antonyms = syn_ant(word,POS,False)\n",
16
- " #print(synonyms, antonyms) #for QA purposes\n",
17
- " words = []\n",
18
- " cats = []\n",
19
- " #WordNet hates spaces so you have to remove them\n",
20
- " m_word = word.replace(\" \", \"_\")\n",
21
- " \n",
22
- " #Allow the user to pick a seed definition if it is not provided directly to the function.\n",
23
- " if seed_definition is None:\n",
24
- " if POS in pos_options:\n",
25
- " seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word, pos=getattr(wordnet, POS))]\n",
26
- " else:\n",
27
- " seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word)]\n",
28
- " for d in range(len(seed_definitions)):\n",
29
- " print(f\"{d}: {seed_definitions[d]}\")\n",
30
- " choice = int(input(\"Which of the definitions above most aligns to your selection?\"))\n",
31
- " seed_definition = seed_definitions[choice]\n",
32
- " \n",
33
- " if POS in pos_options:\n",
34
- " for syn in wordnet.synsets(m_word, pos=getattr(wordnet, POS)):\n",
35
- " if check_sim(process_text(seed_definition),process_text(syn.definition())) > .7:\n",
36
- " cur_lemmas = syn.lemmas()\n",
37
- " hypos = syn.hyponyms()\n",
38
- " for hypo in hypos:\n",
39
- " cur_lemmas.extend(hypo.lemmas())\n",
40
- " for lemma in cur_lemmas:\n",
41
- " ll = lemma.name()\n",
42
- " cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
43
- " words.append(re.sub(\"_\",\" \",ll))\n",
44
- "\n",
45
- " if len(synonyms) > 0:\n",
46
- " for w in synonyms:\n",
47
- " w = w.replace(\" \",\"_\")\n",
48
- " for syn in wordnet.synsets(w, pos=getattr(wordnet, POS)):\n",
49
- " if check_sim(process_text(seed_definition),process_text(syn.definition())) > .6:\n",
50
- " cur_lemmas = syn.lemmas()\n",
51
- " hypos = syn.hyponyms()\n",
52
- " for hypo in hypos:\n",
53
- " cur_lemmas.extend(hypo.lemmas())\n",
54
- " for lemma in cur_lemmas:\n",
55
- " ll = lemma.name()\n",
56
- " cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
57
- " words.append(re.sub(\"_\",\" \",ll))\n",
58
- " if len(antonyms) > 0:\n",
59
- " for a in antonyms:\n",
60
- " a = a.replace(\" \",\"_\")\n",
61
- " for syn in wordnet.synsets(a, pos=getattr(wordnet, POS)):\n",
62
- " if check_sim(process_text(seed_definition),process_text(syn.definition())) > .6:\n",
63
- " cur_lemmas = syn.lemmas()\n",
64
- " hypos = syn.hyponyms()\n",
65
- " for hypo in hypos:\n",
66
- " cur_lemmas.extend(hypo.lemmas())\n",
67
- " for lemma in cur_lemmas:\n",
68
- " ll = lemma.name()\n",
69
- " cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
70
- " words.append(re.sub(\"_\",\" \",ll))\n",
71
- " else:\n",
72
- " for syn in wordnet.synsets(m_word):\n",
73
- " if check_sim(process_text(seed_definition),process_text(syn.definition())) > .7:\n",
74
- " cur_lemmas = syn.lemmas()\n",
75
- " hypos = syn.hyponyms()\n",
76
- " for hypo in hypos:\n",
77
- " cur_lemmas.extend(hypo.lemmas())\n",
78
- " for lemma in cur_lemmas:\n",
79
- " ll = lemma.name()\n",
80
- " cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
81
- " words.append(re.sub(\"_\",\" \",ll)) \n",
82
- " if len(synonyms) > 0:\n",
83
- " for w in synonyms:\n",
84
- " w = w.replace(\" \",\"_\")\n",
85
- " for syn in wordnet.synsets(w):\n",
86
- " if check_sim(process_text(seed_definition),process_text(syn.definition())) > .6:\n",
87
- " cur_lemmas = syn.lemmas()\n",
88
- " hypos = syn.hyponyms()\n",
89
- " for hypo in hypos:\n",
90
- " cur_lemmas.extend(hypo.lemmas())\n",
91
- " for lemma in cur_lemmas:\n",
92
- " ll = lemma.name()\n",
93
- " cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
94
- " words.append(re.sub(\"_\",\" \",ll))\n",
95
- " if len(antonyms) > 0:\n",
96
- " for a in antonyms:\n",
97
- " a = a.replace(\" \",\"_\")\n",
98
- " for syn in wordnet.synsets(a):\n",
99
- " if check_sim(process_text(seed_definition),process_text(syn.definition())) > .6:\n",
100
- " cur_lemmas = syn.lemmas()\n",
101
- " hypos = syn.hyponyms()\n",
102
- " for hypo in hypos:\n",
103
- " cur_lemmas.extend(hypo.lemmas())\n",
104
- " for lemma in cur_lemmas:\n",
105
- " ll = lemma.name()\n",
106
- " cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
107
- " words.append(re.sub(\"_\",\" \",ll))\n",
108
- "\n",
109
- " df = {\"Categories\":cats, \"Words\":words}\n",
110
- " df = pd.DataFrame(df) \n",
111
- " df = df.drop_duplicates().reset_index()\n",
112
- " df = df.drop(\"index\", axis=1)\n",
113
- " return df"
114
- ]
115
- },
116
- {
117
- "cell_type": "markdown",
118
- "id": "4048815e-8434-4db9-bbb2-652fe0076df3",
119
- "metadata": {},
120
- "source": [
121
- "# Building Dynamic Wordlists from WordNet as a fallback\n",
122
- "\n",
123
- "I am using an article from [GeeksforGeeks](https://www.geeksforgeeks.org/get-synonymsantonyms-nltk-wordnet-python/amp/) to guide building lists using NLTK's `WordNet`. I am considering that this may be a way to avoid having to build custom lists and want to test it out."
124
- ]
125
- },
126
- {
127
- "cell_type": "markdown",
128
- "id": "41374b5c-12c0-4e4d-aa73-20db04b280ff",
129
- "metadata": {},
130
- "source": [
131
- "# Building Dynamic Wordlists from WordNet as a fallback\n",
132
- "\n",
133
- "I am using an article from [GeeksforGeeks](https://www.geeksforgeeks.org/get-synonymsantonyms-nltk-wordnet-python/amp/) to guide building lists using NLTK's `WordNet`. I am considering that this may be a way to avoid having to build custom lists and want to test it out."
134
- ]
135
- },
136
- {
137
- "cell_type": "code",
138
- "execution_count": 1,
139
- "id": "26a97377-67be-4903-9bfa-e8660aeb8c90",
140
- "metadata": {},
141
- "outputs": [],
142
- "source": [
143
- "#Import necessary libraries.\n",
144
- "import re, nltk, pandas as pd, numpy as np, ssl\n",
145
- "from nltk.corpus import wordnet\n",
146
- "import spacy\n",
147
- "nlp = spacy.load(\"en_core_web_lg\")"
148
- ]
149
- },
150
- {
151
- "cell_type": "code",
152
- "execution_count": 2,
153
- "id": "42e7a838-bb82-4736-8f70-127c53fea68b",
154
- "metadata": {},
155
- "outputs": [
156
- {
157
- "name": "stderr",
158
- "output_type": "stream",
159
- "text": [
160
- "[nltk_data] Downloading package omw-1.4 to\n",
161
- "[nltk_data] /Users/nbutters/nltk_data...\n",
162
- "[nltk_data] Package omw-1.4 is already up-to-date!\n"
163
- ]
164
- },
165
- {
166
- "data": {
167
- "text/plain": [
168
- "True"
169
- ]
170
- },
171
- "execution_count": 2,
172
- "metadata": {},
173
- "output_type": "execute_result"
174
- }
175
- ],
176
- "source": [
177
- "#If an error is thrown that the corpus \"omw-1.4\" isn't discoverable you can use this code. (https://stackoverflow.com/questions/38916452/nltk-download-ssl-certificate-verify-failed)\n",
178
- "try:\n",
179
- " _create_unverified_https_context = ssl._create_unverified_context\n",
180
- "except AttributeError:\n",
181
- " pass\n",
182
- "else:\n",
183
- " ssl._create_default_https_context = _create_unverified_https_context\n",
184
- " \n",
185
- "nltk.download('omw-1.4')"
186
- ]
187
- },
188
- {
189
- "cell_type": "code",
190
- "execution_count": 15,
191
- "id": "14918489-e5fe-4898-8d4a-8bc0f7b1d9e0",
192
- "metadata": {},
193
- "outputs": [
194
- {
195
- "name": "stdout",
196
- "output_type": "stream",
197
- "text": [
198
- "[Synset('bantam.s.01')]\n",
199
- "16 [Synset('bantam.n.01'), Synset('bantam.s.01'), Synset('diminutive.n.01'), Synset('bantam.s.01'), Synset('lilliputian.n.01'), Synset('lilliputian.n.02'), Synset('lilliputian.a.01'), Synset('bantam.s.01'), Synset('fiddling.s.01'), Synset('dwarf.n.01'), Synset('bantam.s.01'), Synset('petite.n.01'), Synset('bantam.s.01'), Synset('bantam.s.01'), Synset('flyspeck.n.01'), Synset('bantam.s.01')]\n"
200
- ]
201
- }
202
- ],
203
- "source": [
204
- "hypos = wordnet.synsets(\"tiny\")\n",
205
- "print(hypos)\n",
206
- "new_list = []\n",
207
- "for syn in hypos:\n",
208
- " cur_lemmas = syn.lemmas()\n",
209
- " hypos = syn.hyponyms()\n",
210
- " for hypo in hypos:\n",
211
- " cur_lemmas.extend(hypo.lemmas())\n",
212
- " for lemma in cur_lemmas:\n",
213
- " ll = lemma.name()\n",
214
- " new_list.append(ll)\n",
215
- "syns = []\n",
216
- "for lemma in new_list:\n",
217
- " syns.extend(wordnet.synsets(lemma))\n",
218
- "print(len(syns),syns)"
219
- ]
220
- },
221
- {
222
- "cell_type": "code",
223
- "execution_count": null,
224
- "id": "c3047d11-0512-41af-9db7-62daa8cbb60d",
225
- "metadata": {},
226
- "outputs": [],
227
- "source": [
228
- "#Here I define a few test sentences from the Duct-Tape-Pipeline.\n",
229
- "upt1 = \"I like movies starring black actors.\"\n",
230
- "upt2 = \"I am a black trans-woman.\"\n",
231
- "upt3 = \"Native Americans deserve to have their land back.\"\n",
232
- "upt4 = \"This movie was filmed in Iraq.\""
233
- ]
234
- },
235
- {
236
- "cell_type": "code",
237
- "execution_count": 16,
238
- "id": "b52425b5-2c4d-4a31-a240-feabc319198b",
239
- "metadata": {},
240
- "outputs": [],
241
- "source": [
242
- "# A simple function to pull synonyms and antonyms using spacy's POS\n",
243
- "def syn_ant(word,POS=False,human=True):\n",
244
- " pos_options = ['NOUN','VERB','ADJ','ADV']\n",
245
- " synonyms = [] \n",
246
- " antonyms = []\n",
247
- " #WordNet hates spaces so you have to remove them\n",
248
- " if \" \" in word:\n",
249
- " word = word.replace(\" \", \"_\")\n",
250
- " \n",
251
- " if POS in pos_options:\n",
252
- " for syn in wordnet.synsets(word, pos=getattr(wordnet, POS)): \n",
253
- " for l in syn.lemmas(): \n",
254
- " current = l.name()\n",
255
- " if human:\n",
256
- " current = re.sub(\"_\",\" \",current)\n",
257
- " synonyms.append(current) \n",
258
- " if l.antonyms():\n",
259
- " for ant in l.antonyms():\n",
260
- " cur_ant = ant.name()\n",
261
- " if human:\n",
262
- " cur_ant = re.sub(\"_\",\" \",cur_ant)\n",
263
- " antonyms.append(cur_ant)\n",
264
- " else: \n",
265
- " for syn in wordnet.synsets(word): \n",
266
- " for l in syn.lemmas(): \n",
267
- " current = l.name()\n",
268
- " if human:\n",
269
- " current = re.sub(\"_\",\" \",current)\n",
270
- " synonyms.append(current) \n",
271
- " if l.antonyms():\n",
272
- " for ant in l.antonyms():\n",
273
- " cur_ant = ant.name()\n",
274
- " if human:\n",
275
- " cur_ant = re.sub(\"_\",\" \",cur_ant)\n",
276
- " antonyms.append(cur_ant)\n",
277
- " synonyms = list(set(synonyms))\n",
278
- " antonyms = list(set(antonyms))\n",
279
- " return synonyms, antonyms"
280
- ]
281
- },
282
- {
283
- "cell_type": "code",
284
- "execution_count": 22,
285
- "id": "7cd10cf1-bf0d-4baa-8588-9315cfbe760e",
286
- "metadata": {},
287
- "outputs": [
288
- {
289
- "name": "stdout",
290
- "output_type": "stream",
291
- "text": [
292
- "['man', \"gentleman's gentleman\", 'Isle of Man', 'Man', 'humanity', 'human', 'piece', 'valet de chambre', 'mankind', 'humans', 'military personnel', 'adult male', 'homo', 'human race', 'valet', 'humankind', 'military man', 'human being', 'serviceman', 'world', 'gentleman', 'human beings'] ['woman', 'civilian']\n"
293
- ]
294
- }
295
- ],
296
- "source": [
297
- "x, q = syn_ant(\"man\")\n",
298
- "print(x,q)"
299
- ]
300
- },
301
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3b15bcba-ca91-49a0-b873-aff1e61b3053",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "doc1 = nlp(upt1)\n",
-    "doc2 = nlp(upt2)\n",
-    "doc3 = nlp(upt3)\n",
-    "doc4 = nlp(upt4)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1220c67b-1776-4a39-8335-c88b96379122",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "syn_ant(doc3[0].text,doc3[0].pos_)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4cc3ada2-90c1-4ecf-b704-9c4bf5146406",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#Discovering that NLTK's WordNet interface uses \"_\" for compounds... and fixed it.\n",
-    "syn_ant(\"Native_American\", \"NOUN\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "52253e37-5eb1-42f8-a4b8-046542004349",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "syn_ant(\"Papua_New_Guinea\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "14eb3c2f-2a5a-4db0-a802-172d9902df70",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "syn_ant(\"hate\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "53a3d7af-980a-47bc-b70f-65c1411fba05",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "russian = wordnet.synset('mother.n.01')\n",
-    "print(russian.hyponyms())\n",
-    "hypos = []\n",
-    "for hyponym in russian.hyponyms():\n",
-    "    hypos.extend(re.sub(\"_\",\" \",lemma.name()) for lemma in hyponym.lemmas())\n",
-    "hypos"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bd60bca8-96a9-45f5-8c60-e7177ead3f35",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "hyper_list = wordnet.synset('woman.n.01')\n",
-    "print(hyper_list.hypernyms())\n",
-    "hypers = []\n",
-    "for hypernym in hyper_list.hypernyms():\n",
-    "    hypers.extend(re.sub(\"_\",\" \",lemma.name()) for lemma in hypernym.lemmas())\n",
-    "hypers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9b3f4a7f-a4a6-4862-a321-42e1d4be406a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "hyper_list = wordnet.synset('man.n.01')\n",
-    "print(hyper_list.hypernyms())\n",
-    "hypers = []\n",
-    "for hypernym in hyper_list.hypernyms():\n",
-    "    hypers.extend(re.sub(\"_\",\" \",lemma.name()) for lemma in hypernym.lemmas())\n",
-    "hypers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b9932230-e38d-444d-9814-8668d4bf596c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "parent = wordnet.synset('male.n.02')\n",
-    "print(parent.hyponyms())\n",
-    "hypos = []\n",
-    "for hyponym in parent.hyponyms():\n",
-    "    hypos.extend(re.sub(\"_\",\" \",lemma.name()) for lemma in hyponym.lemmas())\n",
-    "hypos"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "51bf50a7-87e0-485e-b055-b5a59c44db06",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "hypo2 = [[re.sub(\"_\",\" \",lemma.name()) for lemma in hyponym.lemmas()] for hyponym in parent.hyponyms()]\n",
-    "hypo2"
-   ]
-  },
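The hypernym and hyponym hops in the last few cells compose into a single "sibling" lookup: go up one level, then back down to collect co-hyponyms. A sketch of that pattern (`siblings` is a hypothetical helper name, not something defined in this notebook):

```python
from nltk.corpus import wordnet
import re

def siblings(synset_name):
    """Up one hypernym level and back down: lemma names of co-hyponyms."""
    seed = wordnet.synset(synset_name)
    out = set()
    for hyper in seed.hypernyms():
        for hypo in hyper.hyponyms():
            out.update(re.sub("_", " ", l.name()) for l in hypo.lemmas())
    return sorted(out)

# For woman.n.01 this walks through its hypernyms (adult.n.01, female.n.02),
# so the result should include terms like "man" alongside "woman".
print(siblings("woman.n.01"))
```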
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "96cce82d-854a-4c3f-b2a1-d8d224357b1d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "syn_ant(\"white supremacist\",\"NOUN\",human=False)"
-   ]
-  },
-  {
-   "cell_type": "raw",
-   "id": "f6cc0a50-ec83-4951-a9bf-86e89e334945",
-   "metadata": {},
-   "source": [
-    "## Here's an attempt to explore ConceptNet\n",
-    "# I have commented it out because it is not as useful for where I'm trying to go.\n",
-    "'''This is an attempt to use [ConceptNet](https://conceptnet.io/), specifically calling their API ([see documentation](https://github.com/commonsense/conceptnet5/wiki/API)). If I can figure out how to build a list of synonyms and antonyms from here, it may be a good way to set defaults.\n",
-    "\n",
-    "#import the necessary library\n",
-    "import requests\n",
-    "\n",
-    "obj = requests.get('http://api.conceptnet.io/c/en/black').json()\n",
-    "obj.keys()'''"
-   ]
-  },
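For reference, turning that response into word lists would look roughly like the sketch below. Per the ConceptNet 5 API docs, the JSON carries an `edges` list whose items name a relation (`/r/Synonym`, `/r/Antonym`, ...) and two labeled endpoints; treat the exact field layout as an assumption here rather than verified behavior:

```python
import requests

obj = requests.get('http://api.conceptnet.io/c/en/black').json()

# Filter edges by relation id to get rough synonym/antonym candidates.
syns, ants = [], []
for edge in obj.get('edges', []):
    rel = edge['rel']['@id']
    labels = {edge['start']['label'], edge['end']['label']} - {'black'}
    if rel == '/r/Synonym':
        syns.extend(labels)
    elif rel == '/r/Antonym':
        ants.extend(labels)
print(syns, ants)
```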
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "310dfd86-d2df-4023-85bc-bed22099b890",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Builds a list dynamically from WordNet using NLTK.\n",
-    "def wordnet_list(word,POS=False):\n",
-    "    pos_options = ['NOUN','VERB','ADJ','ADV']\n",
-    "    synonyms, antonyms = syn_ant(word,POS,False)\n",
-    "    base = []\n",
-    "    final = [word]\n",
-    "    #WordNet hates spaces so you have to remove them\n",
-    "    m_word = word.replace(\" \", \"_\")\n",
-    "\n",
-    "    if POS in pos_options:\n",
-    "        for syn in wordnet.synsets(m_word, pos=getattr(wordnet, POS)):\n",
-    "            base.extend(syn.hyponyms())\n",
-    "            base.append(syn)\n",
-    "\n",
-    "        if len(synonyms) > 0:\n",
-    "            for w in synonyms:\n",
-    "                w = w.replace(\" \",\"_\")\n",
-    "                for syn in wordnet.synsets(w, pos=getattr(wordnet, POS)):\n",
-    "                    base.extend(syn.hyponyms())\n",
-    "                    base.append(syn)\n",
-    "        if len(antonyms) > 0:\n",
-    "            for a in antonyms:\n",
-    "                a = a.replace(\" \",\"_\")\n",
-    "                for syn in wordnet.synsets(a, pos=getattr(wordnet, POS)):\n",
-    "                    base.extend(syn.hyponyms())\n",
-    "                    base.append(syn)\n",
-    "    else:\n",
-    "        for syn in wordnet.synsets(m_word):\n",
-    "            base.extend(syn.hyponyms())\n",
-    "            base.append(syn)\n",
-    "\n",
-    "        if len(synonyms) > 0:\n",
-    "            for w in synonyms:\n",
-    "                w = w.replace(\" \",\"_\")\n",
-    "                for syn in wordnet.synsets(w):\n",
-    "                    base.extend(syn.hyponyms())\n",
-    "                    base.append(syn)\n",
-    "        if len(antonyms) > 0:\n",
-    "            for a in antonyms:\n",
-    "                a = a.replace(\" \",\"_\")\n",
-    "                for syn in wordnet.synsets(a):\n",
-    "                    base.extend(syn.hyponyms())\n",
-    "                    base.append(syn)\n",
-    "    base = list(set(base))\n",
-    "    for b in base:\n",
-    "        cur_words = []\n",
-    "        cur_words.extend([re.sub(\"_\",\" \",lemma.name()) for lemma in b.lemmas()])\n",
-    "        final.extend(cur_words)\n",
-    "\n",
-    "    final = list(set(final))\n",
-    "    return final"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "331ad5b4-15da-454f-8b3d-9fe7b131a6d4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "wordnet_list(\"white supremacist\", \"NOUN\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "866e3ba9-6213-4725-9627-2b5054f996e8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "words = wordnet_list(\"girl\", \"NOUN\")\n",
-    "print(f\"The length of the list is {len(words)}.\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d284ea9c-71d2-4860-882e-64b280d0d699",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "text = \"The girl was brought to the front of the class.\"\n",
-    "test_doc = nlp(text)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f53cbe5f-50db-4b2c-b59a-66864669b244",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame()\n",
-    "df[\"Words\"] = words\n",
-    "\n",
-    "df[\"Sentences\"] = df.Words.apply(lambda x: text.replace(\"girl\",x))\n",
-    "\n",
-    "df[\"Similarity\"] = df.Words.apply(lambda x: nlp(\"girl\").similarity(nlp(x)[0]))"
-   ]
-  },
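One quirk worth flagging in the Similarity column: `nlp("girl")` is a `Doc` while `nlp(x)[0]` is only the first `Token`, so a multiword candidate like "flower girl" is scored on "flower" alone. Comparing `Doc` to `Doc` instead averages vectors over every token. A small sketch of the difference (assuming the `en_core_web_lg` vectors model this commit adds):

```python
import spacy

nlp = spacy.load("en_core_web_lg")  # needs a model with word vectors

seed = nlp("girl")
for cand in ["flower girl", "schoolgirl"]:
    doc = nlp(cand)
    # Token-level score (first token only) vs. whole-Doc score.
    print(cand, seed.similarity(doc[0]), seed.similarity(doc))
```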
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5b4e37dd-f899-47c9-93ea-f92898760819",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.sort_values(by='Similarity', ascending=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "27f912c6-bfa9-4604-8d9d-f4502a0f0ea7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df2 = df[df.Similarity > 0].reset_index()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "59e35f36-f434-4377-a170-d109ca89dd77",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2c754d73-9d74-471d-a36f-84a404aa7093",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "minimum = df2.Similarity.min()\n",
-    "text2 = df2.loc[df2['Similarity'] == minimum, 'Words'].iloc[0]\n",
-    "text2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e2853e02-faea-4ce8-800c-70cc6273be02",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "maximum = df2[df2.Words != \"girl\"].Similarity.max()\n",
-    "text3 = df2.loc[df2['Similarity'] == maximum, 'Words'].iloc[0]\n",
-    "text3"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3f03e090-6c99-41db-b013-a77f2fec6e4d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df3 = df2[df2.Similarity > .5].reset_index()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2bab0a7f-27c7-4dbf-a92b-b0c13d25e5b0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "homo = wordnet.synsets('gay')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1a1130a9-b5aa-47b4-9e33-738b08f92c7c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for syn in homo:\n",
-    "    print(syn.lemmas())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d6e48617-aefb-46f6-9961-21da7f81c8d4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mother = wordnet.synsets('homo')\n",
-    "cats = []\n",
-    "words = []\n",
-    "for syn in mother:\n",
-    "    lemmas = syn.lemmas()\n",
-    "    for lemma in lemmas:\n",
-    "        ll = lemma.name()\n",
-    "        print(ll)\n",
-    "        cats.append(syn.name().split(\".\")[0])\n",
-    "        words.append(ll)\n",
-    "\n",
-    "print(cats,words)\n",
-    "print(len(cats),len(words))\n",
-    "df = {\"Categories\":cats, \"Words\":words}\n",
-    "df = pd.DataFrame(df)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8d55ba70-a569-429e-88d0-84f99772b9be",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "097f927b-28e6-4d10-99a2-621bb758bb77",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def process_text(text):\n",
-    "    doc = nlp(text.lower())\n",
-    "    result = []\n",
-    "    for token in doc:\n",
-    "        if (token.is_stop) or (token.is_punct) or (token.lemma_ == '-PRON-'):\n",
-    "            continue\n",
-    "        result.append(token.lemma_)\n",
-    "    return \" \".join(result)"
-   ]
-  },
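A quick worked example of `process_text` on a WordNet-style gloss, using the `nlp` pipeline and function defined above (the exact lemmas depend on the spaCy model, so take the output as illustrative):

```python
print(process_text("a woman who has given birth to a child"))
# Stop words and punctuation drop out and the rest is lemmatized,
# so the result is something like: "woman give birth child"
```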
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "18b4469e-4457-405e-9736-58ab9e8d8ac6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def clean_definition(syn):\n",
-    "    #This function removes stop words from sentences to improve on document level similarity for differentiation.\n",
-    "    if type(syn) is str:\n",
-    "        synset = wordnet.synset(syn).definition()\n",
-    "    elif type(syn) is nltk.corpus.reader.wordnet.Synset:\n",
-    "        synset = syn.definition()\n",
-    "    definition = nlp(\" \".join(token.lemma_ for token in nlp(synset) if not token.is_stop))\n",
-    "    return definition\n",
-    "\n",
-    "def check_sim(a,b):\n",
-    "    if type(a) is str and type(b) is str:\n",
-    "        a = nlp(a)\n",
-    "        b = nlp(b)\n",
-    "    similarity = a.similarity(b)\n",
-    "    return similarity"
-   ]
-  },
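Putting the two helpers together on a pair of synsets shows the intended flow: clean both glosses, then score them (the number itself will vary with the model):

```python
a = clean_definition("mother.n.01")
b = clean_definition("father.n.01")
print(a, "|", b)
# check_sim accepts strings or Docs; the Docs returned by clean_definition
# are passed straight through to spaCy's similarity.
print(check_sim(a, b))
```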
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ed2323c6-cee1-4d6b-8d33-a53755036acd",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Builds a dataframe dynamically from WordNet using NLTK.\n",
-    "def wordnet_df(word,POS=False,seed_definition=None):\n",
-    "    pos_options = ['NOUN','VERB','ADJ','ADV']\n",
-    "    synonyms, antonyms = syn_ant(word,POS,False)\n",
-    "    #print(synonyms, antonyms) #for QA purposes\n",
-    "    words = []\n",
-    "    cats = []\n",
-    "    #WordNet hates spaces so you have to remove them\n",
-    "    m_word = word.replace(\" \", \"_\")\n",
-    "\n",
-    "    #Allow the user to pick a seed definition if it is not provided directly to the function.\n",
-    "    if seed_definition is None:\n",
-    "        if POS in pos_options:\n",
-    "            seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word, pos=getattr(wordnet, POS))]\n",
-    "        else:\n",
-    "            seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word)]\n",
-    "        for d in range(len(seed_definitions)):\n",
-    "            print(f\"{d}: {seed_definitions[d]}\")\n",
-    "        choice = int(input(\"Which of the definitions above most aligns to your selection?\"))\n",
-    "        seed_definition = seed_definitions[choice]\n",
-    "\n",
-    "    if POS in pos_options:\n",
-    "        for syn in wordnet.synsets(m_word, pos=getattr(wordnet, POS)):\n",
-    "            if check_sim(process_text(seed_definition),process_text(syn.definition())) > .7:\n",
-    "                cur_lemmas = syn.lemmas()\n",
-    "                hypos = syn.hyponyms()\n",
-    "                for hypo in hypos:\n",
-    "                    cur_lemmas.extend(hypo.lemmas())\n",
-    "                for lemma in cur_lemmas:\n",
-    "                    ll = lemma.name()\n",
-    "                    cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
-    "                    words.append(re.sub(\"_\",\" \",ll))\n",
-    "\n",
-    "        if len(synonyms) > 0:\n",
-    "            for w in synonyms:\n",
-    "                w = w.replace(\" \",\"_\")\n",
-    "                for syn in wordnet.synsets(w, pos=getattr(wordnet, POS)):\n",
-    "                    if check_sim(process_text(seed_definition),process_text(syn.definition())) > .6:\n",
-    "                        cur_lemmas = syn.lemmas()\n",
-    "                        hypos = syn.hyponyms()\n",
-    "                        for hypo in hypos:\n",
-    "                            cur_lemmas.extend(hypo.lemmas())\n",
-    "                        for lemma in cur_lemmas:\n",
-    "                            ll = lemma.name()\n",
-    "                            cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
-    "                            words.append(re.sub(\"_\",\" \",ll))\n",
-    "        if len(antonyms) > 0:\n",
-    "            for a in antonyms:\n",
-    "                a = a.replace(\" \",\"_\")\n",
-    "                for syn in wordnet.synsets(a, pos=getattr(wordnet, POS)):\n",
-    "                    if check_sim(process_text(seed_definition),process_text(syn.definition())) > .6:\n",
-    "                        cur_lemmas = syn.lemmas()\n",
-    "                        hypos = syn.hyponyms()\n",
-    "                        for hypo in hypos:\n",
-    "                            cur_lemmas.extend(hypo.lemmas())\n",
-    "                        for lemma in cur_lemmas:\n",
-    "                            ll = lemma.name()\n",
-    "                            cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
-    "                            words.append(re.sub(\"_\",\" \",ll))\n",
-    "    else:\n",
-    "        for syn in wordnet.synsets(m_word):\n",
-    "            if check_sim(process_text(seed_definition),process_text(syn.definition())) > .7:\n",
-    "                cur_lemmas = syn.lemmas()\n",
-    "                hypos = syn.hyponyms()\n",
-    "                for hypo in hypos:\n",
-    "                    cur_lemmas.extend(hypo.lemmas())\n",
-    "                for lemma in cur_lemmas:\n",
-    "                    ll = lemma.name()\n",
-    "                    cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
-    "                    words.append(re.sub(\"_\",\" \",ll))\n",
-    "        if len(synonyms) > 0:\n",
-    "            for w in synonyms:\n",
-    "                w = w.replace(\" \",\"_\")\n",
-    "                for syn in wordnet.synsets(w):\n",
-    "                    if check_sim(process_text(seed_definition),process_text(syn.definition())) > .6:\n",
-    "                        cur_lemmas = syn.lemmas()\n",
-    "                        hypos = syn.hyponyms()\n",
-    "                        for hypo in hypos:\n",
-    "                            cur_lemmas.extend(hypo.lemmas())\n",
-    "                        for lemma in cur_lemmas:\n",
-    "                            ll = lemma.name()\n",
-    "                            cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
-    "                            words.append(re.sub(\"_\",\" \",ll))\n",
-    "        if len(antonyms) > 0:\n",
-    "            for a in antonyms:\n",
-    "                a = a.replace(\" \",\"_\")\n",
-    "                for syn in wordnet.synsets(a):\n",
-    "                    if check_sim(process_text(seed_definition),process_text(syn.definition())) > .6:\n",
-    "                        cur_lemmas = syn.lemmas()\n",
-    "                        hypos = syn.hyponyms()\n",
-    "                        for hypo in hypos:\n",
-    "                            cur_lemmas.extend(hypo.lemmas())\n",
-    "                        for lemma in cur_lemmas:\n",
-    "                            ll = lemma.name()\n",
-    "                            cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
-    "                            words.append(re.sub(\"_\",\" \",ll))\n",
-    "\n",
-    "    df = {\"Categories\":cats, \"Words\":words}\n",
-    "    df = pd.DataFrame(df)\n",
-    "    df = df.drop_duplicates().reset_index()\n",
-    "    df = df.drop(\"index\", axis=1)\n",
-    "    return df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2e9224f8-9620-464d-8a27-6b5b2ff27983",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_mother = wordnet_df(\"gay\")\n",
-    "df_mother"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "003c8941-77e9-45de-a1f5-bd3ac8b6d4a2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(df_mother)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3b196716-ee0d-479b-922c-9bac711dd535",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test = wordnet.synsets(\"mother\",wordnet.NOUN)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bebb801e-6e43-463f-8a3d-7a09b709836e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b9a87480-1e25-4614-95fc-5aa201efb9c3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test1 = wordnet.synsets('father',wordnet.NOUN)\n",
-    "testx = wordnet.synset(\"mother.n.01\")\n",
-    "for syn in test1:\n",
-    "    definition = clean_definition(syn)\n",
-    "    test_def = clean_definition(testx)\n",
-    "    print(test_def)\n",
-    "    print(syn, definition, check_sim(process_text(test_def.text),process_text(definition.text)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1757d434-5f67-465a-9559-34ce2eacf1f1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test = \"colonizer.n.01\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ea8eb48a-49dd-4aae-b5ae-ae0db2bddc49",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test2 = \"mother.n.01\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7b99656d-efab-4c6e-8dca-0d604cbd5bbe",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mother = nlp(wordnet.synset(\"black.n.05\").definition())\n",
-    "print(mother)\n",
-    "colony = nlp(wordnet.synset(\"white.n.01\").definition())\n",
-    "print(colony)\n",
-    "print(mother.similarity(colony))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b735efc4-0c84-4632-b850-7395f27971fe",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mother_processed = nlp(process_text(mother.text))\n",
-    "colony_processed = nlp(process_text(colony.text))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c5972e22-ae21-4b6a-9c1a-cbd907c8aef1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(mother_processed.similarity(colony_processed))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1046cc13-ea10-4bb1-bd59-daa1507c5c19",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "a = clean_definition(test)\n",
-    "\n",
-    "b = clean_definition(test2)\n",
-    "\n",
-    "a.similarity(b)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d9f46e35-2bc9-4937-bb5f-ce6d268150af",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "a_p = nlp(process_text(a.text))\n",
-    "b_p = nlp(process_text(b.text))\n",
-    "a_p.similarity(b_p)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "451815b5-3fa8-48c4-a89a-7b70d19d00da",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "check_sim(a,b)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "98519912-fe93-4b9a-85cb-9d7001735627",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test3 = wordnet.synset(\"white_supremacist.n.01\")\n",
-    "c = clean_definition(test3)\n",
-    "a.similarity(c)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2d44f7ee-f7a7-421e-9cb5-d0f599c2b9ab",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_parallel(word, seed_definition, QA=False):\n",
-    "    cleaned = nlp(process_text(seed_definition))\n",
-    "    root_syns = wordnet.synsets(word)\n",
-    "    hypers = []\n",
-    "    new_hypos = []\n",
-    "\n",
-    "    for syn in root_syns:\n",
-    "        hypers.extend(syn.hypernyms())\n",
-    "\n",
-    "    #hypers = list(set([syn for syn in hypers if cleaned.similarity(nlp(process_text(syn.definition()))) >= .5]))\n",
-    "\n",
-    "    for syn in hypers:\n",
-    "        new_hypos.extend(syn.hyponyms())\n",
-    "\n",
-    "    hypos = list(set([syn for syn in new_hypos if cleaned.similarity(nlp(process_text(syn.definition()))) >= .75]))\n",
-    "    print(len(hypos))\n",
-    "    #Rescore with a looser or stricter threshold depending on how many candidates the .75 pass found.\n",
-    "    if len(hypos) < 3:\n",
-    "        hypos = list(set([(syn, cleaned.similarity(nlp(process_text(syn.definition())))) for syn in new_hypos if cleaned.similarity(nlp(process_text(syn.definition()))) >= .5]))\n",
-    "    elif len(hypos) < 10:\n",
-    "        hypos = list(set([(syn, cleaned.similarity(nlp(process_text(syn.definition())))) for syn in new_hypos if cleaned.similarity(nlp(process_text(syn.definition()))) >= .66]))\n",
-    "    elif len(hypos) >= 20:\n",
-    "        hypos = list(set([(syn, cleaned.similarity(nlp(process_text(syn.definition())))) for syn in new_hypos if cleaned.similarity(nlp(process_text(syn.definition()))) >= .9]))\n",
-    "    else:\n",
-    "        hypos = list(set([(syn, cleaned.similarity(nlp(process_text(syn.definition())))) for syn in new_hypos if cleaned.similarity(nlp(process_text(syn.definition()))) >= .8]))\n",
-    "    if QA:\n",
-    "        print(hypers)\n",
-    "        print(hypos)\n",
-    "        return hypers, hypos\n",
-    "    else:\n",
-    "        return hypos\n",
-    "\n",
-    "# Builds a dataframe dynamically from WordNet using NLTK.\n",
-    "def wordnet_parallel_df(word,POS=False,seed_definition=None):\n",
-    "    pos_options = ['NOUN','VERB','ADJ','ADV']\n",
-    "    synonyms, antonyms = syn_ant(word,POS,False)\n",
-    "    #print(synonyms, antonyms) #for QA purposes\n",
-    "    words = []\n",
-    "    cats = []\n",
-    "    #WordNet hates spaces so you have to remove them\n",
-    "    m_word = word.replace(\" \", \"_\")\n",
-    "\n",
-    "    #Allow the user to pick a seed definition if it is not provided directly to the function.\n",
-    "    if seed_definition is None:\n",
-    "        if POS in pos_options:\n",
-    "            seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word, pos=getattr(wordnet, POS))]\n",
-    "        else:\n",
-    "            seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word)]\n",
-    "        for d in range(len(seed_definitions)):\n",
-    "            print(f\"{d}: {seed_definitions[d]}\")\n",
-    "        choice = int(input(\"Which of the definitions above most aligns to your selection?\"))\n",
-    "        seed_definition = seed_definitions[choice]\n",
-    "\n",
-    "    hypos = get_parallel(m_word,seed_definition)\n",
-    "    for syn,sim in hypos:\n",
-    "        cur_lemmas = syn.lemmas()\n",
-    "        cur_hypos = syn.hyponyms()\n",
-    "        for hypo in cur_hypos:\n",
-    "            cur_lemmas.extend(hypo.lemmas())\n",
-    "        for lemma in cur_lemmas:\n",
-    "            ll = lemma.name()\n",
-    "            cats.append(re.sub(\"_\",\" \", syn.name().split(\".\")[0]))\n",
-    "            words.append(re.sub(\"_\",\" \",ll))\n",
-    "\n",
-    "    df = {\"Categories\":cats, \"Words\":words}\n",
-    "    df = pd.DataFrame(df)\n",
-    "    df = df.drop_duplicates().reset_index()\n",
-    "    df = df.drop(\"index\", axis=1)\n",
-    "    return df"
-   ]
-  },
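`get_parallel` recomputes `nlp(process_text(syn.definition()))` for the same candidate synsets on every threshold pass. Below is a sketch of a cached variant under the same imports: it scores each definition once with `nlp.pipe`, then applies a simplified cascade (strictest cutoff that still yields at least three hits), which deliberately differs a little from the selection rule above:

```python
def get_parallel_cached(word, seed_definition):
    cleaned = nlp(process_text(seed_definition))
    candidates = []
    for syn in wordnet.synsets(word):
        for hyper in syn.hypernyms():
            candidates.extend(hyper.hyponyms())
    candidates = list(set(candidates))

    # One pipeline pass over all candidate glosses, then reuse the scores.
    docs = nlp.pipe(process_text(c.definition()) for c in candidates)
    scored = [(c, cleaned.similarity(d)) for c, d in zip(candidates, docs)]

    # Take the strictest threshold that still leaves a few candidates.
    for cut in (0.9, 0.8, 0.66, 0.5):
        hypos = [(c, s) for c, s in scored if s >= cut]
        if len(hypos) >= 3:
            return hypos
    return scored
```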
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "dbd95998-ec11-4166-93fa-18c0a99c4d6e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "gay_root = wordnet.synsets(\"gay\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3cdc4a08-2e90-4ab9-ae5e-6c95fd162048",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "gay = wordnet.synset(\"gay.s.06\").definition()\n",
-    "print(gay)\n",
-    "hypers, hypos1 = get_parallel(\"gay\",gay,True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "34b80b88-3089-44b5-8c5b-13e5f7ea8446",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(hypos1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a134ba49-19cc-4937-b6af-67e044e3bcd2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for root in gay_root:\n",
-    "    print(root, root.definition())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "662ff6a8-b5af-4c6a-8102-39b66b85e5d1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "wordnet.synsets(\"chinese\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4bc77b81-8c43-4cbb-bc7e-a178e76d3659",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "chinese = wordnet.synset(\"chinese.a.01\").definition()\n",
-    "hypers, hypos = get_parallel(\"chinese\",chinese,True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8b66bb7a-0ede-48a1-888a-c90e81e2d75d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "lemmas = []\n",
-    "for hypo in hypos1:\n",
-    "    lemmas.extend([re.sub(\"_\",\" \",lemma.name()) for lemma in hypo[0].lemmas()])\n",
-    "lemmas"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "221c43f2-05f1-4a48-95a8-eb6a122527e9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(lemmas)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3d75b92b-be76-45c5-b955-d1f64ec03bd4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = wordnet_parallel_df(\"gay\",seed_definition=gay)\n",
-    "df.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "35194a7a-a814-43c6-a57c-c40e54b81847",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(df)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "29618210-fec7-40b6-b326-107e8570abca",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_grouped = df.groupby('Categories').count()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "407cda3a-1d7a-4863-aa1e-e69860e6cfb5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_grouped.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3b70c510-997a-4675-963c-ca7000e79eb4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tiny = wordnet.synsets(\"tiny\", wordnet.ADJ)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2fe63d4d-b080-49ae-a1b6-487e8b440e76",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tiny"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9661c299-369b-4538-86d9-003b3dc9fa5c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tiny[0].lemmas()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "99a6e4d9-2923-41a9-94b3-09d21c699f21",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "new_alt = []\n",
-    "for lemma in tiny[0].lemmas():\n",
-    "    new_alt.extend(wordnet.synsets(lemma.name()))\n",
-    "new_alt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7ac3e75d-8a0e-44e2-910a-dcfcea86fa9f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "new_alt2 = list(set(new_alt))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3617a495-c722-466f-a74b-1e22bf025248",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for alt in new_alt2:\n",
-    "    print(alt,alt.hypernyms())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cc2f7839-f219-4cf8-ab5d-82781016e6c5",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}