import gradio as gr
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer
import pandas as pd
import numpy as np



# Constants to play with
CONDITIONING_VARIABLES = ["none", "birth_place", "birth_date", "name"]
FEMALE_WEIGHTS = [1.5, 5]  # About 5x more male than female tokens in dataset

# Internal consts
START_YEAR = 1800
STOP_YEAR = 1999
SPLIT_KEY = "DATE"

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

MAX_TOKEN_LENGTH = 128
NON_LOSS_TOKEN_ID = -100
NON_GENDERED_TOKEN_ID = 30  # Picked an int that will pop out visually
LABEL_DICT = {"female": 9, "male": -9}  # Picked an int that will pop out visually
CLASSES = list(LABEL_DICT.keys())


# Fire up the models
models_paths = dict()
models = dict()

base_path = "emilylearning/"
for var in CONDITIONING_VARIABLES:
    for f_weight in FEMALE_WEIGHTS:
        if f_weight == 1.5:
            models_paths[(var, f_weight)] = (
                base_path
                + f"finetuned_cgp_added_{var}__female_weight_{f_weight}__test_run_False__p_dataset_100"
            )
        else:
            models_paths[(var, f_weight)] = (
                base_path
                + f"finetuned_cgp_add_{var}__f_weight_{f_weight}__p_dataset_100__test_False"
            )
        models[(var, f_weight)] = AutoModelForTokenClassification.from_pretrained(
            models_paths[(var, f_weight)]
        )
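
# For illustration, the hub paths built by the f-strings above look like:
#   models_paths[("none", 1.5)] == "emilylearning/finetuned_cgp_added_none__female_weight_1.5__test_run_False__p_dataset_100"
#   models_paths[("birth_date", 5)] == "emilylearning/finetuned_cgp_add_birth_date__f_weight_5__p_dataset_100__test_False"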


# Tokenizers same for each model, so just grabbing one of them
tokenizer = AutoTokenizer.from_pretrained(
    models_paths[(CONDITIONING_VARIABLES[0], FEMALE_WEIGHTS[0])], add_prefix_space=True
)
MASK_TOKEN_ID = tokenizer.mask_token_id


# more static stuff
gendered_lists = [
    ["he", "she"],
    ["him", "her"],
    ["his", "hers"],
    ["male", "female"],
    ["man", "woman"],
    ["men", "women"],
    ["husband", "wife"],
]
male_gendered_dict = {pair[0]: pair for pair in gendered_lists}
female_gendered_dict = {pair[1]: pair for pair in gendered_lists}

male_gendered_token_ids = tokenizer.convert_tokens_to_ids(
    list(male_gendered_dict.keys())
)
female_gendered_token_ids = tokenizer.convert_tokens_to_ids(
    list(female_gendered_dict.keys())
)
assert tokenizer.unk_token_id not in male_gendered_token_ids
assert tokenizer.unk_token_id not in female_gendered_token_ids

label_list = list(LABEL_DICT.values())
assert label_list[0] == LABEL_DICT["female"], "LABEL_DICT not an ordered dict"

label2id = {label: idx for idx, label in enumerate(label_list)}
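# With the constants above, label_list == [9, -9] and label2id == {9: 0, -9: 1},
# so downstream class index 0 means "female" and 1 means "male".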

# Prepare text
def tokenize_and_append_metadata(text, tokenizer):
    tokenized = tokenizer(
        text,
        truncation=True,
        padding=True,
        max_length=MAX_TOKEN_LENGTH,
    )

    # Finding the gender pronouns in the tokens
    token_ids = tokenized["input_ids"]
    female_tags = torch.tensor(
        [
            LABEL_DICT["female"]
            if id in female_gendered_token_ids
            else NON_GENDERED_TOKEN_ID
            for id in token_ids
        ]
    )
    male_tags = torch.tensor(
        [
            LABEL_DICT["male"]
            if id in male_gendered_token_ids
            else NON_GENDERED_TOKEN_ID
            for id in token_ids
        ]
    )

    # Labeling and masking out occurrences of gendered pronouns
    labels = torch.where(
        female_tags == LABEL_DICT["female"],
        label2id[LABEL_DICT["female"]],
        NON_LOSS_TOKEN_ID,
    )
    labels = torch.where(
        male_tags == LABEL_DICT["male"], label2id[LABEL_DICT["male"]], labels
    )
    masked_token_ids = torch.where(
        female_tags == LABEL_DICT["female"], MASK_TOKEN_ID, torch.tensor(token_ids)
    )
    masked_token_ids = torch.where(
        male_tags == LABEL_DICT["male"], MASK_TOKEN_ID, masked_token_ids
    )

    tokenized["input_ids"] = masked_token_ids
    tokenized["labels"] = labels

    return tokenized
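
# A minimal usage sketch of the function above (the input text is hypothetical):
#   sample = tokenize_and_append_metadata("Born 1953, she was a chemist.", tokenizer)
#   sample["input_ids"]  # gendered tokens like "she" replaced with tokenizer.mask_token_id
#   sample["labels"]     # class id at masked positions, NON_LOSS_TOKEN_ID (-100) elsewhere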


# Run inference
def predict_gender_pronouns(
    num_points, conditioning_variables, f_weights, input_text, return_preds=False
):

    text_portions = input_text.split(SPLIT_KEY)

    years = np.linspace(START_YEAR, STOP_YEAR, int(num_points)).astype(int)
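    # e.g. num_points=5 gives years == [1800, 1849, 1899, 1949, 1999]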

    dfs = []
    dfs.append(pd.DataFrame({"year": years}))
    for f_weight in f_weights:
        for var in conditioning_variables:
            prefix = f"w{f_weight}_{var}"
            model = models[(var, f_weight)]

            p_female = []
            p_male = []
            for b_date in years:
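                # e.g. input "Born DATE, she ..." splits into ["Born ", ", she ..."]
                # and re-joins as "Born 1850, she ..." for b_date == 1850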
                target_text = f"{b_date}".join(text_portions)
                tokenized_sample = tokenize_and_append_metadata(
                    target_text,
                    tokenizer=tokenizer,
                )

                ids = tokenized_sample["input_ids"]
                atten_mask = torch.tensor(tokenized_sample["attention_mask"])
                toks = tokenizer.convert_ids_to_tokens(ids)
                labels = tokenized_sample["labels"]

                with torch.no_grad():
                    outputs = model(ids.unsqueeze(dim=0), atten_mask.unsqueeze(dim=0))
                    preds = torch.argmax(outputs[0][0].cpu(), dim=1)

                    was_masked = labels.cpu() != NON_LOSS_TOKEN_ID
                    preds = torch.where(was_masked, preds, NON_LOSS_TOKEN_ID)
                    num_preds = torch.sum(was_masked).item()

                    p_female.append(len(torch.where(preds == 0)[0]) / num_preds * 100)
                    p_male.append(len(torch.where(preds == 1)[0]) / num_preds * 100)

            dfs.append(pd.DataFrame({f"%f_{prefix}": p_female, f"%m_{prefix}": p_male}))

    results = pd.concat(dfs, axis=1).set_index("year")
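    # Column names encode the model, e.g. "%f_w5_birth_date" holds the percent
    # of female predictions from the model trained with f_weight=5 and the
    # birth_date conditioning variable.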

    female_df = results.filter(regex=".*f_")
    female_df_for_plot = (
        female_df.reset_index()
    )  # Gradio timeseries requires x-axis as column?

    male_df = results.filter(regex=".*m_")
    male_df_for_plot = (
        male_df.reset_index()
    )  # Gradio timeseries requires x-axis as column?

    return (
        target_text,
        female_df_for_plot,
        female_df,
        male_df_for_plot,
        male_df,
    )
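
# A minimal sketch of calling the inference function directly, outside the Gradio
# UI (the argument values here are illustrative):
#   _, female_plot_df, female_df, male_plot_df, male_df = predict_gender_pronouns(
#       num_points=10,
#       conditioning_variables=["none", "birth_date"],
#       f_weights=[5],
#       input_text="Born DATE, she was a computer scientist.",
#   )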


title = "Changing Gender Pronouns"
description = """
This is a demo for a project exploring possible spurious correlations in training datasets, correlations that can be exploited and manipulated to achieve alternative outcomes. In this case, we manipulate `DATE` to change the predicted gender pronouns of both the base BERT model and a model fine-tuned on a pronoun-prediction task using the [wiki-bio](https://huggingface.co/datasets/wiki_bio) dataset.
One way to explain this phenomenon is to look at a likely data-generating process for biographical-like data, in both the main BERT training dataset and the `wiki_bio` dataset, in the form of a causal DAG.
 
In the DAG, we can see that `birth_place`, `birth_date` and `gender` are all independent elements that share no common cause with the other covariates in the DAG. However, `birth_place`, `birth_date` and `gender` may all play a role in causing one's `access_to_resources`. The general trend is that `access_to_resources` has become less gender-dependent over time, though not in every `birth_place`; recent events in Afghanistan provide a stark counterexample. `access_to_resources` further determines how, or whether at all, one appears in the dataset's `context_words`.
 
We also argue that although there are complex causal interactions between the words in a segment, the `context_words` are more likely to cause the `gender_pronouns` than vice versa. For example, if the subject is a famous doctor and the object is her wealthy father, these context words will determine which person is being referred to, and thus which gendered pronoun to use.
 
 
In this graph, any pink path between `context_words` and `gender_pronouns` allows the flow of statistical correlation (regardless of the direction of the causal arrows), inviting confounding and thus spurious correlations into the trained model.
 
<center>
<img src="https://www.dropbox.com/s/x60r43h7uwztnru/generic_ds_dag.png?raw=1" 
    alt="DAG of possible data generating process for datasets used in training.">
</center>
  
Those familiar with causal DAGs may note that one can simply condition on `gender` to block any confounding between the `context_words` and the `gender_pronouns`. However, this is not always possible, particularly in generative or mask-filling tasks like those common in language models.
 
Here, we automatically mask (for prediction) the following tokens; they will also be automatically masked if you use them in the demo below.
```
gendered_lists = [
    ["he", "she"],
    ["him", "her"],
    ["his", "hers"],
    ["male", "female"],
    ["man", "woman"],
    ["men", "women"],
    ["husband", "wife"],
]
```
 
In this demo we are looking for a dose-response relationship between:
- our treatment: the text,
- and our outcome: the predicted gender of pronouns in the text.
 
Specifically, we are testing whether a larger-magnitude intervention (an older `DATE` in the text) results in a larger-magnitude effect in the outcome (a higher percentage of predicted female pronouns).
 
In the demo below you can select among four different fine-tuning methods:
- which, if any, conditioning variable was appended to the text,
 
and two different weighting schemes used in the loss function to nudge predictions toward the minority class in the dataset:
- female pronouns.
 
"""


article = "Check out the [main colab notebook](https://colab.research.google.com/drive/14ce4KD6PrCIL60Eng-t79tEI1UP-DHGz?usp=sharing#scrollTo=Mg1tUeHLRLaG) \
 with many more details about this method and its implementation."

gr.Interface(
    fn=predict_gender_pronouns,
    inputs=[
        gr.inputs.Number(
            default=10,
            label="Number of points (years) plotted -- select fewer if slow.",
        ),
        gr.inputs.CheckboxGroup(
            CONDITIONING_VARIABLES,
            default=["none", "birth_date"],
            type="value",
            label="Pick model(s) that were trained with the following conditioning variables",
        ),
        gr.inputs.CheckboxGroup(
            FEMALE_WEIGHTS,
            default=[5],
            type="value",
            label="Pick model(s) that were trained with the following loss function weight on female predictions",
        ),
        gr.inputs.Textbox(
            lines=7,
            label="Input Text. Include one of more instance of the word 'DATE' below, to be replace with a range of dates in demo.", 
            default="Born DATE, she was a computer scientist. Her work was greatly respected, and she was well-regarded in her field.",
        ),
    ],
    outputs=[
        gr.outputs.Textbox(type="auto", label="Sample target text fed to model"),
        gr.outputs.Timeseries(
            x="year",
            label="Precent pred female pronoun vs year, per model trained with conditioning and with weight for female preds",
        ),
        gr.outputs.Dataframe(
            overflow_row_behaviour="show_ends",
            label="Precent pred female pronoun vs year, per model trained with conditioning and with weight for female preds",
        ),
        gr.outputs.Timeseries(
            x="year",
            label="Precent pred male pronoun vs year, per model trained with conditioning and with weight for female preds",
        ),
        gr.outputs.Dataframe(
            overflow_row_behaviour="show_ends",
            label="Precent pred male pronoun vs year, per model trained with conditioning and with weight for female preds",
        ),
    ],
    title=title,
    description=description,
    article=article,
).launch()