emilylearning committed
Commit 12781b6
Parent: c494a1d

fix add-own-model bug (less nice, but it's late); updated markdown

Files changed (1):
  1. app.py (+26, -30)
app.py CHANGED
@@ -1,4 +1,3 @@
-# Model card: https://huggingface.co/emilylearning/selection-induced-collider-bias
 # %%
 import gradio as gr
 import matplotlib.pyplot as plt
@@ -8,18 +7,9 @@ import random
 from matplotlib.ticker import MaxNLocator
 from transformers import pipeline

+MODEL_NAMES = ["bert-base-uncased", "roberta-base", "bert-large-uncased", "roberta-large"]
 OWN_MODEL_NAME = 'add-a-model'

-MODEL_NAME_DICT = {
-    "roberta-large": "RoBERTa-large",
-    "bert-large-uncased": "BERT-large",
-    "roberta-base": "RoBERTa-base",
-    "bert-base-uncased": "BERT-base",
-    "olm/olm-roberta-base-oct-2022": "OLM_RoBERTa-base",
-    OWN_MODEL_NAME: "Your model's"
-}
-MODEL_NAMES = list(MODEL_NAME_DICT.keys())
-
 DECIMAL_PLACES = 1
 EPS = 1e-5 # to avoid /0 errors

@@ -145,12 +135,15 @@ GENDERED_LIST = [

 # %%
 # Fire up the models
-models = {m : pipeline("fill-mask", model=m) for m in MODEL_NAMES if m != OWN_MODEL_NAME}
+models = dict()
+
+for bert_like in MODEL_NAMES:
+    models[bert_like] = pipeline("fill-mask", model=bert_like)

 # %%


-def get_gendered_tokens():
+def get_gendered_token_ids():
     male_gendered_tokens = [list[0] for list in GENDERED_LIST]
     female_gendered_tokens = [list[1] for list in GENDERED_LIST]

@@ -177,7 +170,7 @@ def get_avg_prob_from_pipeline_outputs(mask_filled_text, gendered_token, num_preds):
 # %%


-def get_figure(df, gender, n_fit=1):
+def get_figure(df, gender, n_fit=1, model_name=None):
     df = df.set_index('x-axis')
     cols = df.columns
     xs = list(range(len(df)))
@@ -205,7 +198,7 @@ def get_figure(df, gender, n_fit=1):
     ax.axis('tight')
     ax.set_xlabel("Value injected into input text")
     ax.set_title(
-        f"Probability of predicting {gender} pronouns.")
+        f"Probability of predicting {gender} pronouns on {model_name}.")
     ax.set_ylabel(f"Softmax prob for pronouns")
     ax.xaxis.set_major_locator(MaxNLocator(6))
     ax.tick_params(axis='x', labelrotation=5)
@@ -227,6 +220,7 @@ def predict_gender_pronouns(
     """
     if model_name not in MODEL_NAMES:
         model = pipeline("fill-mask", model=own_model_name)
+        model_name = OWN_MODEL_NAME
     else:
         model = models[model_name]

@@ -234,7 +228,7 @@ def predict_gender_pronouns(

     indie_vars_list = indie_vars.split(',')

-    male_gendered_tokens, female_gendered_tokens = get_gendered_tokens()
+    male_gendered_tokens, female_gendered_tokens = get_gendered_token_ids()

     text_segments, num_preds = prepare_text_for_masking(
         input_text, mask_token, male_gendered_tokens + female_gendered_tokens, split_key)
@@ -276,9 +270,9 @@ def predict_gender_pronouns(
     results_df['female_pronouns'] = female_pronoun_preds
     results_df['male_pronouns'] = male_pronoun_preds
     female_fig = get_figure(results_df.drop(
-        'male_pronouns', axis=1), 'female', n_fit,)
+        'male_pronouns', axis=1), 'female', n_fit, model_name)
     male_fig = get_figure(results_df.drop(
-        'female_pronouns', axis=1), 'male', n_fit,)
+        'female_pronouns', axis=1), 'male', n_fit, model_name)
     display_text = f"{random.choice(indie_vars_list)}".join(text_segments)

     return (
@@ -293,18 +287,17 @@ def predict_gender_pronouns(
 title = "Causing Gender Pronouns"
 description = """
 ## Intro
-
 """


 date_example = [
-    MODEL_NAMES[4],
+    MODEL_NAMES[1],
     '',
     ', '.join(DATES),
     'DATE',
     "False",
     1,
-    'In DATE, she became an adult.'
+    'She was a teenager in DATE.'
 ]


@@ -315,7 +308,7 @@ place_example = [
     'PLACE',
     "False",
     1,
-    'She became a teen in PLACE.'
+    'She became an adult in PLACE.'
 ]


@@ -362,7 +355,9 @@ with demo:
     gr.Markdown("# Spurious Correlation Evaluation for Pre-trained LLMs")
     gr.Markdown("Find spurious correlations between seemingly independent variables (for example between `gender` and `time`) in almost any BERT-like LLM on Hugging Face, below.")

-    gr.Markdown("See why this happens in ['Selection Induced Collider Bias: A Gender Pronoun Uncertainty Case Study'](https://arxiv.org/abs/2210.00131).")
+    # gr.Markdown("Note: If there is an issue with the rendering of the results taking longer than expected (more than 10s of seconds), there may be an unexpected issue effecting the hosting. If so, please see this [backup colab notebook](https://colab.research.google.com/drive/1A3a9cy9fERaxkuoX8YNTFhLlhRt_cxMm?usp=sharing).")
+
+
     gr.Markdown("## Instructions for this Demo")
     gr.Markdown("1) Click on one of the examples below (where we sweep through a spectrum of `places`, `dates` and `subreddits`) to pre-populate the input fields.")
     gr.Markdown("2) Check out the pre-populated fields as you scroll down to the ['Hit Submit...'] button!")
@@ -401,7 +396,8 @@ with demo:

     with gr.Row():
         model_name = gr.Radio(
-            MODEL_NAMES,
+            MODEL_NAMES + [OWN_MODEL_NAME],
+            type="value",
             label="B) BERT-like model.",
         )
         own_model_name = gr.Textbox(
@@ -417,6 +413,7 @@ with demo:
         to_normalize = gr.Dropdown(
             ["False", "True"],
             label="D) Normalize model's predictions to only the gendered ones?",
+            type="index",
         )
         place_holder = gr.Textbox(
             label="E) Special token place-holder",
@@ -424,6 +421,7 @@ with demo:
         n_fit = gr.Dropdown(
             list(range(1, 5)),
             label="F) Degree of polynomial fit",
+            type="value",
         )

     gr.Markdown(
@@ -436,15 +434,16 @@ with demo:
     )

     gr.Markdown("## Outputs!")
+    #gr.Markdown("Scroll down and 'Hit Submit'!")
     with gr.Row():
         btn = gr.Button("Hit submit to generate predictions!")

     with gr.Row():
         sample_text = gr.Textbox(
-            label="Output text: Sample of text fed to model")
+            type="auto", label="Output text: Sample of text fed to model")
     with gr.Row():
-        female_fig = gr.Plot()
-        male_fig = gr.Plot()
+        female_fig = gr.Plot(type="auto")
+        male_fig = gr.Plot(type="auto")
     with gr.Row():
         df = gr.Dataframe(
             show_label=True,
@@ -471,6 +470,3 @@ with demo:


 demo.launch(debug=True)
-
-
-# %%
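
For quick reference, a minimal sketch of the add-own-model fallback this commit fixes: the preset MODEL_NAMES pipelines stay pre-loaded, and any other name typed into the 'add-a-model' textbox is loaded on demand as its own fill-mask pipeline. The get_model helper and the "distilbert-base-uncased" id below are illustrative assumptions, not part of app.py.

from transformers import pipeline

MODEL_NAMES = ["bert-base-uncased", "roberta-base", "bert-large-uncased", "roberta-large"]
OWN_MODEL_NAME = 'add-a-model'

# Preset models are loaded once at startup, as in app.py.
models = {name: pipeline("fill-mask", model=name) for name in MODEL_NAMES}

def get_model(model_name, own_model_name):
    # Hypothetical helper mirroring the fixed branch in predict_gender_pronouns:
    # fall back to whatever Hub id the user typed, e.g. "distilbert-base-uncased".
    if model_name not in MODEL_NAMES:
        model = pipeline("fill-mask", model=own_model_name)
        model_name = OWN_MODEL_NAME  # downstream plot titles then use the placeholder name
    else:
        model = models[model_name]
    return model, model_name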