emilylearning committed on
Commit
3e5ab39
1 Parent(s): 4237de9

Remove n_fit, add description, show both plots in the same row

Browse files
Files changed (1) hide show
  1. app.py +25 -25
app.py CHANGED
@@ -170,7 +170,7 @@ def get_avg_prob_from_pipeline_outputs(mask_filled_text, gendered_token, num_pre
170
  ]
171
  return round(sum(pronoun_preds) / (EPS + num_preds) * 100, DECIMAL_PLACES)
172
 
173
-
174
  def get_figure(df, gender, n_fit=1):
175
  df = df.set_index('x-axis')
176
  cols = df.columns
@@ -212,7 +212,6 @@ def predict_gender_pronouns(
212
  indie_vars,
213
  split_key,
214
  normalizing,
215
- n_fit,
216
  input_text,
217
  ):
218
  """Run inference on input_text for each model type, returning df and plots of percentage
@@ -265,9 +264,9 @@ def predict_gender_pronouns(
265
  results_df['female_pronouns'] = female_pronoun_preds
266
  results_df['male_pronouns'] = male_pronoun_preds
267
  female_fig = get_figure(results_df.drop(
268
- 'male_pronouns', axis=1), 'female', n_fit)
269
  male_fig = get_figure(results_df.drop(
270
- 'female_pronouns', axis=1), 'male', n_fit)
271
 
272
  return (
273
  target_text,
@@ -285,29 +284,26 @@ description = """
285
 
286
  place_example = [
287
  MODEL_NAMES[0],
288
- ','.join(PLACES),
289
  'PLACE',
290
  "False",
291
- 1,
292
  'Born in PLACE, she was a teacher.'
293
  ]
294
 
295
  date_example = [
296
  MODEL_NAMES[0],
297
- ','.join(DATES),
298
  'DATE',
299
  "False",
300
- 2,
301
  'Born in DATE, she was a doctor.'
302
  ]
303
 
304
 
305
  subreddit_example = [
306
  MODEL_NAMES[2],
307
- ','.join(SUBREDDITS),
308
  'SUBREDDIT',
309
  "False",
310
- 1,
311
  'I saw on r/SUBREDDIT that she is a hacker.'
312
  ]
313
 
@@ -324,8 +320,15 @@ def reddit_fn():
324
  demo = gr.Blocks()
325
  with demo:
326
  gr.Markdown("## Hunt for spurious correlations in our LLMs.")
327
- gr.Markdown("Please see a better explanation in another [Space](https://huggingface.co/spaces/emilylearning/causing_gender_pronouns_two).")
 
 
 
328
 
 
 
 
 
329
 
330
  with gr.Row():
331
  x_axis = gr.Textbox(
@@ -347,11 +350,6 @@ with demo:
347
  label="Normalize?",
348
  type="index",
349
  )
350
- n_fit = gr.Dropdown(
351
- list(range(1, 5)),
352
- label="Degree of polynomial fit for dose response trend",
353
- type="value",
354
- )
355
  with gr.Row():
356
  input_text = gr.Textbox(
357
  lines=5,
@@ -363,7 +361,6 @@ with demo:
363
  with gr.Row():
364
  female_fig = gr.Plot(
365
  type="auto", label="Plot of softmax probability pronouns predicted female.")
366
- with gr.Row():
367
  male_fig = gr.Plot(
368
  type="auto", label="Plot of softmax probability pronouns predicted male.")
369
  with gr.Row():
@@ -372,29 +369,32 @@ with demo:
372
  overflow_row_behaviour="show_ends",
373
  label="Table of softmax probability for pronouns predictions",
374
  )
375
- gr.Markdown("x-axis sorted by older to more recent dates:")
376
  place_gen = gr.Button('Populate fields with a location example')
377
 
378
- gr.Markdown("x-axis sorted by bottom 10 and top 10 Global Gender Gap ranked countries:")
379
  date_gen = gr.Button('Populate fields with a date example')
380
 
381
- gr.Markdown("x-axis sorted in order of increasing self-identified female participation (see [bburky demo](http://bburky.com/subredditgenderratios/)): ")
382
  subreddit_gen = gr.Button('Populate fields with a subreddit example')
383
 
384
- #https://github.com/gradio-app/gradio/issues/690#issuecomment-1118772919
385
  with gr.Row():
 
386
  date_gen.click(date_fn, inputs=[], outputs=[model_name,
387
- x_axis, place_holder, to_normalize, n_fit, input_text])
388
  place_gen.click(place_fn, inputs=[], outputs=[
389
- model_name, x_axis, place_holder, to_normalize, n_fit, input_text])
390
  subreddit_gen.click(reddit_fn, inputs=[], outputs=[
391
- model_name, x_axis, place_holder, to_normalize, n_fit, input_text])
392
  with gr.Row():
393
  btn = gr.Button("Hit submit")
394
  btn.click(
395
  predict_gender_pronouns,
396
  inputs=[model_name, x_axis, place_holder,
397
- to_normalize, n_fit, input_text],
398
  outputs=[sample_text, female_fig, male_fig, df])
399
 
400
  demo.launch(debug=True)
 
 
 
 
170
  ]
171
  return round(sum(pronoun_preds) / (EPS + num_preds) * 100, DECIMAL_PLACES)
172
 
173
+ # %%
174
  def get_figure(df, gender, n_fit=1):
175
  df = df.set_index('x-axis')
176
  cols = df.columns
 
212
  indie_vars,
213
  split_key,
214
  normalizing,
 
215
  input_text,
216
  ):
217
  """Run inference on input_text for each model type, returning df and plots of percentage
 
264
  results_df['female_pronouns'] = female_pronoun_preds
265
  results_df['male_pronouns'] = male_pronoun_preds
266
  female_fig = get_figure(results_df.drop(
267
+ 'male_pronouns', axis=1), 'female')
268
  male_fig = get_figure(results_df.drop(
269
+ 'female_pronouns', axis=1), 'male')
270
 
271
  return (
272
  target_text,
 
284
 
285
  place_example = [
286
  MODEL_NAMES[0],
287
+ ', '.join(PLACES),
288
  'PLACE',
289
  "False",
 
290
  'Born in PLACE, she was a teacher.'
291
  ]
292
 
293
  date_example = [
294
  MODEL_NAMES[0],
295
+ ', '.join(DATES),
296
  'DATE',
297
  "False",
 
298
  'Born in DATE, she was a doctor.'
299
  ]
300
 
301
 
302
  subreddit_example = [
303
  MODEL_NAMES[2],
304
+ ', '.join(SUBREDDITS),
305
  'SUBREDDIT',
306
  "False",
 
307
  'I saw on r/SUBREDDIT that she is a hacker.'
308
  ]
309
 
 
320
  demo = gr.Blocks()
321
  with demo:
322
  gr.Markdown("## Hunt for spurious correlations in our LLMs.")
323
+ gr.Markdown("Although genders are relatively evenly distributed across time, place and interests, there are also known gender disparities in terms of access to resources. We suggest that this access disparity can result in dataset selection bias, causing models to learn a surprising range of spurious associations.")
324
+ gr.Markdown("These spurious associations are often considered undesirable, as they do not match our intuition about the real-world domain from which we derive samples for inference-time prediction.")
325
+ gr.Markdown("Selection bias of samples into datasets is a zero-sum-game, with even our high quality datasets forced to trade off one for another, thus inducing selection bias into the learned associations of the model.")
326
+
327
 
328
+ gr.Markdown("One intuitive way to see the impact that changing one variable may have upon another is to look for a dose-response relationship, in which a larger intervention in the treatment (the value in text form injected in the otherwise unchanged text sample) produces a larger response in the output (the softmax probability of a gendered pronoun). Specifically, below are examples of sweeping through a spectrum of place, date and subreddit interest (we encourage you to try your own).")
329
+
330
+ gr.Markdown("This requires a spectrum of less to more gender-equal values for each covariate. For date, it’s easy to just use time itself, as gender equality has generally improved with time, so we picked years ranging from 1800 - 1999. For place we used the bottom and top 10 Global Gender Gap ranked countries. And for subreddit, we use subreddit name ordered by subreddits that have an increasingly larger percentage of self-reported female commenters.")
331
+ #gr.Markdown("Please see a better explanation in another [Space](https://huggingface.co/spaces/emilylearning/causing_gender_pronouns_two).")
332
 
333
  with gr.Row():
334
  x_axis = gr.Textbox(
 
350
  label="Normalize?",
351
  type="index",
352
  )
 
 
 
 
 
353
  with gr.Row():
354
  input_text = gr.Textbox(
355
  lines=5,
 
361
  with gr.Row():
362
  female_fig = gr.Plot(
363
  type="auto", label="Plot of softmax probability pronouns predicted female.")
 
364
  male_fig = gr.Plot(
365
  type="auto", label="Plot of softmax probability pronouns predicted male.")
366
  with gr.Row():
 
369
  overflow_row_behaviour="show_ends",
370
  label="Table of softmax probability for pronouns predictions",
371
  )
372
+ gr.Markdown("X-axis sorted by older to more recent dates:")
373
  place_gen = gr.Button('Populate fields with a location example')
374
 
375
+ gr.Markdown("X-axis sorted by bottom 10 and top 10 Global Gender Gap ranked countries:")
376
  date_gen = gr.Button('Populate fields with a date example')
377
 
378
+ gr.Markdown("X-axis sorted in order of increasing self-identified female participation (see [bburky demo](http://bburky.com/subredditgenderratios/)): ")
379
  subreddit_gen = gr.Button('Populate fields with a subreddit example')
380
 
 
381
  with gr.Row():
382
+
383
  date_gen.click(date_fn, inputs=[], outputs=[model_name,
384
+ x_axis, place_holder, to_normalize, input_text])
385
  place_gen.click(place_fn, inputs=[], outputs=[
386
+ model_name, x_axis, place_holder, to_normalize, input_text])
387
  subreddit_gen.click(reddit_fn, inputs=[], outputs=[
388
+ model_name, x_axis, place_holder, to_normalize, input_text])
389
  with gr.Row():
390
  btn = gr.Button("Hit submit")
391
  btn.click(
392
  predict_gender_pronouns,
393
  inputs=[model_name, x_axis, place_holder,
394
+ to_normalize, input_text],
395
  outputs=[sample_text, female_fig, male_fig, df])
396
 
397
  demo.launch(debug=True)
398
+
399
+
400
+ # %%