emilylearning committed on
Commit a1d9fca
1 Parent(s): b2d7917

fig narrower, clean up text, rem overlapping plot label

Files changed (1)
  1. app.py +8 -13
app.py CHANGED
@@ -178,7 +178,7 @@ def get_figure(df, gender, n_fit=1):
     ys = df[cols[0]]
     fig, ax = plt.subplots()
     # Trying small fig due to rendering issues on HF, not on VS Code
-    fig.set_figheight(4)
+    fig.set_figheight(3)
     fig.set_figwidth(8)
 
     # find stackoverflow reference
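For reference, a minimal sizing sketch (not part of the commit): passing `figsize` at creation time gives the same 8in wide by 3in tall figure as the `set_figheight(3)` / `set_figwidth(8)` calls in the new code; the plotted data below is a placeholder.

```python
# Minimal sketch, not from app.py: only the 8in x 3in sizing mirrors the
# commit; the data points are placeholders.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 3))  # (width, height) in inches
ax.plot([1800, 1900, 1999], [0.30, 0.45, 0.60])  # hypothetical sweep values
ax.set_xlabel("date")
ax.set_ylabel("softmax probability")
fig.savefig("fig_preview.png", bbox_inches="tight")
```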
@@ -217,7 +217,7 @@ def predict_gender_pronouns(
     normalizing,
     input_text,
 ):
-    """Run inference on input_text for each model type, returning df and plots of precentage
+    """Run inference on input_text for each model type, returning df and plots of percentage
     of gender pronouns predicted as female and male in each target text.
     """
     model = models[model_type]
@@ -323,15 +323,12 @@ def reddit_fn():
 demo = gr.Blocks()
 with demo:
     gr.Markdown("## Hunt for spurious correlations in our LLMs.")
-    gr.Markdown("Although genders are relatively evenly distributed across time, place and interests, there are also known gender disparities in terms of access to resources. We suggest that this access disparity can result in dataset selection bias, causing models to learn a surprising range of spurious associations.")
+    gr.Markdown("Although genders are relatively evenly distributed across time, place and interests, there are also known gender disparities in terms of access to resources. Here we demonstrate that this access disparity can result in dataset selection bias, causing models to learn a surprising range of spurious associations.")
     gr.Markdown("These spurious associations are often considered undesirable, as they do not match our intuition about the real-world domain from which we derive samples for inference-time prediction.")
-    gr.Markdown("Selection bias of samples into datasets is a zero-sum-game, with even our high quality datasets forced to trade off one for another, thus inducing selection bias into the learned associations of the model.")
+    gr.Markdown("Selection of samples into datasets is a zero-sum-game, with even our high quality datasets forced to trade off one for another, thus inducing selection bias into the learned associations of the model.")
 
-
-    gr.Markdown("One intuitive way to see the impact that changing one variable may have upon another is to look for a dose-response relationship, in which a larger intervention in the treatment (the value in text form injected in the otherwise unchanged text sample) produces a larger response in the output (the softmax probability of a gendered pronoun). Specifically, below are examples of sweeping through a spectrum of place, date and subreddit interest (we encourage you to try your own).")
-
-    gr.Markdown("This requires a spectrum of less to more gender-equal values for each covariate. For date, it’s easy to just use time itself, as gender equality has generally improved with time, so we picked years ranging from 1800 - 1999. For place we used the bottom and top 10 Global Gender Gap ranked countries. And for subreddit, we use subreddit name ordered by subreddits that have an increasingly larger percentage of self-reported female commenters.")
-    #gr.Markdown("Please see a better explanation in another [Space](https://huggingface.co/spaces/emilylearning/causing_gender_pronouns_two).")
+    gr.Markdown("### Dose-response Relationship.")
+    gr.Markdown("One intuitive way to see the impact that changing one variable may have upon another is to look for a dose-response relationship, in which a larger intervention in the treatment (the value in text form injected in the otherwise unchanged text sample) produces a larger response in the output (the softmax probability of a gendered pronoun). Specifically, below are examples of sweeping through a spectrum of place, date and subreddit interest. We encourage you to try your own!")
 
     with gr.Row():
         x_axis = gr.Textbox(
@@ -362,10 +359,8 @@ with demo:
         sample_text = gr.Textbox(
             type="auto", label="Output text: Sample of text fed to model")
     with gr.Row():
-        female_fig = gr.Plot(
-            type="auto", label="Plot of softmax probability pronouns predicted female.")
-        male_fig = gr.Plot(
-            type="auto", label="Plot of softmax probability pronouns predicted male.")
+        female_fig = gr.Plot(type="auto")
+        male_fig = gr.Plot(type="auto")
     with gr.Row():
         df = gr.Dataframe(
             show_label=True,
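A layout sketch for the last hunk, assuming the Gradio version this Space was built against (where `gr.Plot` accepted a `type` argument): dropping the long `label` strings is what removes the overlapping plot labels named in the commit message.

```python
# Sketch only: two unlabeled Plot components side by side, as in the new
# code. Assumes the older Gradio API where gr.Plot took type="auto";
# recent Gradio versions detect the plot type automatically instead.
import gradio as gr

with gr.Blocks() as demo:
    with gr.Row():
        female_fig = gr.Plot(type="auto")  # no label, so nothing overlaps the figure
        male_fig = gr.Plot(type="auto")
```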
 