emilylearning committed
Commit 25dd383
1 Parent(s): 7c94469

update markdown

Files changed (1):
  app.py +36 -36
app.py CHANGED
@@ -1,4 +1,3 @@
-# Model card: https://huggingface.co/emilylearning/selection-induced-collider-bias
 # %%
 import gradio as gr
 import matplotlib.pyplot as plt
@@ -17,7 +16,6 @@ MODEL_NAME_DICT = {
     "bert-large-uncased": "BERT-large",
     "roberta-base": "RoBERTa-base",
     "bert-base-uncased": "BERT-base",
-    "olm/olm-roberta-base-oct-2022": "OLM_RoBERTa-base",
     OWN_MODEL_NAME: "Your model's"
 }
 MODEL_NAMES = list(MODEL_NAME_DICT.keys())
@@ -51,10 +49,10 @@ GENDERED_LIST = [
 ]
 
 
-
 # %%
 # Fire up the models
-models = {m : pipeline("fill-mask", model=m) for m in MODEL_NAMES if m != OWN_MODEL_NAME}
+models = {m: pipeline("fill-mask", model=m)
+          for m in MODEL_NAMES if m != OWN_MODEL_NAME}
 
 # %%
 # Get the winogender sentences
@@ -63,7 +61,9 @@ occs = sorted(list({sentence_id.split('_')[0]
                     for sentence_id in winogender_sentences}))
 
 # %%
-def get_gendered_tokens():
+
+
+def get_gendered_token_ids():
     male_gendered_tokens = [list[0] for list in GENDERED_LIST]
     female_gendered_tokens = [list[1] for list in GENDERED_LIST]
 
@@ -109,8 +109,9 @@ def get_figure(df, model_name, occ):
     ax.bar(xs, ys)
     ax.axis('tight')
     ax.set_xlabel("Sentence number")
-    ax.set_ylabel("Uncertainty metric")
-    ax.set_title(f"{MODEL_NAME_DICT[model_name]} gender pronoun uncertainty in '{occ}' sentences")
+    ax.set_ylabel("Specification Metric")
+    ax.set_title(
+        f"Task Specification Metric on {MODEL_NAME_DICT[model_name]} for '{occ}' sentences")
     return fig
 
 
@@ -121,7 +122,7 @@ def predict_gender_pronouns(
     texts,
     occ,
 ):
-    """Run inference on input_text for selected model type, returning uncertainty results.
+    """Run inference on input_text for selected model type, returning Task Specification metric results.
     """
 
     # TODO: make these selectable by user
@@ -130,8 +131,8 @@ def predict_gender_pronouns(
 
     # For debugging
     print('input_texts', texts)
-
-    if model_name is None or model_name == '':
+
+    if model_name is None or model_name == '':
         model_name = MODEL_NAMES[0]
         model = models[model_name]
     elif model_name == OWN_MODEL_NAME:
@@ -143,7 +144,7 @@ def predict_gender_pronouns(
 
     indie_vars_list = indie_vars.split(',')
 
-    male_gendered_tokens, female_gendered_tokens = get_gendered_tokens()
+    male_gendered_tokens, female_gendered_tokens = get_gendered_token_ids()
 
     masked_texts = [text.replace('MASK', mask_token) for text in texts]
 
@@ -193,7 +194,7 @@ def predict_gender_pronouns(
         / num_ave), DECIMAL_PLACES)
 
     uncertain_df = pd.DataFrame.from_dict(
-        all_uncertainty_f, orient='index', columns=['Uncertainty metric'])
+        all_uncertainty_f, orient='index', columns=['Specification Metric'])
 
     uncertain_df = uncertain_df.reset_index().rename(
         columns={'index': 'Sentence number'})
@@ -208,28 +209,26 @@ def predict_gender_pronouns(
 demo = gr.Blocks()
 with demo:
     input_texts = gr.Variable([])
-    gr.Markdown("## Are you certain?")
-    gr.Markdown(
-        "#### LLMs are pretty good at reporting their uncertainty. We just need to ask the right way.")
-    gr.Markdown("Using our uncertainty metric informed by applying causal inference techniques in \
-        ['Selection Induced Collider Bias: A Gender Pronoun Uncertainty Case Study'](https://arxiv.org/abs/2210.00131 ), \
-        we are able to identify likely spurious correlations and exploit them in \
-        the scenario of gender underspecified tasks. (Note that introspecting softmax probabilities alone is insufficient, as in the sentences \
-        below, LLMs may report a softmax prob of ~0.9 despite the task being underspecified.)")
-
-    gr.Markdown("We extend the [Winogender Schemas](https://github.com/rudinger/winogender-schemas) evaluation set to produce\
-        eight syntactically similar sentences. However semantically, \
-        only two of the sentences are gender-specified while the rest remain gender-underspecified")
-    gr.Markdown("If a model can reliably tell us when it is uncertain about its predictions, one can replace only those uncertain predictions with\
-        an appropriate heuristic or information retrieval process.")
-
-    gr.Markdown("#### TL;DR")
-    gr.Markdown("Follow steps below to test out one of the pre-loaded options. Once you get the hang of it, you can load a new model and/or provide your own input texts.")
+    gr.Markdown("**Detect Task Specification at Inference-time.**")
+
+    gr.Markdown("**Follow the numbered steps below to test one of the pre-loaded options.** Once you get the hang of it, you can load a new model and/or provide your own input texts.")
+    gr.Markdown(f"""1) Pick a preloaded BERT-like model.
+    *Note: RoBERTa-large performance is best.*
+    2) Pick an Occupation type from the Winogender Schemas evaluation set.
+    *Or select '{PICK_YOUR_OWN_LABEL}' (it need not be about an occupation).*
+    3) Click button to load input texts.
+    *Read the sentences to determine which two are well-specified for gendered pronoun coreference resolution. The rest are gender-unspecified.*
+    4) Click button to get Task Specification Metric results!
+    """)
+
+
+
 
     with gr.Row():
         model_name = gr.Radio(
             MODEL_NAMES,
-            label="1) Pick a preloaded BERT-like model for uncertainty evaluation (note: RoBERTa-large performance is best)...",
+            type="value",
+            label="1) Pick a preloaded BERT-like model (note: RoBERTa-large performance is best).",
         )
         own_model_name = gr.Textbox(
             label=f"...Or, if you selected an '{OWN_MODEL_NAME}' model, put any Hugging Face pipeline model name \
@@ -246,11 +245,11 @@ with demo:
             lines=2,
             label=f"...Or, if you selected '{PICK_YOUR_OWN_LABEL}' above, add your own texts new-line delimited sentences here. Be sure\
                 to include a single MASK-ed out pronoun. \
-                If unsure on the required format, click an occupation above instead, to see some example input texts for this round.",
+                If unsure on the required format, click an occupation above instead, to see some example input texts for this round."
        )
 
     with gr.Row():
-        get_text_btn = gr.Button("3) Load input texts")
+        get_text_btn = gr.Button("3) Click to load input texts.")
 
     get_text_btn.click(
         fn=display_input_texts,
@@ -261,16 +260,17 @@ with demo:
     )
 
     with gr.Row():
-        uncertain_btn = gr.Button("4) Get uncertainty results!")
+        uncertain_btn = gr.Button("4) Click to get Task Specification Metric results!")
     gr.Markdown(
         "If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
 
     with gr.Row():
-        female_fig = gr.Plot()
+        female_fig = gr.Plot(type="auto")
     with gr.Row():
         female_df = gr.Dataframe()
     with gr.Row():
-        display_text = gr.Textbox(label="Sample of text fed to model")
+        display_text = gr.Textbox(
+            type="auto", label="Sample of text fed to model")
 
     uncertain_btn.click(
         fn=predict_gender_pronouns,
@@ -281,4 +281,4 @@ with demo:
 
 demo.launch(debug=True)
 
-# %%
+# %%
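
Context for reviewers (not part of this commit): the dictionary comprehension reformatted above builds one fill-mask pipeline per preloaded model, and predict_gender_pronouns later swaps the literal 'MASK' placeholder for each model's own mask token. Below is a minimal sketch of that query pattern using the standard transformers API; the model choice and example sentence are illustrative only.

# Illustrative sketch, not from app.py: querying a fill-mask pipeline
# after replacing the literal 'MASK' placeholder with the model's own
# mask token, mirroring what predict_gender_pronouns does.
from transformers import pipeline

fill_mask = pipeline("fill-mask", model="roberta-base")
mask_token = fill_mask.tokenizer.mask_token  # "<mask>" for RoBERTa, "[MASK]" for BERT

text = "The doctor told the patient that MASK would call back tomorrow."
for pred in fill_mask(text.replace("MASK", mask_token)):
    # Each prediction carries the filled token and its softmax score.
    print(pred["token_str"], round(pred["score"], 3))

The sentence above only shows the required input format; the app draws its real inputs from the Winogender-derived sentences or from user-provided text.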
 
 
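One more sketch for context (again, not part of the commit): the truncated .click( calls in the diff follow Gradio's Blocks event-wiring pattern, where a Button's click event runs a function and routes its return values into the listed output components. All names below are illustrative stand-ins.

# Illustrative sketch of the gr.Blocks wiring pattern app.py relies on:
# button click -> callback -> output components.
import gradio as gr

def echo_upper(text):
    # Toy callback standing in for functions like predict_gender_pronouns.
    return text.upper()

demo = gr.Blocks()
with demo:
    inp = gr.Textbox(label="Input")
    btn = gr.Button("Run")
    out = gr.Textbox(label="Output")
    btn.click(fn=echo_upper, inputs=[inp], outputs=[out])

demo.launch()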