emilylearning committed on
Commit
a828a08
•
1 Parent(s): 25dd383

update explanations / comments

Browse files
Files changed (4) hide show
  1. README.md +6 -12
  2. app.py +24 -7
  3. spec_metric_result.png +0 -0
  4. winogender_sentences.py +1 -1
README.md CHANGED
@@ -1,19 +1,13 @@
1
  ---
2
- title: Llm Uncertainty
3
- emoji: 👀
4
- colorFrom: indigo
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 3.1.7
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- # Setup env:
13
- ```
14
- python3 -m venv venv_llm
15
- source venv_llm/bin/activate
16
- pip install -r requirements.txt
17
- ```
18
-
19
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Uncertainty
3
+ emoji: 🐠
4
+ colorFrom: pink
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 3.9
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
 
 
 
 
 
 
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -210,15 +210,30 @@ demo = gr.Blocks()
210
  with demo:
211
  input_texts = gr.Variable([])
212
  gr.Markdown("**Detect Task Specification at Inference-time.**")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
- gr.Markdown("**Follow the numbered steps below to test one of the pre-loaded options.** Once you get the hang of it, you can load a new model and/or provide your own input texts.")
215
  gr.Markdown(f"""1) Pick a preloaded BERT-like model.
216
  *Note: RoBERTa-large performance is best.*
217
- 2) Pick an Occupation type from the Winogender Schemas evaluation set.
218
  *Or select '{PICK_YOUR_OWN_LABEL}' (it need not be about an occupation).*
219
- 3) Click button to load input texts.
220
  *Read the sentences to determine which two are well-specified for gendered pronoun coreference resolution. The rest are gender-unspecified.*
221
- 4) Click button to get Task Specification Metric results!
222
  """)
223
 
224
 
@@ -249,7 +264,7 @@ with demo:
249
  )
250
 
251
  with gr.Row():
252
- get_text_btn = gr.Button("3) Click to load input texts.)")
253
 
254
  get_text_btn.click(
255
  fn=display_input_texts,
@@ -262,7 +277,9 @@ with demo:
262
  with gr.Row():
263
  uncertain_btn = gr.Button("4) Click to get Task Specification Metric results!")
264
  gr.Markdown(
265
- "If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.")
 
 
266
 
267
  with gr.Row():
268
  female_fig = gr.Plot(type="auto")
@@ -270,7 +287,7 @@ with demo:
270
  female_df = gr.Dataframe()
271
  with gr.Row():
272
  display_text = gr.Textbox(
273
- type="auto", label="Sample of text fed to model")
274
 
275
  uncertain_btn.click(
276
  fn=predict_gender_pronouns,
 
210
  with demo:
211
  input_texts = gr.Variable([])
212
  gr.Markdown("**Detect Task Specification at Inference-time.**")
213
+ gr.Markdown("""This method exploits the specification-induced spurious correlations demonstrated in this
214
+ [Spurious Correlations Hugging Face Space](https://huggingface.co/spaces/anonymousauthorsanonymous/spurious) to detect task specification at inference-time.
215
+ For this method, well-specified tasks should have a lower specification metric value, and unspecified tasks should have a higher specification metric value.
216
+ """)
217
+
218
+ gr.Markdown("""As an example, see the figure below with test sentences from the [Winogender schema](https://aclanthology.org/N18-2002/) for the occupation of `Doctor`.
219
+ With a close read, you can see that only sentence numbers (3) and (4) are well-specified for the gendered pronoun resolution task:
220
+ the masked pronoun is coreferent with the `man` or `woman`; the remainder are unspecified: the masked pronoun is coreferent with a gender-unspecified person.
221
+
222
+ In this example we have 100\% accurate detection with the specification metric near zero for only sentence (3) and (4).
223
+ <p align="center">
224
+ <img src="file/spec_metric_result.png" alt="results" width="500"/>
225
+ </p>
226
+ """)
227
+
228
 
229
+ gr.Markdown("**To test this for yourself, follow the numbered steps below to test one of the pre-loaded options.** Once you get the hang of it, you can load a new model and/or provide your own input texts.")
230
  gr.Markdown(f"""1) Pick a preloaded BERT-like model.
231
  *Note: RoBERTa-large performance is best.*
232
+ 2) Pick an Occupation type from the Winogender Schemas evaluation set.
233
  *Or select '{PICK_YOUR_OWN_LABEL}' (it need not be about an occupation).*
234
+ 3) Click the first button to load input texts.
235
  *Read the sentences to determine which two are well-specified for gendered pronoun coreference resolution. The rest are gender-unspecified.*
236
+ 4) Click the second button to get Task Specification Metric results.
237
  """)
238
 
239
 
 
264
  )
265
 
266
  with gr.Row():
267
+ get_text_btn = gr.Button("3) Click to load input texts.")
268
 
269
  get_text_btn.click(
270
  fn=display_input_texts,
 
277
  with gr.Row():
278
  uncertain_btn = gr.Button("4) Click to get Task Specification Metric results!")
279
  gr.Markdown(
280
+ """We expect a lower specification metric value for well-specified tasks.
281
+
282
+ Note: If there is an * by a sentence number, then at least one top prediction for that sentence was non-gendered.""")
283
 
284
  with gr.Row():
285
  female_fig = gr.Plot(type="auto")
 
287
  female_df = gr.Dataframe()
288
  with gr.Row():
289
  display_text = gr.Textbox(
290
+ type="text", label="Sample of text fed to model")
291
 
292
  uncertain_btn.click(
293
  fn=predict_gender_pronouns,
spec_metric_result.png ADDED
winogender_sentences.py CHANGED
@@ -1,6 +1,6 @@
1
  ######################################################################
2
  ##
3
- ## This script is a lightly modifed version fo taht provided in winogender-schemas
4
  ## https://github.com/rudinger/winogender-schemas
5
  ##
6
  ######################################################################
 
1
  ######################################################################
2
  ##
3
+ ## This script is a lightly modified version of that provided in winogender-schemas
4
  ## https://github.com/rudinger/winogender-schemas
5
  ##
6
  ######################################################################