Rafal committed on
Commit
2a7e3b8
1 Parent(s): f818770

Changed the background of Accordions and the per-sentence progress shown while getting sentences

Browse files
Files changed (4) hide show
  1. app.py +13 -13
  2. mgr_bias_scoring.py +1 -1
  3. mgr_requests.py +7 -2
  4. mgr_sentences.py +2 -1
app.py CHANGED
@@ -275,8 +275,6 @@ def _genSentenceCoverMsg(test_sentences, total_att_terms, bias_spec, isGen=False
275
  # missing pairs spec
276
  bt_mgr.genMissingPairsSpec(bias_spec, test_sentences_df)
277
 
278
-
279
-
280
  att1_missing_num = sum([v for k, v in att1_missing.items()])
281
  att2_missing_num = sum([v for k, v in att2_missing.items()])
282
  total_missing = att1_missing_num + att2_missing_num
@@ -287,9 +285,9 @@ def _genSentenceCoverMsg(test_sentences, total_att_terms, bias_spec, isGen=False
287
  source_msg = "Found" if isGen==False else "Generated"
288
  if num_covered_atts >= total_att_terms:
289
  if total_missing > 0:
290
- info_msg = f"**{source_msg} {len(test_sentences)} sentences covering all bias specification attributes, but some attributes are underepresented. Generating additional {total_missing} sentences is suggested.**"
291
  else:
292
- info_msg = f"**{source_msg} {len(test_sentences)} sentences covering all bias specification attributes. Please select model to test.**"
293
  else:
294
  info_msg = f"**{source_msg} {len(test_sentences)} sentences covering {num_covered_atts} of {total_att_terms} attributes. Please select model to test.**"
295
 
@@ -707,8 +705,7 @@ def useOnlineGen(value):
707
 
708
  def changeTerm(evt: gr.EventData):
709
  global G_CORE_BIAS_NAME
710
-
711
- print("Bias is custom now...")
712
 
713
  G_CORE_BIAS_NAME = None
714
 
@@ -805,7 +802,10 @@ css_adds = "#group_row {background: white; border-color: white;} \
805
  #filled:hover .tooltiptext_left {visibility: visible;} \
806
  #empty:hover .tooltiptext_left {visibility: visible;} \
807
  #filled:hover .tooltiptext_right {visibility: visible;} \
808
- #empty:hover .tooltiptext_right {visibility: visible;}"
 
 
 
809
 
810
  #'bethecloud/storj_theme'
811
  with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
@@ -917,7 +917,7 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
917
  gr.Markdown(" ")
918
 
919
  with gr.Row(visible=False) as row_sentences:
920
- with gr.Accordion(label="Test Sentences", open=False, visible=False) as acc_test_sentences:
921
  test_sentences = gr.DataFrame(
922
  headers=["Sentence", "Alternative Sentence", "Group term 1", "Group term 2", "Attribute term"],
923
  datatype=["str", "str", "str", "str", "str"],
@@ -950,8 +950,8 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
950
  model_bias_label = gr.Label(num_top_classes=1, label="% stereotyped choices (↑ more bias)",
951
  elem_id="res_label",
952
  show_label=False)
953
- with gr.Accordion("Additional Interpretation", open=False, visible=True):
954
- interpretation_msg = gr.HTML(value="Interpretation: Stereotype Score metric details in <a href='https://arxiv.org/abs/2004.09456'>Nadeem'20<a>", visible=False)
955
 
956
  lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
957
  #gr.Markdown("**Legend**")
@@ -971,13 +971,13 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
971
  gr.Markdown("#### Attribute Group 2")
972
  attribute_bias_html_antistereo = gr.HTML()
973
 
974
- gr.HTML(value="Visualization inspired by <a href='https://www.bloomberg.com/graphics/2023-generative-ai-bias/' target='_blank'>Bloomberg article on bias in text-to-image models</a>.")
975
  save_msg = gr.HTML(value="<span style=\"color:black\">Bias test result saved! </span>",
976
  visible=False)
977
 
978
  with gr.Row():
979
  with gr.Column(scale=2):
980
- with gr.Accordion("Per Sentence Bias Results", open=False, visible=True):
981
  test_pairs = gr.DataFrame(
982
  headers=["group_term", "template", "att_term_1", "att_term_2","label_1","label_2"],
983
  datatype=["str", "str", "str", "str", "str", "str"],
@@ -1071,7 +1071,7 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
1071
  exp_button.click(export_csv,
1072
  inputs=[test_pairs, group1, group2, att1, att2],
1073
  outputs=[csv])
1074
-
1075
  # Changing any of the bias specification terms
1076
  group1.change(fn=changeTerm, inputs=[], outputs=[csv])
1077
  group2.change(fn=changeTerm, inputs=[], outputs=[csv])
 
275
  # missing pairs spec
276
  bt_mgr.genMissingPairsSpec(bias_spec, test_sentences_df)
277
 
 
 
278
  att1_missing_num = sum([v for k, v in att1_missing.items()])
279
  att2_missing_num = sum([v for k, v in att2_missing.items()])
280
  total_missing = att1_missing_num + att2_missing_num
 
285
  source_msg = "Found" if isGen==False else "Generated"
286
  if num_covered_atts >= total_att_terms:
287
  if total_missing > 0:
288
+ info_msg = f"**{source_msg} {len(test_sentences)} sentences covering all bias specification attributes, but some attributes are underepresented (see at the bottom). Generating additional {total_missing} sentences is suggested.**"
289
  else:
290
+ info_msg = f"**{source_msg} {len(test_sentences)} sentences covering all bias specification attributes (see at the bottom). Please select model to test.**"
291
  else:
292
  info_msg = f"**{source_msg} {len(test_sentences)} sentences covering {num_covered_atts} of {total_att_terms} attributes. Please select model to test.**"
293
 
 
705
 
706
  def changeTerm(evt: gr.EventData):
707
  global G_CORE_BIAS_NAME
708
+ #print("Bias is custom now...")
 
709
 
710
  G_CORE_BIAS_NAME = None
711
 
 
802
  #filled:hover .tooltiptext_left {visibility: visible;} \
803
  #empty:hover .tooltiptext_left {visibility: visible;} \
804
  #filled:hover .tooltiptext_right {visibility: visible;} \
805
+ #empty:hover .tooltiptext_right {visibility: visible;} \
806
+ #add_interpret {border: 1px solid #f1f5f9; background: #F8FAFC;} \
807
+ #per_sent_bias_accordion {border: 1px solid #f1f5f9; background: #F8FAFC;} \
808
+ #test_sentences_accordion {border: 1px solid #f1f5f9; background: #F8FAFC;}"
809
 
810
  #'bethecloud/storj_theme'
811
  with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
 
917
  gr.Markdown(" ")
918
 
919
  with gr.Row(visible=False) as row_sentences:
920
+ with gr.Accordion(label="Test Sentences", open=False, visible=False, elem_id="test_sentences_accordion") as acc_test_sentences:
921
  test_sentences = gr.DataFrame(
922
  headers=["Sentence", "Alternative Sentence", "Group term 1", "Group term 2", "Attribute term"],
923
  datatype=["str", "str", "str", "str", "str"],
 
950
  model_bias_label = gr.Label(num_top_classes=1, label="% stereotyped choices (↑ more bias)",
951
  elem_id="res_label",
952
  show_label=False)
953
+ with gr.Accordion("Additional Interpretation", open=False, visible=True, elem_id="add_interpret") as interpret_accordion:
954
+ interpretation_msg = gr.HTML(value="Interpretation: Stereotype Score metric details in <a href='https://arxiv.org/abs/2004.09456'>Nadeem et al. 2020<a>", visible=False)
955
 
956
  lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
957
  #gr.Markdown("**Legend**")
 
971
  gr.Markdown("#### Attribute Group 2")
972
  attribute_bias_html_antistereo = gr.HTML()
973
 
974
+ gr.HTML(value="Visualization inspired by <a href='https://www.bloomberg.com/graphics/2023-generative-ai-bias/' target='_blank'>Bloomberg article on bias in text-to-image models</a>.<br />While we perform several checks, the tool still relies on ChatGPT generations, please examine the sentences for quality.")
975
  save_msg = gr.HTML(value="<span style=\"color:black\">Bias test result saved! </span>",
976
  visible=False)
977
 
978
  with gr.Row():
979
  with gr.Column(scale=2):
980
+ with gr.Accordion("Per Sentence Bias Results", open=False, visible=True, elem_id="per_sent_bias_accordion"):
981
  test_pairs = gr.DataFrame(
982
  headers=["group_term", "template", "att_term_1", "att_term_2","label_1","label_2"],
983
  datatype=["str", "str", "str", "str", "str", "str"],
 
1071
  exp_button.click(export_csv,
1072
  inputs=[test_pairs, group1, group2, att1, att2],
1073
  outputs=[csv])
1074
+
1075
  # Changing any of the bias specification terms
1076
  group1.change(fn=changeTerm, inputs=[], outputs=[csv])
1077
  group2.change(fn=changeTerm, inputs=[], outputs=[csv])
mgr_bias_scoring.py CHANGED
@@ -905,7 +905,7 @@ def _constructInterpretationMsg(bias_spec, num_sentences, model_name, bias_stats
905
  interpret_msg += "<div style=\"margin-top: 3px; margin-left: 3px\"><b>◼ </b>" + att1_msg + "<br /></div>"
906
  interpret_msg += "<div style=\"margin-top: 3px; margin-left: 3px; margin-bottom: 3px\"><b>◼ </b>" + att2_msg + "<br /></div>"
907
  interpret_msg += "Please examine the exact test sentences used below."
908
- interpret_msg += "<br />More details about Stereotype Score metric: <a href='https://arxiv.org/abs/2004.09456' target='_blank'>Nadeem'20<a>"
909
 
910
  return interpret_msg
911
 
 
905
  interpret_msg += "<div style=\"margin-top: 3px; margin-left: 3px\"><b>◼ </b>" + att1_msg + "<br /></div>"
906
  interpret_msg += "<div style=\"margin-top: 3px; margin-left: 3px; margin-bottom: 3px\"><b>◼ </b>" + att2_msg + "<br /></div>"
907
  interpret_msg += "Please examine the exact test sentences used below."
908
+ interpret_msg += "<br />More details about Stereotype Score metric: <a href='https://arxiv.org/abs/2004.09456' target='_blank'>Nadeem et al. 2020<a>"
909
 
910
  return interpret_msg
911
 
mgr_requests.py CHANGED
@@ -171,9 +171,14 @@ def _getSavedSentences(bias_spec, progress, use_paper_sentences):
171
  att_list.extend(att_list_nospace)
172
  att_list = list(set(att_list))
173
 
174
- progress(gi/len(g1+g2), desc=f"{g_term}")
175
 
176
- _, sentence_df, _ = smgr.getSavedSentences(g_term)
 
 
 
 
 
177
  # only take from paper & gpt3.5
178
  flt_gen_models = ["gpt-3.5","gpt-3.5-turbo","gpt-4"]
179
  print(f"Before filter: {sentence_df.shape[0]}")
 
171
  att_list.extend(att_list_nospace)
172
  att_list = list(set(att_list))
173
 
174
+ #progress(gi/len(g1+g2), desc=f"{g_term}")
175
 
176
+ _, sentence_df, _ = smgr.getSavedSentences(g_term)#, gi, len(g1+g2), progress)
177
+ if sentence_df.shape[0] > 0:
178
+ progress(gi/len(g1+g2), desc=f"{sentence_df['sentence'].tolist()[0]}")
179
+ else:
180
+ progress(gi/len(g1+g2), desc=f"{g_term}")
181
+
182
  # only take from paper & gpt3.5
183
  flt_gen_models = ["gpt-3.5","gpt-3.5-turbo","gpt-4"]
184
  print(f"Before filter: {sentence_df.shape[0]}")
mgr_sentences.py CHANGED
@@ -90,13 +90,14 @@ def get_sentence_csv(file_path: str):
90
 
91
  return df
92
 
93
- def getSavedSentences(grp):
94
  filename = f"{grp.replace(' ','-')}.csv"
95
  sentence_df = pd.DataFrame()
96
 
97
  try:
98
  text = f"Loading sentences: {filename}\n"
99
  sentence_df = get_sentence_csv(filename)
 
100
 
101
  except Exception as e:
102
  text = f"Error, no saved generations for {filename}"
 
90
 
91
  return df
92
 
93
+ def getSavedSentences(grp): #, gi, total_grp_len, progress):
94
  filename = f"{grp.replace(' ','-')}.csv"
95
  sentence_df = pd.DataFrame()
96
 
97
  try:
98
  text = f"Loading sentences: {filename}\n"
99
  sentence_df = get_sentence_csv(filename)
100
+ #progress(gi/total_grp_len, desc=f"{sentence_df[0]}")
101
 
102
  except Exception as e:
103
  text = f"Error, no saved generations for {filename}"