Yacine Jernite committed on
Commit
cc73923
1 Parent(s): 969e2c4

completed_v1

Browse files
Files changed (2) hide show
  1. datacards/context.py +17 -21
  2. datacards/curation.py +6 -1
datacards/context.py CHANGED
@@ -73,29 +73,25 @@ def context_page():
73
  st.session_state.card_dict["context"]["biases"] = st.session_state.card_dict[
74
  "context"
75
  ].get("biases", {})
76
-
77
- make_text_area(
78
- label="Are there documented biases in the data?",
79
- key_list=key_pref + [""],
80
- help="",
81
- )
82
-
83
- make_text_area(
84
- label="Link to analyses",
85
- key_list=key_pref + [""],
86
- help="",
87
- )
88
-
89
- make_text_area(
90
- label="How does the distribution of language producers differ from a base distribution?",
91
- key_list=key_pref + [""],
92
- help="",
93
  )
94
-
 
 
 
 
 
 
 
95
  make_text_area(
96
- label="Topic coverage?",
97
- key_list=key_pref + [""],
98
- help="",
99
  )
100
 
101
 
 
73
  st.session_state.card_dict["context"]["biases"] = st.session_state.card_dict[
74
  "context"
75
  ].get("biases", {})
76
+ make_radio(
77
+ label="Are there documented social biases in the dataset? " + \
78
+ "Biases in this context are variations in the ways members of different social categories are represented that can have harmful downstream consequences for members of the more disadvantaged group.",
79
+ options=["yes", "unsure", "no"],
80
+ key_list=key_pref + ["has-biases"],
81
+ help="For a more extensive definition of social biases, see [Language (Technology) is Power: A Critical Survey of “Bias” in NLP ](https://aclanthology.org/2020.acl-main.485.pdf)",
 
 
 
 
 
 
 
 
 
 
 
82
  )
83
+ if st.session_state.card_dict["context"]["biases"]["has-biases"] == "yes":
84
+ make_text_area(
85
+ label="Provide links to and summaries of works analyzing these biases.",
86
+ key_list=key_pref + ["bias-analyses"],
87
+ help="The analyses can take the form of academic papers or news articles, or even blog posts.",
88
+ )
89
+ else:
90
+ st.session_state.card_dict["context"]["biases"]["bias-analyses"] = "N/A"
91
  make_text_area(
92
+ label="Does the distribution of language producers in the dataset accurately represent the full distribution of speakers of the language world-wide? If not, how does it differ?",
93
+ key_list=key_pref + ["speaker-distibution"],
94
+ help="For example, are most speakers in the dataset of a certain gender or located in a certain county?",
95
  )
96
 
97
 
datacards/curation.py CHANGED
@@ -11,7 +11,7 @@ from .streamlit_utils import (
11
  )
12
 
13
  N_FIELDS_ORIGINAL = 4
14
- N_FIELDS_LANGUAGE = 10
15
  N_FIELDS_ANNOTATIONS = 10
16
  N_FIELDS_CONSENT = 4
17
  N_FIELDS_PII = 7
@@ -120,6 +120,11 @@ def curation_page():
120
  key_list=key_pref + ["producers-description"],
121
  help="Provide a description of the context in which the language was produced and who produced it.",
122
  )
 
 
 
 
 
123
  make_selectbox(
124
  label="Was the text validated by a different worker or a data curator?",
125
  options=[
 
11
  )
12
 
13
  N_FIELDS_ORIGINAL = 4
14
+ N_FIELDS_LANGUAGE = 11
15
  N_FIELDS_ANNOTATIONS = 10
16
  N_FIELDS_CONSENT = 4
17
  N_FIELDS_PII = 7
 
120
  key_list=key_pref + ["producers-description"],
121
  help="Provide a description of the context in which the language was produced and who produced it.",
122
  )
123
+ make_text_area(
124
+ label="Does the language in the dataset focus on specific topics? How would you describe them?",
125
+ key_list=key_pref + ["topics"],
126
+ help="for example, tourism, entertainment, etc.",
127
+ )
128
  make_selectbox(
129
  label="Was the text validated by a different worker or a data curator?",
130
  options=[