Sebastian Gehrmann committed on
Commit
13fd677
1 Parent(s): 396d1e7

considerations

Browse files
datacards/considerations.py CHANGED
@@ -1,13 +1,97 @@
1
  import streamlit as st
2
 
3
- from .streamlit_utils import make_text_input
 
 
 
 
 
 
4
 
5
- N_FIELDS = 1
 
 
 
 
6
 
7
 
8
  def considerations_page():
9
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  def considerations_summary():
13
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
 
3
+ from .streamlit_utils import (
4
+ make_multiselect,
5
+ make_selectbox,
6
+ make_text_area,
7
+ make_text_input,
8
+ make_radio,
9
+ )
10
 
11
+ N_FIELDS_PII = 3
12
+ N_FIELDS_LICENSES = 3
13
+ N_FIELDS_LIMITATIONS = 4
14
+
15
+ N_FIELDS = N_FIELDS_PII + N_FIELDS_LICENSES + N_FIELDS_LIMITATIONS
16
 
17
 
18
  def considerations_page():
19
+ st.session_state.card_dict["considerations"] = st.session_state.card_dict.get(
20
+ "considerations", {}
21
+ )
22
+ with st.expander("PII Risks and Liability", expanded=False):
23
+ key_pref = ["considerations", "pii"]
24
+ st.session_state.card_dict["considerations"]["pii"] = st.session_state.card_dict[
25
+ "considerations"
26
+ ].get("pii", {})
27
+
28
+ # TODO: cross-link this section with curation.
29
+
30
+ with st.expander("Licenses", expanded=False):
31
+ key_pref = ["considerations", "licenses"]
32
+ st.session_state.card_dict["considerations"]["licenses"] = st.session_state.card_dict[
33
+ "considerations"
34
+ ].get("licenses", {})
35
+
36
+ # TODO: cross-link the first question with overview.py.
37
+
38
+ make_text_input(
39
+ label="Can the dataset be used for research and/or commercial purposes?",
40
+ key_list=key_pref + ["data-restrictions"],
41
+ help="Describe any restrictions put on how the data can be used.",
42
+ )
43
+ make_radio(
44
+ label="Are there restrictions on the underlying data?",
45
+ options=["Open", "Non-Commercial", "Copyrighted", "Other"],
46
+ key_list=key_pref + ["data-copyright"],
47
+ help="Are there restrictions on the underlying data?",
48
+ )
49
+
50
+ with st.expander("Known limitations", expanded=False):
51
+ key_pref = ["considerations", "limitations"]
52
+ st.session_state.card_dict["considerations"]["limitations"] = st.session_state.card_dict[
53
+ "considerations"
54
+ ].get("limitations", {})
55
+
56
+ # TODO: Form proper language
57
+
58
+ make_text_area(
59
+ label="Technical limitations, annotation noise, etc.",
60
+ key_list=key_pref + ["data-technical-limitations"],
61
+ help="",
62
+ )
63
+
64
+ make_text_area(
65
+ label="Particularly unsuited for applications",
66
+ key_list=key_pref + ["data-unsuited-applications"],
67
+ help="",
68
+ )
69
+
70
+ make_text_area(
71
+ label="What are discouraged use cases of the dataset?",
72
+ key_list=key_pref + ["data-discouraged-use"],
73
+ help="",
74
+ )
75
+
76
+ make_text_area(
77
+ label="Citation of work identifying these limitations",
78
+ key_list=key_pref + ["data-citations-limitations"],
79
+ help="",
80
+ )
81
 
82
 
83
  def considerations_summary():
84
+ total_filled = sum(
85
+ [len(dct) for dct in st.session_state.card_dict.get("considerations", {}).values()]
86
+ )
87
+ with st.expander(
88
+ f"Dataset Overview Completion - {total_filled} of {N_FIELDS}", expanded=False
89
+ ):
90
+ completion_markdown = ""
91
+ completion_markdown += (
92
+ f"- **Overall completion:**\n - {total_filled} of {N_FIELDS} fields\n"
93
+ )
94
+ completion_markdown += f"- **Sub-section - PII Risks and Liability:**\n - {len(st.session_state.card_dict.get('considerations', {}).get('pii', {}))} of {N_FIELDS_PII} fields\n"
95
+ completion_markdown += f"- **Sub-section - Licenses:**\n - {len(st.session_state.card_dict.get('considerations', {}).get('licenses', {}))} of {N_FIELDS_LICENSES} fields\n"
96
+ completion_markdown += f"- **Sub-section - Known limitations:**\n - {len(st.session_state.card_dict.get('considerations', {}).get('limitations', {}))} of {N_FIELDS_LIMITATIONS} fields\n"
97
+ st.markdown(completion_markdown)
datacards/curation.py CHANGED
@@ -72,17 +72,17 @@ def curation_page():
72
  make_multiselect(
73
  label="How was the language data obtained?",
74
  options=[
75
- "found",
76
- "created for the dataset",
77
- "crowdsourced",
78
- "machine-generated",
79
- "other",
80
  ],
81
  key_list=key_pref + ["obtained"],
82
  )
83
  make_multiselect(
84
  label="If found, where from?",
85
- options=["website", "offline media collection", "other", "N/A"],
86
  key_list=key_pref + ["found"],
87
  help="select N/A if none of the language data was found",
88
  )
@@ -90,9 +90,9 @@ def curation_page():
90
  label="If crowdsourced, where from?",
91
  options=[
92
  "Amazon Mechanical Turk",
93
- "other crowdworker platform",
94
- "participatory experiment",
95
- "other",
96
  "N/A",
97
  ],
98
  key_list=key_pref + ["crowdsourced"],
 
72
  make_multiselect(
73
  label="How was the language data obtained?",
74
  options=[
75
+ "Found",
76
+ "Created for the dataset",
77
+ "Crowdsourced",
78
+ "Machine-generated",
79
+ "Other",
80
  ],
81
  key_list=key_pref + ["obtained"],
82
  )
83
  make_multiselect(
84
  label="If found, where from?",
85
+ options=["Multiple websites", "Single website", "Offline media collection", "Other", "N/A"],
86
  key_list=key_pref + ["found"],
87
  help="select N/A if none of the language data was found",
88
  )
 
90
  label="If crowdsourced, where from?",
91
  options=[
92
  "Amazon Mechanical Turk",
93
+ "Other crowdworker platform",
94
+ "Participatory experiment",
95
+ "Other",
96
  "N/A",
97
  ],
98
  key_list=key_pref + ["crowdsourced"],
datacards/overview.py CHANGED
@@ -167,9 +167,9 @@ def overview_page():
167
  )
168
  with st.expander("Structure", expanded=False):
169
  key_pref = ["overview", "structure"]
170
- st.session_state.card_dict["overview"][
171
- "structure"
172
- ] = st.session_state.card_dict.get("structure", {})
173
  data_fields_help = """
174
  [free text; paragraphs]
175
  - Mention their data type, and whether and how they are used as part of the generation pipeline.
 
167
  )
168
  with st.expander("Structure", expanded=False):
169
  key_pref = ["overview", "structure"]
170
+ st.session_state.card_dict["overview"]["structure"] = st.session_state.card_dict[
171
+ "overview"
172
+ ].get("structure", {})
173
  data_fields_help = """
174
  [free text; paragraphs]
175
  - Mention their data type, and whether and how they are used as part of the generation pipeline.