DatasetCardForm / datacards /considerations.py
Yacine Jernite
contest
0ea4f6d
raw
history blame
4.29 kB
import streamlit as st
from .streamlit_utils import (
make_multiselect,
make_selectbox,
make_text_area,
make_text_input,
make_radio,
)
# Expected number of fields in each sub-section of the "considerations" page.
# considerations_summary() reports the stored-entry counts against these totals.
N_FIELDS_PII = 1  # one text area under "PII Risks and Liability"
# NOTE(review): only two widgets are visible under "Licenses" in
# considerations_page(); confirm that 3 is intended (the helper widgets may
# store additional keys that are not visible from this file).
N_FIELDS_LICENSES = 3
N_FIELDS_LIMITATIONS = 4  # four text areas under "Known limitations"
# Total number of expected fields across the three sub-sections.
N_FIELDS = N_FIELDS_PII + N_FIELDS_LICENSES + N_FIELDS_LIMITATIONS
def considerations_page():
    """Render the "considerations" section of the dataset card form.

    Three collapsed expanders are drawn: "PII Risks and Liability",
    "Licenses" and "Known limitations". Before the widgets of a sub-section
    are created, the matching nested dictionary
    ``st.session_state.card_dict["considerations"][<sub-section>]`` is
    created if it does not exist yet; existing content is left untouched.

    Each widget is built by a helper from ``streamlit_utils`` and receives a
    ``key_list`` of the form ``["considerations", <sub-section>, <field>]``
    (presumably the path under which the helper stores the answer in
    ``card_dict`` -- confirm against ``streamlit_utils``).
    """
    # setdefault only inserts the empty dict when the key is missing, so
    # answers from a previous rerun are preserved.
    st.session_state.card_dict.setdefault("considerations", {})

    with st.expander("PII Risks and Liability", expanded=False):
        key_pref = ["considerations", "pii"]
        st.session_state.card_dict["considerations"].setdefault("pii", {})
        make_text_area(
            label="Considering your answers to the PII part of the Data Curation Section, describe any potential privacy risks of using the data.",
            key_list=key_pref + ["risks-description"],
            help="In terms for example of having models memorize private information of data subjects or other breaches of privacy.",
        )

    with st.expander("Licenses", expanded=False):
        key_pref = ["considerations", "licenses"]
        st.session_state.card_dict["considerations"].setdefault("licenses", {})
        make_multiselect(
            label="Are there restrictions on the dataset use?",
            options=[
                "public domain",
                "multiple licenses",
                "copyright - all rights reserved",
                "open license - commercial use allowed",
                "research use only",
                "non-commercial use only",
                "do not distribute",
                "other",
            ],
            key_list=key_pref + ["data-restrictions"],
            help="Does the license restrict how the dataset can be used?",
        )
        make_multiselect(
            label="Are there restrictions on the underlying data?",
            options=["Open", "Non-Commercial", "Copyrighted", "Other"],
            key_list=key_pref + ["data-copyright"],
            # Typo fix: this help text previously read "restructions".
            help="Are there restrictions on the underlying data?",
        )

    with st.expander("Known limitations", expanded=False):
        key_pref = ["considerations", "limitations"]
        st.session_state.card_dict["considerations"].setdefault("limitations", {})
        # TODO: Form proper language
        make_text_area(
            label="Technical limitations, annotation noise, etc.",
            key_list=key_pref + ["data-technical-limitations"],
            help="",
        )
        make_text_area(
            label="Particularly unsuited for applications",
            key_list=key_pref + ["data-unsuited-applications"],
            help="",
        )
        make_text_area(
            label="What are discouraged use cases of the dataset?",
            key_list=key_pref + ["data-discouraged-use"],
            help="",
        )
        make_text_area(
            label="Citation of work identifying these limitations",
            key_list=key_pref + ["data-citations-limitations"],
            help="",
        )
def considerations_summary():
    """Render a collapsed expander summarising completion of this section.

    The number of entries stored in every sub-dictionary of
    ``st.session_state.card_dict["considerations"]`` is counted and shown
    against the expected totals (``N_FIELDS`` overall, and ``N_FIELDS_PII``,
    ``N_FIELDS_LICENSES``, ``N_FIELDS_LIMITATIONS`` per sub-section). A
    missing "considerations" entry or a missing sub-section counts as zero.
    """
    # Look the section up once; every count below is derived from it.
    considerations = st.session_state.card_dict.get("considerations", {})

    # The overall count covers every stored sub-section, not only the three
    # that are itemised below.
    total_filled = sum(len(dct) for dct in considerations.values())
    pii_filled = len(considerations.get("pii", {}))
    licenses_filled = len(considerations.get("licenses", {}))
    limitations_filled = len(considerations.get("limitations", {}))

    with st.expander(
        f"Considerations for Using Data Completion - {total_filled} of {N_FIELDS}",
        expanded=False,
    ):
        completion_markdown = "".join(
            [
                # Typo fix: this heading previously read "competion".
                f"- **Overall completion:**\n - {total_filled} of {N_FIELDS} fields\n",
                f"- **Sub-section - PII Risks and Liability:**\n - {pii_filled} of {N_FIELDS_PII} fields\n",
                f"- **Sub-section - Licenses:**\n - {licenses_filled} of {N_FIELDS_LICENSES} fields\n",
                f"- **Sub-section - Known limitations:**\n - {limitations_filled} of {N_FIELDS_LIMITATIONS} fields\n",
            ]
        )
        st.markdown(completion_markdown)