Spaces:

GEM
/

DatasetCardForm

Runtime error

File size: 6,283 Bytes

ac6c40f
 
bac5a97
 
 
 
 
 
 
ac6c40f
969e2c4
 
bac5a97
ac6c40f
969e2c4
57616af
ac6c40f
bac5a97
 
 
969e2c4
 
 
bac5a97
969e2c4
bac5a97
969e2c4
 
 
 
4081c39
 
969e2c4
 
 
8a3a160
969e2c4
 
 
 
 
 
 
 
 
 
 
 
 
 
4081c39
bac5a97
 
 
 
 
969e2c4
 
 
 
 
 
 
 
8a3a160
969e2c4
 
 
 
bac5a97
 
 
 
 
 
cc73923
 
 
 
 
 
4081c39
cc73923
 
 
 
 
 
 
 
4081c39
cc73923
 
 
4081c39
 
57616af
ac6c40f
bac5a97
 
 
 
3578aa2
bac5a97
 
 
969e2c4
bac5a97
ead9ac7
bac5a97

import streamlit as st

from .streamlit_utils import (
    make_multiselect,
    make_selectbox,
    make_text_area,
    make_text_input,
    make_radio,
)

# Expected number of stored fields in each sub-section of the "context" part
# of the card; context_summary() reports completion against these counts.
N_FIELDS_PREVIOUS = 3
N_FIELDS_UNDERSERVED_COMMUNITIES = 2
N_FIELDS_BIASES = 3

# Expected number of fields for the whole page (sum of the sub-sections).
N_FIELDS = sum(
    (
        N_FIELDS_PREVIOUS,
        N_FIELDS_UNDERSERVED_COMMUNITIES,
        N_FIELDS_BIASES,
    )
)

def context_page():
    """Render the form page for the "context" section of the dataset card.

    Draws three Streamlit expanders (previous work, under-served communities,
    biases). Each expander first makes sure its sub-dictionary exists under
    ``st.session_state.card_dict["context"]`` (keeping any values already
    there), then shows its widgets through the ``make_*`` helpers.

    The answers are read back from ``card_dict`` under the same ``key_list``
    path right after each radio widget, so the helpers are expected to store
    them there (their implementation is not visible from this function).
    Follow-up questions that do not apply for the selected answer are not
    shown and their field is set to ``"N/A"`` instead.
    """
    # Create the "context" section on first visit without discarding an
    # existing one.
    st.session_state.card_dict["context"] = st.session_state.card_dict.get(
        "context", {}
    )
    with st.expander("Previous Work on the Social Impact of the Dataset", expanded=False):
        key_pref = ["context", "previous"]
        st.session_state.card_dict["context"]["previous"] = st.session_state.card_dict[
            "context"
        ].get("previous", {})

        make_radio(
            label="Are you aware of cases where models trained on the task featured in this dataset or related tasks have been used in automated systems?",
            options=["no", "yes - related tasks", "yes - other datasets featuring the same task", "yes - models trained on this dataset"],
            key_list=key_pref + ["is-deployed"],
            help="",
        )
        is_deployed = st.session_state.card_dict["context"]["previous"]["is-deployed"]
        # Every affirmative option starts with "yes", so a substring check
        # covers all three of them.
        if "yes" in is_deployed:
            make_text_area(
                label=(
                    "Did any of these previous uses result in observations about the social impact of the systems? "
                    "In particular, has there been work outlining the risks and limitations of the system? Provide links and descriptions here."
                ),
                key_list=key_pref + ["described-risks"],
                help="",
            )
            # Changes to the dataset are only asked about when models trained
            # on this very dataset were deployed.
            if is_deployed == "yes - models trained on this dataset":
                make_text_area(
                    label="Have any changes been made to the dataset as a result of these observations?",
                    key_list=key_pref + ["changes-from-observation"],
                    help="",
                )
            else:
                st.session_state.card_dict["context"]["previous"]["changes-from-observation"] = "N/A"
        else:
            st.session_state.card_dict["context"]["previous"]["described-risks"] = "N/A"
            st.session_state.card_dict["context"]["previous"]["changes-from-observation"] = "N/A"

    with st.expander("Impact on Under-Served Communities", expanded=False):
        key_pref = ["context", "underserved"]
        st.session_state.card_dict["context"]["underserved"] = st.session_state.card_dict[
            "context"
        ].get("underserved", {})
        make_radio(
            # The first sentence ends with a space so the two sentences do not
            # run together in the rendered label.
            label=(
                "Does this dataset address the needs of communities that are traditionally underserved in language technology, and particularly language generation technology? "
                "Communities may be underserved for example because their language, language variety, or social or geographical context is underrepresented in NLP and NLG resources (datasets and models)."
            ),
            options=["no", "yes"],
            key_list=key_pref + ["helps-underserved"],
        )
        if st.session_state.card_dict["context"]["underserved"]["helps-underserved"] == "yes":
            make_text_area(
                label="Describe how this dataset addresses the needs of underserved communities.",
                key_list=key_pref + ["underserved-description"],
            )
        else:
            st.session_state.card_dict["context"]["underserved"]["underserved-description"] = "N/A"

    with st.expander("Discussion of Biases", expanded=False):
        key_pref = ["context", "biases"]
        st.session_state.card_dict["context"]["biases"] = st.session_state.card_dict[
            "context"
        ].get("biases", {})
        make_radio(
            label=(
                "Are there documented social biases in the dataset? "
                "Biases in this context are variations in the ways members of different social categories are represented that can have harmful downstream consequences for members of the more disadvantaged group."
            ),
            options=["yes", "unsure", "no"],
            key_list=key_pref + ["has-biases"],
            help="For a more extensive definition of social biases, see [Language (Technology) is Power: A Critical Survey of “Bias” in NLP ](https://aclanthology.org/2020.acl-main.485.pdf)",
        )
        if st.session_state.card_dict["context"]["biases"]["has-biases"] == "yes":
            make_text_area(
                label="Provide links to and summaries of works analyzing these biases.",
                key_list=key_pref + ["bias-analyses"],
                help="The analyses can take the form of academic papers or news articles, or even blog posts.",
            )
        else:
            st.session_state.card_dict["context"]["biases"]["bias-analyses"] = "N/A"
        # NOTE: the key is spelled "speaker-distibution" on purpose; it is the
        # name already used in stored cards, so it must not be "corrected".
        make_text_area(
            label="Does the distribution of language producers in the dataset accurately represent the full distribution of speakers of the language world-wide? If not, how does it differ?",
            key_list=key_pref + ["speaker-distibution"],
            help="For example, are most speakers in the dataset of a certain gender or located in a certain county?",
        )


def context_summary():
    """Show a collapsed expander reporting completion of the "context" section.

    The number of filled fields is the number of keys present in each
    sub-dictionary of ``st.session_state.card_dict["context"]``; it is shown
    overall and per sub-section against the module-level ``N_FIELDS*`` counts.
    """
    context = st.session_state.card_dict.get("context", {})
    total_filled = sum(len(section) for section in context.values())

    # (displayed title, key inside the "context" dict, expected field count)
    sub_sections = (
        ("Previous Work on the Social Impact of the Dataset", "previous", N_FIELDS_PREVIOUS),
        ("Impact on Under-Served Communities", "underserved", N_FIELDS_UNDERSERVED_COMMUNITIES),
        ("Discussion of Biases", "biases", N_FIELDS_BIASES),
    )

    title = f"Broader Social Context Completion - {total_filled} of {N_FIELDS}"
    with st.expander(title, expanded=False):
        parts = [
            f"- **Overall completion:**\n  - {total_filled} of {N_FIELDS} fields\n"
        ]
        for heading, section_key, n_expected in sub_sections:
            n_filled = len(context.get(section_key, {}))
            parts.append(
                f"- **Sub-section - {heading}:**\n  - {n_filled} of {n_expected} fields\n"
            )
        st.markdown("".join(parts))