import streamlit as st

from .streamlit_utils import (
    make_multiselect,
    make_selectbox,
    make_text_area,
    make_text_input,
    make_radio,
)

# Number of card fields each expander of ``context_page`` writes; used by
# ``context_summary`` as the denominators of the completion counts.
N_FIELDS_PREVIOUS = 3
N_FIELDS_UNDERSERVED_COMMUNITIES = 2
N_FIELDS_BIASES = 3

N_FIELDS = N_FIELDS_PREVIOUS + N_FIELDS_UNDERSERVED_COMMUNITIES + N_FIELDS_BIASES

# Placeholder stored for follow-up fields whose question is not displayed.
_NOT_APPLICABLE = "N/A"


def _context_section(name):
    """Return ``card_dict["context"][name]``, creating missing levels as ``{}``.

    The section is looked up from ``st.session_state`` on every call instead
    of being cached in a local, so the result is always the dict currently
    stored in the card.
    """
    context = st.session_state.card_dict.setdefault("context", {})
    return context.setdefault(name, {})


def _previous_work_fields():
    """Render the questions about prior deployments and their social impact.

    Writes the ``is-deployed``, ``described-risks`` and
    ``changes-from-observation`` entries of the ``previous`` section.
    """
    key_pref = ["context", "previous"]
    _context_section("previous")
    make_radio(
        label="Are you aware of cases where models trained on the task featured in this dataset or related tasks have been used in automated systems?",
        options=[
            "no",
            "yes - related tasks",
            "yes - other datasets featuring the same task",
            "yes - models trained on this dataset",
        ],
        key_list=key_pref + ["is-deployed"],
        help="",
    )
    # The radio answer is read back from the card right after the widget call,
    # i.e. ``make_radio`` is relied upon to have stored it under ``key_list``.
    is_deployed = _context_section("previous")["is-deployed"]
    if "yes" in is_deployed:
        make_text_area(
            label="Did any of these previous uses result in observations about the social impact of the systems? "
            "In particular, has there been work outlining the risks and limitations of the system? "
            "Provide links and descriptions here.",
            key_list=key_pref + ["described-risks"],
            help="",
        )
        # Changes to the dataset are only asked about when this very dataset
        # was the one used.
        if is_deployed == "yes - models trained on this dataset":
            make_text_area(
                label="Have any changes been made to the dataset as a result of these observations?",
                key_list=key_pref + ["changes-from-observation"],
                help="",
            )
        else:
            _context_section("previous")["changes-from-observation"] = _NOT_APPLICABLE
    else:
        section = _context_section("previous")
        section["described-risks"] = _NOT_APPLICABLE
        section["changes-from-observation"] = _NOT_APPLICABLE


def _underserved_fields():
    """Render the questions about impact on under-served communities.

    Writes the ``helps-underserved`` and ``underserved-description`` entries
    of the ``underserved`` section.
    """
    key_pref = ["context", "underserved"]
    _context_section("underserved")
    make_radio(
        # The two sentences are joined with an explicit space so they do not
        # run together in the rendered label.
        label="Does this dataset address the needs of communities that are traditionally underserved in language technology, and particularly language generation technology? "
        "Communities may be underserved for example because their language, language variety, or social or geographical context is underrepresented in NLP and NLG resources (datasets and models).",
        options=["no", "yes"],
        key_list=key_pref + ["helps-underserved"],
    )
    if _context_section("underserved")["helps-underserved"] == "yes":
        make_text_area(
            label="Describe how this dataset addresses the needs of underserved communities.",
            key_list=key_pref + ["underserved-description"],
        )
    else:
        _context_section("underserved")["underserved-description"] = _NOT_APPLICABLE


def _biases_fields():
    """Render the questions about documented biases and speaker distribution.

    Writes the ``has-biases``, ``bias-analyses`` and ``speaker-distibution``
    entries of the ``biases`` section.
    """
    key_pref = ["context", "biases"]
    _context_section("biases")
    make_radio(
        label="Are there documented social biases in the dataset? "
        "Biases in this context are variations in the ways members of different social categories are represented that can have harmful downstream consequences for members of the more disadvantaged group.",
        options=["yes", "unsure", "no"],
        key_list=key_pref + ["has-biases"],
        help="For a more extensive definition of social biases, see [Language (Technology) is Power: A Critical Survey of “Bias” in NLP ](https://aclanthology.org/2020.acl-main.485.pdf)",
    )
    if _context_section("biases")["has-biases"] == "yes":
        make_text_area(
            label="Provide links to and summaries of works analyzing these biases.",
            key_list=key_pref + ["bias-analyses"],
            help="The analyses can take the form of academic papers or news articles, or even blog posts.",
        )
    else:
        _context_section("biases")["bias-analyses"] = _NOT_APPLICABLE
    make_text_area(
        label="Does the distribution of language producers in the dataset accurately represent the full distribution of speakers of the language world-wide? If not, how does it differ?",
        # NOTE: the key is misspelled ("distibution") but is kept as-is since
        # it is the name under which the answer is stored in the card.
        key_list=key_pref + ["speaker-distibution"],
        help="For example, are most speakers in the dataset of a certain gender or located in a certain county?",
    )


def context_page():
    """Render the "context" page of the card form.

    The page consists of three collapsed expanders (previous work,
    under-served communities, biases). Answers live in
    ``st.session_state.card_dict["context"]``, one sub-dict per expander.
    Follow-up questions are only displayed when the controlling radio answer
    calls for them; otherwise their field is set to ``"N/A"``.
    """
    # Make sure the top-level section exists before any widget is rendered.
    st.session_state.card_dict.setdefault("context", {})
    with st.expander(
        "Previous Work on the Social Impact of the Dataset", expanded=False
    ):
        _previous_work_fields()
    with st.expander("Impact on Under-Served Communities", expanded=False):
        _underserved_fields()
    with st.expander("Discussion of Biases", expanded=False):
        _biases_fields()


def context_summary():
    """Render a collapsed expander summarising completion of the context page.

    A field counts as filled as soon as its key is present in the matching
    sub-dict of ``card_dict["context"]``; this includes fields holding the
    ``"N/A"`` placeholder written by ``context_page``.
    """
    context = st.session_state.card_dict.get("context", {})
    total_filled = sum(len(section) for section in context.values())
    sub_sections = (
        (
            "Previous Work on the Social Impact of the Dataset",
            "previous",
            N_FIELDS_PREVIOUS,
        ),
        (
            "Impact on Under-Served Communities",
            "underserved",
            N_FIELDS_UNDERSERVED_COMMUNITIES,
        ),
        ("Discussion of Biases", "biases", N_FIELDS_BIASES),
    )
    with st.expander(
        f"Broader Social Context Completion - {total_filled} of {N_FIELDS}",
        expanded=False,
    ):
        # Sub-bullets are indented by two spaces so that Markdown nests them
        # under their parent bullet instead of rendering them as siblings.
        parts = [
            f"- **Overall completion:**\n  - {total_filled} of {N_FIELDS} fields\n"
        ]
        parts.extend(
            f"- **Sub-section - {title}:**\n"
            f"  - {len(context.get(key, {}))} of {n_fields} fields\n"
            for title, key, n_fields in sub_sections
        )
        st.markdown("".join(parts))