"""Streamlit front-end for filling out a GEM data card."""
import json
import re
from datetime import datetime

import streamlit as st

import datacards
from datacards import (
    considerations_page,
    considerations_summary,
    context_page,
    context_summary,
    curation_page,
    curation_summary,
    gem_page,
    gem_summary,
    overview_page,
    overview_summary,
    results_page,
    results_summary,
)

##################
## streamlit
##################
st.set_page_config(
    page_title="GEM Data Card Input Form",
    page_icon="https://avatars.githubusercontent.com/u/72612128",
    layout="wide",
    initial_sidebar_state="auto",
)

# Markdown shown at the top of the sidebar on every page.
page_description = """
# GEM Data Card Input Form

This application was designed for GEM.

It allows users to fill out all of the information going into the data documentation when submitting a new dataset.

Use the left sidebar to navigate:
- "**Dataset at a Glance**" shows selected information and tracks progress
- Each of the "**Section:**" pages opens a form for a specific section of the card
- Go to "**Review and Save**" when you are done to save your data card

Please use **markdown** formatting for each field.
"""

# Total number of required fields, summed over every section of the card.
_N_FIELDS = (
    datacards.considerations.N_FIELDS
    + datacards.context.N_FIELDS
    + datacards.curation.N_FIELDS
    + datacards.gem.N_FIELDS
    + datacards.overview.N_FIELDS
    + datacards.results.N_FIELDS
)

# (label, save_state key, section name) for each line of the glance summary.
_GLANCE_FIELDS = (
    ("Dataset Website", "overview_where_website", "Overview"),
    ("Dataset Contact", "overview_where_contact-name", "Overview"),
    ("Dataset License", "overview_languages_license", "Overview"),
    ("Multilingual Dataset", "overview_languages_is-multilingual", "Overview"),
    ("Dataset Languages", "overview_languages_language-names", "Overview"),
    ("Dataset Supported Task", "overview_languages_task", "Overview"),
    ("Communicative Goal", "overview_languages_communicative", "Overview"),
    ("Language Data Origin", "curation_language_obtained", "Curation"),
    ("Annotation Data Origin", "curation_annotations_obtained", "Curation"),
    ("Likelihood of PII", "curation_pii_has-pii", "Curation"),
)


def _friendly_timestamp():
    """Return the current local time as a file-name-safe string.

    The result looks like ``03_14_2024_15_09_26`` (month, day, year, hour,
    minute, second), containing only digits and underscores.
    """
    return datetime.now().strftime("%m_%d_%Y_%H_%M_%S")


def _save_dict_to_save_state(saved, prefix=None):
    """Flatten a nested card dict into ``st.session_state.save_state``.

    Nested keys are joined with ``_`` to form the flat key. A leaf value is
    only written when the matching ``<key>_filled`` flag in the save state is
    missing or falsy, so flagged fields are left untouched.

    Args:
        saved: A (possibly nested) dict, or a leaf value when recursing.
        prefix: List of keys leading to ``saved``; ``None`` at the top level.
    """
    prefix = [] if prefix is None else prefix
    if isinstance(saved, dict):
        for key, value in saved.items():
            _save_dict_to_save_state(value, prefix + [key])
        return
    flat_key = "_".join(prefix)
    if not st.session_state.save_state.get(flat_key + "_filled", False):
        st.session_state.save_state[flat_key] = saved


def _load_card_state(uploaded_file):
    """Parse an uploaded intermediary card file.

    Args:
        uploaded_file: The file-like object returned by ``st.file_uploader``.

    Returns:
        The decoded card dict, or ``None`` (after showing an error in the UI)
        when the file is not valid JSON or its top level is not an object.
    """
    try:
        # json.load accepts binary file objects and detects the encoding
        # itself; the old ``encoding=`` keyword was removed in Python 3.9 and
        # now raises TypeError.
        loaded = json.load(uploaded_file)
    except (json.JSONDecodeError, UnicodeDecodeError) as err:
        st.error(f"Could not read the uploaded file as JSON: {err}")
        return None
    if not isinstance(loaded, dict):
        st.error("The uploaded file does not contain a data card (expected a JSON object).")
        return None
    return loaded


def main():
    """Render the sidebar (navigation, save/load) and dispatch to a page."""
    if "save_state" not in st.session_state:
        st.session_state.save_state = {}
    if "card_dict" not in st.session_state:
        st.session_state.card_dict = {}

    st.sidebar.markdown(page_description, unsafe_allow_html=True)

    # Maps each navigation label to the function that renders that page.
    pages = {
        "Dataset at a Glance": glance_page,
        "Section: Dataset Overview": overview_page,
        "Section: Dataset Curation": curation_page,
        "Section: Dataset in GEM": gem_page,
        "Section: Previous Results": results_page,
        "Section: Considerations for Using Data": considerations_page,
        "Section: Broader Social Context": context_page,
        "Review and Save": review_page,
    }
    app_mode = st.sidebar.radio(
        label="Navigation menu:",
        options=list(pages.keys()),
        index=0,
    )

    with st.sidebar.expander("Save or Load your work"):
        # Save intermediary state
        st.download_button(
            label="Save Intermediary Card State",
            data=json.dumps(st.session_state.get("card_dict", {}), indent=2),
            file_name=f"data_card_{_friendly_timestamp()}.json",
        )
        # load from file
        uploaded_file = st.file_uploader(
            label="Load Intermediary Card State",
        )
        if uploaded_file is not None:
            pre_card_dict = _load_card_state(uploaded_file)
            if pre_card_dict is not None:
                # TODO: don't overwrite by default
                st.session_state.card_dict = pre_card_dict
                _save_dict_to_save_state(st.session_state.card_dict)
                # TODO: delete file after use

    st.markdown("#### GEM Data Card Input Form")
    pages[app_mode]()


def glance_page():
    """Show key fields filled in so far and per-section completion."""
    save_state = st.session_state.save_state
    # NOTE(review): the original formatting was lost, so the exact extent of
    # this expander is an assumption; only the summary is kept inside it.
    with st.expander("Dataset at a Glance", expanded=True):
        dataset_summary = "".join(
            "- **{}**: {}\n".format(
                label,
                save_state.get(
                    key, f"*Go to `Section: Dataset {section}` to fill in*"
                ),
            )
            for label, key, section in _GLANCE_FIELDS
        )
        st.markdown(dataset_summary + "---\n")

    # card_dict is nested section -> subsection -> fields; count the fields.
    card_dict = st.session_state.get("card_dict", {})
    num_fields = sum(
        len(subsection)
        for section in card_dict.values()
        for subsection in section.values()
    )
    st.markdown(
        f"You have currently filled out **{num_fields} of {_N_FIELDS} required fields** in the data card."
    )

    left_col, right_col = st.columns(2)
    with left_col:
        overview_summary()
        curation_summary()
        gem_summary()
    with right_col:
        results_summary()
        considerations_summary()
        context_summary()


def review_page():
    """Let the user name the dataset, download the card, and inspect it."""
    dataset_name = st.text_input(
        label="Enter dataset name here",
    )
    if dataset_name:
        # Lower-case, turn punctuation into spaces, then spaces into "_".
        friendly_name = (
            re.sub(r"[^\w\s]", " ", dataset_name.lower()).strip().replace(" ", "_")
        )
        dataset_file_name = f"{friendly_name}-{_friendly_timestamp()}.json"
        st.download_button(
            label=f"Download the Dataset Card below as {dataset_file_name}",
            data=json.dumps(st.session_state.get("card_dict", {}), indent=2),
            file_name=dataset_file_name,
        )
    else:
        st.markdown("##### Enter a dataset name above to be able to download the card!")
    st.markdown("---\n")
    st.write(st.session_state.get("card_dict", {}))
    # TODO add buttons to save and download


if __name__ == "__main__":
    main()