# DatasetCardForm / app.py
# Sebastian Gehrmann
# add formatting
# 8c3bdec
import json
import re
from datetime import datetime
import datacards
from datacards import (
considerations_page,
considerations_summary,
context_page,
context_summary,
curation_page,
curation_summary,
gem_page,
gem_summary,
overview_page,
overview_summary,
results_page,
results_summary,
)
import streamlit as st
##################
## streamlit
##################
# Page-level settings, collected in one mapping and applied in a single call.
_PAGE_CONFIG = {
    "page_title": "GEM Data Card Input Form",
    "page_icon": "https://avatars.githubusercontent.com/u/72612128",
    "layout": "wide",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**_PAGE_CONFIG)
# Markdown introduction for the app; main() renders it in the sidebar.
page_description = """
# GEM Data Card Input Form
This application was designed for GEM.
It allows users to fill out all of the information going into the data documentation when submitting a new dataset.
Use the left sidebar to navigate:
- "**Dataset at a Glance**" shows selected information and tracks progress
- Each of the "**Section:**" pages opens a form for a specific section of the card
- Go to "**Review and Save**" when you are done to save your data card
Please use **markdown** formatting for each field.
"""
# Sum of the N_FIELDS constants of the six data card section modules;
# glance_page reports the user's progress against this total.
_N_FIELDS = sum(
    section.N_FIELDS
    for section in (
        datacards.considerations,
        datacards.context,
        datacards.curation,
        datacards.gem,
        datacards.overview,
        datacards.results,
    )
)
def _copy_into_save_state(saved, save_state, prefix=()):
    """Flatten a nested card dict into ``save_state``.

    Each non-dict leaf is stored under the ``"_"``-joined path of the keys
    leading to it. A leaf is skipped when ``save_state`` already holds a
    truthy ``"<key>_filled"`` entry for that path.

    Args:
        saved: The (sub)structure being walked; dicts are recursed into,
            anything else is treated as a leaf value.
        save_state: Mapping that receives the flattened values (mutated).
        prefix: Tuple of keys leading to ``saved``.
    """
    if isinstance(saved, dict):
        for key, value in saved.items():
            _copy_into_save_state(value, save_state, prefix + (key,))
        return
    flat_key = "_".join(prefix)
    if not save_state.get(flat_key + "_filled", False):
        save_state[flat_key] = saved


def main():
    """Render the sidebar, the save/load controls, and the selected page.

    Creates ``save_state`` and ``card_dict`` in ``st.session_state`` when they
    are missing, draws the navigation radio in the sidebar, offers the current
    ``card_dict`` as a JSON download, optionally restores a card from an
    uploaded JSON file, and finally calls the page function picked in the
    navigation menu.
    """
    if "save_state" not in st.session_state:
        st.session_state.save_state = {}
    if "card_dict" not in st.session_state:
        st.session_state.card_dict = {}
    st.sidebar.markdown(page_description, unsafe_allow_html=True)
    pages = {
        "Dataset at a Glance": glance_page,
        "Section: Dataset Overview": overview_page,
        "Section: Dataset Curation": curation_page,
        "Section: Dataset in GEM": gem_page,
        "Section: Previous Results": results_page,
        "Section: Considerations for Using Data": considerations_page,
        "Section: Broader Social Context": context_page,
        "Review and Save": review_page,
    }
    app_mode = st.sidebar.radio(
        label="Navigation menu:",
        options=list(pages.keys()),
        index=0,
    )
    with st.sidebar.expander("Save or Load your work"):
        # Save intermediary state. The format yields e.g. "01_31_2022_13_45_10",
        # the same text the former "%m/%d/%Y, %H:%M:%S" + regex clean-up produced.
        friendly_date = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
        st.download_button(
            label="Save Intermediary Card State",
            data=json.dumps(st.session_state.get("card_dict", {}), indent=2),
            file_name=f"data_card_{friendly_date}.json",
        )
        # load from file
        uploaded_file = st.file_uploader(
            label="Load Intermediary Card State",
        )
        if uploaded_file is not None:
            try:
                # No `encoding=` argument: json.load() rejects it since
                # Python 3.9 and detects the encoding of bytes input itself.
                pre_card_dict = json.load(uploaded_file)
            except (json.JSONDecodeError, UnicodeDecodeError) as err:
                st.error(f"Could not read the uploaded card state: {err}")
            else:
                if isinstance(pre_card_dict, dict):
                    # TODO: don't overwrite by default
                    st.session_state.card_dict = pre_card_dict
                    _copy_into_save_state(
                        st.session_state.card_dict,
                        st.session_state.save_state,
                    )
                else:
                    # The pages index card_dict by section name, so any
                    # other top-level JSON type cannot be a saved card.
                    st.error("The uploaded card state must be a JSON object.")
            # TODO: delete file after use
    st.markdown("#### GEM Data Card Input Form")
    pages[app_mode]()
def glance_page():
    """Render the "Dataset at a Glance" page.

    Shows a bullet list of selected values from ``st.session_state.save_state``
    (with a pointer to the relevant section for anything missing), the number
    of filled-in fields compared to ``_N_FIELDS``, and the six section
    summaries laid out in two columns.
    """
    overview_hint = "*Go to `Section: Dataset Overview` to fill in*"
    curation_hint = "*Go to `Section: Dataset Curation` to fill in*"
    # (bullet title, save_state key, placeholder shown while the key is unset)
    glance_fields = (
        ("Dataset Website", "overview_where_website", overview_hint),
        ("Dataset Contact", "overview_where_contact-name", overview_hint),
        ("Dataset License", "overview_languages_license", overview_hint),
        ("Multilingual Dataset", "overview_languages_is-multilingual", overview_hint),
        ("Dataset Languages", "overview_languages_language-names", overview_hint),
        ("Dataset Supported Task", "overview_languages_task", overview_hint),
        ("Communicative Goal", "overview_languages_communicative", overview_hint),
        ("Language Data Origin", "curation_language_obtained", curation_hint),
        ("Annotation Data Origin", "curation_annotations_obtained", curation_hint),
        ("Likelihood of PII", "curation_pii_has-pii", curation_hint),
    )
    with st.expander("Dataset at a Glance", expanded=True):
        saved = st.session_state.save_state
        bullets = [
            f"- **{title}**: {saved.get(key, hint)}\n"
            for title, key, hint in glance_fields
        ]
        st.markdown("".join(bullets) + "---\n")

        # Count the entries of every sub-dict found one level below each
        # top-level key of the card.
        card = st.session_state.get("card_dict", {})
        num_fields = 0
        for section in card.values():
            for subsection in section.values():
                num_fields += len(subsection)
        st.markdown(f"You have currently filled out **{num_fields} of {_N_FIELDS} required fields** in the data card.")

    left_col, right_col = st.columns(2)
    for column, summaries in (
        (left_col, (overview_summary, curation_summary, gem_summary)),
        (right_col, (results_summary, considerations_summary, context_summary)),
    ):
        with column:
            for render_summary in summaries:
                render_summary()
def review_page():
    """Render the "Review and Save" page.

    Asks for a dataset name; once one is entered, offers the current
    ``card_dict`` as a JSON download named after the dataset and the current
    time. The raw ``card_dict`` is always written below a separator.
    """
    dataset_name = st.text_input(label="Enter dataset name here")
    if dataset_name == "":
        st.markdown("##### Enter a dataset name above to be able to download the card!")
    else:
        # Lower-case the name, blank out punctuation, then join with underscores.
        cleaned_name = re.sub(r"[^\w\s]", " ", dataset_name.lower())
        friendly_name = cleaned_name.strip().replace(" ", "_")

        # Turn the timestamp into a file-name-safe, underscore-separated token.
        timestamp = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        friendly_date = re.sub(r"[^\w\s]", "_", timestamp)
        for old, new in ((" ", "_"), ("__", "_"), ("-", "")):
            friendly_date = friendly_date.replace(old, new)

        dataset_file_name = f"{friendly_name}-{friendly_date}.json"
        card_json = json.dumps(st.session_state.get("card_dict", {}), indent=2)
        st.download_button(
            label=f"Download the Dataset Card below as {dataset_file_name}",
            data=card_json,
            file_name=dataset_file_name,
        )
    st.markdown("---\n")
    st.write(st.session_state.get("card_dict", {}))
    # TODO add buttons to save and download
# Build the app when this file is executed as a script.
if __name__ == "__main__":
    main()