Spaces:

GEM
/

DatasetCardForm

Runtime error

Sebastian Gehrmann

Add formatting lookup for prompts

9999db9 over 2 years ago

No virus

7.06 kB

	import streamlit as st

	from .streamlit_utils import make_text_input

	from .streamlit_utils import (
	make_multiselect,
	make_text_area,
	make_radio,
	)

	# Number of card fields stored under each sub-section of card_dict["gem"];
	# gem_summary() reports completion against these totals.
	N_FIELDS_RATIONALE = 5  # "rationale" keys written by gem_page()
	N_FIELDS_CURATION = 6  # "curation" keys written by gem_page()
	N_FIELDS_STARTING = 2  # "starting" keys written by gem_page()

	# Total number of fields across the three sub-sections above.
	N_FIELDS = sum((N_FIELDS_RATIONALE, N_FIELDS_CURATION, N_FIELDS_STARTING))


	def gem_page():
	    """Render the "GEM" page of the dataset card form.

	    Draws three collapsed expanders ("Rationale", "GEM Additional Curation"
	    and "Getting Started") and makes sure the matching sub-dictionaries
	    exist under ``st.session_state.card_dict["gem"]``.

	    Follow-up questions are only shown when the controlling radio answer is
	    "yes"; otherwise the dependent fields are filled with placeholder
	    values ("N/A", ``[]`` or "no") so every field of a sub-section always
	    has an entry.

	    NOTE(review): the ``make_*`` helpers come from ``streamlit_utils`` and
	    presumably store each widget's value in ``card_dict`` under
	    ``key_list`` -- confirm against that module.
	    """
	    st.session_state.card_dict["gem"] = st.session_state.card_dict.get("gem", {})

	    with st.expander("Rationale", expanded=False):
	        key_pref = ["gem", "rationale"]
	        st.session_state.card_dict["gem"]["rationale"] = st.session_state.card_dict[
	            "gem"
	        ].get("rationale", {})
	        make_text_area(
	            label="What does this dataset contribute toward better generation evaluation and why is it part of GEM?",
	            key_list=key_pref + ["contribution"],
	            help="Describe briefly what makes this dataset an interesting target for NLG evaluations and why it is part of GEM",
	        )
	        make_radio(
	            label="Do other datasets for the high level task exist?",
	            options=["no", "yes"],
	            key_list=key_pref + ["sole-task-dataset"],
	            help="for example, is this the only summarization dataset proposed in GEM",
	        )
	        # Comparison questions only make sense when other datasets exist.
	        if "yes" in st.session_state.card_dict["gem"]["rationale"].get("sole-task-dataset", []):
	            make_radio(
	                label="Does this dataset cover other languages than other datasets for the same task?",
	                options=["no", "yes"],
	                key_list=key_pref + ["sole-language-task-dataset"],
	                help="for example, is this the only summarization dataset proposed in GEM to have French text?",
	            )
	            make_text_area(
	                label="What else sets this dataset apart from other similar datasets in GEM?",
	                key_list=key_pref + ["distinction-description"],
	                help="Describe briefly for each similar dataset (same task/languages) what sets this one apart",
	            )
	        else:
	            st.session_state.card_dict["gem"]["rationale"]["sole-language-task-dataset"] = "N/A"
	            st.session_state.card_dict["gem"]["rationale"]["distinction-description"] = "N/A"

	        make_text_area(
	            label="What aspect of model ability can be measured with this dataset?",
	            key_list=key_pref + ["model-ability"],
	            help="What kind of abilities should a model exhibit that performs well on the task of this dataset (e.g., reasoning capability, morphological inflection)?",
	        )

	    with st.expander("GEM Additional Curation", expanded=False):
	        key_pref = ["gem", "curation"]
	        st.session_state.card_dict["gem"]["curation"] = st.session_state.card_dict[
	            "gem"
	        ].get("curation", {})
	        make_radio(
	            label="Has the GEM version of the dataset been modified in any way (data, processing, splits) from the original curated data?",
	            options=["no", "yes"],
	            key_list=key_pref + ["has-additional-curation"],
	        )
	        # .get() instead of indexing: a missing answer is treated as "not yes"
	        # rather than raising KeyError (same access style as the rationale check).
	        if st.session_state.card_dict["gem"]["curation"].get("has-additional-curation") == "yes":
	            make_multiselect(
	                label="What changes have been made to the original dataset?",
	                options=["data points added", "data points removed", "data points modified", "annotations added", "other"],
	                key_list=key_pref + ["modification-types"],
	            )
	            make_text_area(
	                label="For each of these changes, describe them in more detail and provide the intended purpose of the modification",
	                key_list=key_pref + ["modification-description"],
	            )
	            make_radio(
	                label="Does GEM provide additional splits to the dataset?",
	                options=["no", "yes"],
	                key_list=key_pref + ["has-additional-splits"],
	            )
	            if st.session_state.card_dict["gem"]["curation"].get("has-additional-splits") == "yes":
	                make_text_area(
	                    label="Describe how the new splits were created",
	                    key_list=key_pref + ["additional-splits-description"],
	                )
	                # The key spelling "capacicites" is kept as-is: it is part of
	                # the stored card schema.
	                make_text_area(
	                    label="What aspects of the model's generation capacities were the splits created to test?",
	                    key_list=key_pref + ["additional-splits-capacicites"],
	                )
	            else:
	                st.session_state.card_dict["gem"]["curation"]["additional-splits-description"] = "N/A"
	                st.session_state.card_dict["gem"]["curation"]["additional-splits-capacicites"] = "N/A"
	        else:
	            # No additional curation: fill every dependent field with a placeholder.
	            st.session_state.card_dict["gem"]["curation"]["modification-types"] = []
	            st.session_state.card_dict["gem"]["curation"]["modification-description"] = "N/A"
	            st.session_state.card_dict["gem"]["curation"]["has-additional-splits"] = "no"
	            st.session_state.card_dict["gem"]["curation"]["additional-splits-description"] = "N/A"
	            st.session_state.card_dict["gem"]["curation"]["additional-splits-capacicites"] = "N/A"

	    with st.expander("Getting Started", expanded=False):
	        key_pref = ["gem", "starting"]
	        st.session_state.card_dict["gem"]["starting"] = st.session_state.card_dict[
	            "gem"
	        ].get("starting", {})
	        make_text_area(
	            label="Getting started with in-depth research on the task. Add relevant pointers to resources that researchers can consult when they want to get started digging deeper into the task.",
	            key_list=key_pref + ["research-pointers"],
	            help=" These can include blog posts, research papers, literature surveys, etc. You can also link to tutorials on the GEM website.",
	        )
	        make_text_area(
	            label="Technical terms used in this card and the dataset and their definitions",
	            key_list=key_pref + ["technical-terms"],
	            help="Provide a brief definition of technical terms that are unique to this dataset",
	        )



	def gem_summary():
	    """Show a collapsed expander summarising completion of the GEM section.

	    The expander title and the first bullet report the number of entries
	    stored across all sub-dictionaries of ``card_dict["gem"]`` against
	    ``N_FIELDS``; the remaining bullets report the entry count of the
	    "rationale", "curation" and "starting" sub-dictionaries against their
	    respective ``N_FIELDS_*`` totals.
	    """
	    gem_card = st.session_state.card_dict.get("gem", {})
	    # An entry counts as filled as soon as its key exists in a sub-dictionary.
	    total_filled = sum(len(section) for section in gem_card.values())
	    title = f"Dataset in GEM Completion - {total_filled} of {N_FIELDS}"

	    with st.expander(title, expanded=False):
	        rationale_count = len(gem_card.get("rationale", {}))
	        curation_count = len(gem_card.get("curation", {}))
	        starting_count = len(gem_card.get("starting", {}))
	        bullet_points = [
	            f"- Overall completion:\n - {total_filled} of {N_FIELDS} fields\n",
	            f"- Sub-section - Rationale:\n - {rationale_count} of {N_FIELDS_RATIONALE} fields\n",
	            f"- Sub-section - GEM Additional Curation:\n - {curation_count} of {N_FIELDS_CURATION} fields\n",
	            f"- Sub-section - Getting Started:\n - {starting_count} of {N_FIELDS_STARTING} fields\n",
	        ]
	        st.markdown("".join(bullet_points))