"""Gradio app that collects answers to the Healthsheet questionnaire.

The page shows a title, the paper's abstract and citation in collapsible
accordions, and a two-column area: questionnaire tabs on the left and a
placeholder for the generated healthsheet on the right.
"""

import gradio as gr

# Abstract shown in the "Abstract" accordion. The LaTeX markup is kept as
# written in the source text; backslashes are escaped so that "\textit" is not
# interpreted as a TAB followed by "extit", and "\cite" does not rely on an
# invalid escape sequence.
ABSTRACT_TEXT = (
    "Machine learning (ML) approaches have demonstrated promising results in a wide range of healthcare applications. "
    "Data plays a crucial role in developing ML-based healthcare systems that directly affect people's lives. "
    "Many of the ethical issues surrounding the use of ML in healthcare stem from structural inequalities underlying the way we collect, use, and handle data. "
    "Developing guidelines to improve documentation practices regarding the creation, use, and maintenance of ML healthcare datasets is therefore of critical importance. "
    "In this work, we introduce Healthsheet, a contextualized adaptation of the original datasheet questionnaire ~\\cite{gebru2018datasheets} for health-specific applications. "
    "Through a series of semi-structured interviews, we adapt the datasheets for healthcare data documentation. "
    "As part of the Healthsheet development process and to understand the obstacles researchers face in creating datasheets, we worked with three publicly-available healthcare datasets as our case studies, each with different types of structured data: Electronic health Records (EHR), clinical trial study data, and smartphone-based performance outcome measures. "
    "Our findings from the interviewee study and case studies show 1) that datasheets should be contextualized for healthcare, 2) that despite incentives to adopt accountability practices such as datasheets, there is a lack of consistency in the broader use of these practices 3) how the ML for health community views datasheets and particularly \\textit{Healthsheets} as diagnostic tool to surface the limitations and strength of datasets and 4) the relative importance of different fields in the datasheet to healthcare concerns."
)

# BibTeX entry shown in the "Citation" accordion.
CITATION_BUTTON_TEXT = (
    "@article{2022, "
    "title={Healthsheet: Development of a Transparency Artifact for Health Datasets}, "
    "url={http://dx.doi.org/10.1145/3531146.3533239}, "
    "DOI={10.1145/3531146.3533239}, "
    "journal={2022 ACM Conference on Fairness, Accountability, and Transparency}, "
    "publisher={ACM}, "
    "author={Rostamzadeh, Negar and Mincu, Diana and Roy, Subhrajit and Smart, Andrew and Wilcox, Lauren and Pushkarna, Mahima and Schrouff, Jessica and Amironesei, Razvan and Moorosi, Nyalleng and Heller, Katherine}, "
    "year={2022}, "
    "month={Jun} "
    "} "
)

with gr.Blocks() as demo:
    # Page title and subtitle.
    # NOTE(review): these strings contain no HTML tags, only text padded with
    # newlines -- presumably heading markup was intended; confirm.
    gr.HTML("\n\nHealthsheet Creator! 🪄📄✨\n\n")
    gr.HTML(
        '\n\nCreate a healthsheet based on Rostamzadeh et al. (2022) "Healthsheet: Development of a Transparency Artifact for Health Datasets"\n\n'
    )

    # Collapsed-by-default reference material about the paper.
    with gr.Row():
        with gr.Accordion("📝 Abstract", open=False):
            abstract_button = gr.Textbox(
                value=ABSTRACT_TEXT,
                lines=5,
            )
    with gr.Row():
        with gr.Accordion("📘 Citation", open=False):
            # NOTE(review): `.style()` was removed from components in Gradio 4;
            # if the project targets a newer Gradio, pass
            # `show_copy_button=True` to the constructor instead -- confirm the
            # pinned version before changing.
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                lines=9,
                elem_id="citation-button",
            ).style(show_copy_button=True)

    # Main area: questionnaire on the left, healthsheet output on the right.
    with gr.Row():
        with gr.Column():
            with gr.Tab("General Information"):
                gr.Markdown(
                    "If the answer to any of the questions in the questionnaire is N/A, please describe why the answer is N/A (e.g: data not being available)."
                )
                summary = gr.Textbox(
                    label='Provide a 2 sentence summary of this dataset.',
                    lines=3,
                )
                audit = gr.Textbox(
                    label='Has the dataset been audited before? If yes, by whom and what are the results?',
                    lines=3,
                )
            with gr.Tab("Dataset Versioning"):
                gr.Markdown(
                    "Version: A dataset will be considered to have a new version if there are major differences from a previous release. Some examples are a change in the number of patients/participants, or an increase in the data modalities covered."
                )
                gr.Markdown(
                    "Sub-version: A sub-version tends to apply smaller scale changes to a given version. Some datasets in healthcare are released without labels and predefined tasks, or will be later labeled by researchers for specific tasks and problems, to form sub-versions of the dataset."
                )
                gr.Markdown(
                    "The following set of questions clarifies the information about the current (latest) version of the dataset. It is important to report the rationale for labeling the data in any of the versions and sub-versions that this datasheet addresses, funding resources, and motivations behind each released version of the dataset."
                )
                update = gr.Textbox(
                    label='Does the dataset get released as static versions or is it dynamically updated? a. If static, how many versions of the dataset exist? b.If dynamic, how frequently is the dataset updated?'
                )
        with gr.Column():
            # Placeholder output; nothing is wired to the inputs yet.
            gr.Markdown("This is where your healthsheet will appear!")
            gr.Textbox("Healthsheet")

if __name__ == "__main__":
    demo.launch()