yourusername committed on
Commit
4e3cf4d
β€’
1 Parent(s): 5a73d89

:sparkles: update app to include new features

Browse files
Files changed (7) hide show
  1. 1_πŸ“_form.py +83 -0
  2. about.md +4 -0
  3. app.py +0 -147
  4. pages/2_πŸ‘€_view_card.py +99 -0
  5. pages/3_❓_about.py +10 -0
  6. persist.py +26 -0
  7. template.md +112 -0
1_πŸ“_form.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from yaml import load
2
+ from persist import persist, load_widget_state
3
+ import streamlit as st
4
+
5
+ import requests
6
+ import pandas as pd
7
+
8
@st.cache
def get_cached_data():
    """Fetch and cache the Hub reference data the form's widgets are built from.

    Returns:
        tuple: (languages_map, license_map, available_metrics, libraries, tasks)
            - languages_map: ISO code -> language name
            - license_map: license full name -> card identifier
            - available_metrics: list of metric ids
            - libraries / tasks: list of library / pipeline tag ids
    """
    # ISO code -> full language name, scraped from the Hub languages table.
    lang_table = pd.read_html("https://hf.co/languages")[0]
    languages_map = dict(zip(lang_table["ISO code"], lang_table["Language"]))

    # License full name -> identifier used in model card metadata.
    license_table = pd.read_html("https://huggingface.co/docs/hub/repositories-licenses")[0]
    license_map = dict(
        zip(license_table.Fullname, license_table["License identifier (to use in model card)"])
    )

    # All metric ids known to the Hub.
    available_metrics = [entry['id'] for entry in requests.get('https://huggingface.co/api/metrics').json()]

    # Library and task (pipeline) tags known to the Hub.
    tags_data = requests.get('https://huggingface.co/api/models-tags-by-type').json()
    libraries = [entry['id'] for entry in tags_data['library']]
    tasks = [entry['id'] for entry in tags_data['pipeline_tag']]

    return languages_map, license_map, available_metrics, libraries, tasks
25
+
26
+
27
def main():
    """Render the model card form page; every field is persisted in session state."""
    if "model_name" not in st.session_state:
        # Initialize session state once per browser session; persist() keeps
        # these values alive when the user switches between pages.
        st.session_state.update({
            "model_name": "",
            "languages": [],
            "license": "",
            "library_name": "",
            "datasets": "",
            "metrics": [],
            "task": "",
            "tags": "",
            "model_description": "Some cool model...",
            "authors": "",
            "paper_url": "",
            "github_url": "",
            "bibtex_citations": "",
            "emissions": "",
        })

    languages_map, license_map, available_metrics, libraries, tasks = get_cached_data()

    st.header("Model Card Form")

    # Reserve a slot above the widgets so the required-field warning renders first.
    warning_placeholder = st.empty()

    st.text_input("Model Name", key=persist("model_name"))
    st.text_area("Model Description", help="The model description provides basic details about the model. This includes the architecture, version, if it was introduced in a paper, if an original implementation is available, the author, and general information about the model. Any copyright should be attributed here. General information about training procedures, parameters, and important disclaimers can also be mentioned in this section.", key=persist('model_description'))
    # NOTE: fixed "lanuage" -> "language" typo in the help text below.
    st.multiselect("Language(s)", list(languages_map), format_func=lambda x: languages_map[x], help="The language(s) associated with this model. If this is not a text-based model, you should specify whatever language is used in the dataset. For instance, if the dataset's labels are in english, you should select English here.", key=persist("languages"))
    st.selectbox("License", [""] + list(license_map.values()), help="The license associated with this model.", key=persist("license"))
    st.selectbox("Library Name", [""] + libraries, help="The name of the library this model came from (Ex. pytorch, timm, spacy, keras, etc.). This is usually automatically detected in model repos, so it is not required.", key=persist('library_name'))
    st.text_input("Datasets (comma separated)", help="The dataset(s) used to train this model. Use dataset id from https://hf.co/datasets.", key=persist("datasets"))
    st.multiselect("Metrics", available_metrics, help="Metrics used in the training/evaluation of this model. Use metric id from https://hf.co/metrics.", key=persist("metrics"))
    st.selectbox("Task", [""] + tasks, help="What task does this model aim to solve?", key=persist('task'))
    st.text_input("Tags (comma separated)", help="Additional tags to add which will be filterable on https://hf.co/models. (Ex. image-classification, vision, resnet)", key=persist("tags"))
    st.text_input("Author(s) (comma separated)", help="The authors who developed this model. If you trained this model, the author is you.", key=persist("authors"))
    st.text_input("Related Research Paper", help="Research paper related to this model.", key=persist("paper_url"))
    st.text_input("Related GitHub Repository", help="Link to a GitHub repository used in the development of this model", key=persist("github_url"))
    st.text_area("Bibtex Citation", help="Bibtex citations for related work", key=persist("bibtex_citations"))
    st.text_input("Carbon Emitted:", help="You can estimate carbon emissions using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700)", key=persist("emissions"))

    # Required-field validation: languages and license must be provided.
    languages = st.session_state.languages or None
    license = st.session_state.license or None
    do_warn = False
    warning_msg = "Warning: The following fields are required but have not been filled in: "
    if not languages:
        warning_msg += "\n- Languages"
        do_warn = True
    if not license:
        warning_msg += "\n- License"
        do_warn = True
    if do_warn:
        warning_placeholder.error(warning_msg)


if __name__ == '__main__':
    load_widget_state()
    main()
about.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
1
+ # About
2
+
3
+ We built this space to make it easier to create _good_ model cards :)
4
+
app.py DELETED
@@ -1,147 +0,0 @@
1
- import pandas as pd
2
- import requests
3
- import streamlit as st
4
- from modelcards import ModelCard
5
-
6
-
7
- @st.cache
8
- def get_cached_data():
9
- languages_df = pd.read_html("https://hf.co/languages")[0]
10
- languages_map = pd.Series(languages_df["Language"].values, index=languages_df["ISO code"]).to_dict()
11
-
12
- license_df = pd.read_html("https://huggingface.co/docs/hub/repositories-licenses")[0]
13
- license_map = pd.Series(
14
- license_df["License identifier (to use in model card)"].values, index=license_df.Fullname
15
- ).to_dict()
16
-
17
- available_metrics = [x['id'] for x in requests.get('https://huggingface.co/api/metrics').json()]
18
-
19
- return languages_map, license_map, available_metrics
20
-
21
-
22
- languages_map, license_map, available_metrics = get_cached_data()
23
-
24
- with st.sidebar:
25
- st.markdown('''
26
- <div align="center">
27
- <h1>Model Card Creator</h1>
28
-
29
- [![Github Badge](https://img.shields.io/github/stars/nateraw/modelcards?style=social)](https://github.com/nateraw/modelcards)
30
- </div>
31
- ''', unsafe_allow_html=True)
32
- st.markdown("This app lets you generate model cards for your πŸ€— Hub model repo!")
33
- view = st.selectbox("View", ["Markdown", "Raw Text", "How to Programmatically Generate"])
34
- warning_placeholder = st.empty()
35
- placeholder = st.empty()
36
- st.markdown('---')
37
-
38
- model_name = st.text_input(
39
- "Model Name", "my-cool-model", help="The name of your model. (Ex. my-cool-model, bert-base-uncased, etc.)"
40
- )
41
- languages = (
42
- st.multiselect(
43
- "Language",
44
- languages_map.keys(),
45
- format_func=lambda x: languages_map[x],
46
- help="The language(s) associated with this model. If this is not a text-based model, you should specify whatever lanuage is used in the dataset. For instance, if the dataset's labels are in english, you should select English here.",
47
- )
48
- or None
49
- )
50
- license = st.selectbox("License", license_map.keys(), 33, help="The license associated with this model.")
51
- library_name = (
52
- st.text_input(
53
- "Library Name", help="The name of the library this model came from (Ex. pytorch, timm, spacy, keras, etc.)"
54
- )
55
- or None
56
- )
57
- tags = [
58
- x.strip()
59
- for x in st.text_input(
60
- "Tags (comma separated)",
61
- help="Additional tags to add which will be filterable on https://hf.co/models. (Ex. image-classification, vision, resnet)",
62
- ).split(',')
63
- if x.strip()
64
- ] or None
65
- dataset = (
66
- st.text_input(
67
- "Dataset", help="The dataset used to train this model. Use dataset id from https://hf.co/datasets"
68
- )
69
- or None
70
- )
71
- metrics = (
72
- st.multiselect(
73
- "Metrics",
74
- available_metrics,
75
- help="Metrics used in the training/evaluation of this model. Use metric id from https://hf.co/metrics.",
76
- )
77
- or None
78
- )
79
- model_description = st.text_area(
80
- "Model Description",
81
- "Describe your model here...",
82
- help="The model description provides basic details about the model. This includes the architecture, version, if it was introduced in a paper, if an original implementation is available, the author, and general information about the model. Any copyright should be attributed here. General information about training procedures, parameters, and important disclaimers can also be mentioned in this section.",
83
- )
84
-
85
- do_warn = False
86
- warning_msg = "Warning: The following fields are required but have not been filled in: "
87
- if not languages:
88
- warning_msg += "\n- Languages"
89
- do_warn = True
90
- if not license:
91
- warning_msg += "\n- License"
92
- do_warn = True
93
-
94
- if do_warn:
95
- warning_placeholder.warning(warning_msg)
96
-
97
- card = ModelCard.from_template(
98
- language=languages,
99
- license=license_map[license],
100
- library_name=library_name,
101
- tags=tags,
102
- datasets=dataset,
103
- metrics=metrics,
104
- model_id=model_name,
105
- model_description=model_description,
106
- )
107
-
108
- placeholder.download_button(
109
- label="Download Model Card", data=str(card), file_name='README.md', mime='text/plain', disabled=do_warn
110
- )
111
-
112
- markdown_text = f"""
113
- Card metadata...this should be at the beginning of your readme file.
114
-
115
- ```
116
- ---
117
- {card.data.to_yaml()}
118
- ---
119
- ```
120
-
121
- {card.text}
122
- """
123
-
124
- creation_code = f'''
125
- # Make sure you have modelcards installed!
126
- # pip install modelcards==0.0.4
127
-
128
- from modelcards import ModelCard
129
-
130
- card = ModelCard.from_template(
131
- language={languages},
132
- license={"'" + license_map[license] + "'" if license else None},
133
- library_name={"'" + library_name + "'" if library_name else None},
134
- tags={tags},
135
- datasets={"'" + dataset + "'" if dataset else None},
136
- metrics={metrics},
137
- model_id={"'" + model_name + "'" if model_name else None},
138
- model_description={"'" + model_description + "'" if model_description else None},
139
- )
140
- '''
141
-
142
- if view == 'Raw Text':
143
- st.text(str(card))
144
- elif view == "Markdown":
145
- st.markdown(markdown_text)
146
- else:
147
- st.code(creation_code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/2_πŸ‘€_view_card.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from persist import persist, load_widget_state
3
+ from modelcards import CardData, ModelCard
4
+ from huggingface_hub import create_repo
5
+
6
+
7
def is_float(value):
    """Return True if ``value`` can be converted to a float, else False.

    Accepts anything ``float()`` accepts (str, int, float, ...). Returns
    False instead of raising for non-numeric strings or None.
    """
    try:
        float(value)
        return True
    except (TypeError, ValueError):
        # Catch only conversion failures; a bare `except:` would also swallow
        # unrelated exceptions such as KeyboardInterrupt/SystemExit.
        return False
13
+
14
def get_card():
    """Build a ModelCard from the values currently held in session state.

    Stops the Streamlit script with an error message when a required field
    (languages, license) is missing.
    """
    state = st.session_state

    # Normalize empty strings/lists to None so they are omitted from metadata.
    languages = state.languages or None
    license = state.license or None
    library_name = state.library_name or None
    metrics = state.metrics or None
    model_name = state.model_name or None
    model_description = state.model_description or None
    authors = state.authors or None
    paper_url = state.paper_url or None
    github_url = state.github_url or None
    bibtex_citations = state.bibtex_citations or None

    # Comma-separated text inputs -> lists (blank entries dropped).
    tags = [t.strip() for t in state.tags.split(',') if t.strip()]
    tags.append("autogenerated-modelcard")
    datasets = [d.strip() for d in state.datasets.split(',') if d.strip()] or None

    # Emissions is free-form text; only keep it when it parses as a number.
    emissions = float(state.emissions) if is_float(state.emissions) else None

    # Required-field validation.
    missing = []
    if not languages:
        missing.append("Languages")
    if not license:
        missing.append("License")
    if missing:
        warning_msg = "Warning: The following fields are required but have not been filled in: "
        warning_msg += "".join(f"\n- {name}" for name in missing)
        st.error(warning_msg)
        st.stop()

    # Generate the card metadata block.
    card_data = CardData(
        language=languages,
        license=license,
        library_name=library_name,
        tags=tags,
        datasets=datasets,
        metrics=metrics,
    )
    if emissions:
        card_data.co2_eq_emissions = {'emissions': emissions}

    return ModelCard.from_template(
        card_data,
        template_path='template.md',
        model_id=model_name,
        # Template kwargs:
        model_description=model_description,
        license=license,
        authors=authors,
        paper_url=paper_url,
        github_url=github_url,
        bibtex_citations=bibtex_citations,
        emissions=emissions
    )
69
+
70
+
71
def main():
    """Render the generated model card and offer upload to the Hugging Face Hub."""
    card = get_card()
    card.save('current_card.md')

    # Toggle between rendered markdown and the raw card text.
    view_raw = st.sidebar.checkbox("View Raw")
    if view_raw:
        st.text(card)
    else:
        st.markdown(card.text, unsafe_allow_html=True)

    with st.sidebar:
        with st.form("Upload to 🤗 Hub"):
            st.markdown("Use a token with write access from [here](https://hf.co/settings/tokens)")
            token = st.text_input("Token", type='password')
            repo_id = st.text_input("Repo ID")
            submit = st.form_submit_button('Upload to 🤗 Hub')

    if submit:
        if len(repo_id.split('/')) == 2:
            # Pass the user's token so uploads work on their account;
            # previously the token was collected but never used.
            repo_url = create_repo(repo_id, exist_ok=True, token=token or None)
            # TODO(review): push_to_hub may also need the token — confirm against
            # the modelcards API before relying on ambient credentials.
            card.push_to_hub(repo_id)
            # Fixed: the markdown link was missing its closing parenthesis.
            st.success(f"Pushed the card to the repo [here]({repo_url})!")
        else:
            st.error("Repo ID invalid. It should be username/repo-name. For example: nateraw/food")


if __name__ == "__main__":
    load_widget_state()
    main()
pages/3_❓_about.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
1
+ from persist import load_widget_state
2
+ from pathlib import Path
3
+ import streamlit as st
4
+
5
def main():
    """Render the About page from the repo's `about.md` file."""
    about_text = Path('about.md').read_text()
    st.markdown(about_text, unsafe_allow_html=True)


if __name__ == '__main__':
    load_widget_state()
    main()
persist.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Thank god this existed.
2
+ # https://gist.github.com/okld/0aba4869ba6fdc8d49132e6974e2e662
3
+
4
+ from streamlit import session_state as _state
5
+
6
+ _PERSIST_STATE_KEY = f"{__name__}_PERSIST"
7
+
8
+
9
def persist(key: str) -> str:
    """Register ``key`` as a persistent widget key and return it unchanged.

    Intended to wrap a widget's ``key=`` argument, e.g.
    ``st.text_input("Name", key=persist("name"))``.
    """
    # The set of persisted keys lives inside session state itself; create it
    # on first use, then record this key.
    _state.setdefault(_PERSIST_STATE_KEY, set()).add(key)
    return key
17
+
18
+
19
def load_widget_state():
    """Re-assign persisted values so widgets keep them across page switches."""
    if _PERSIST_STATE_KEY not in _state:
        # Nothing has been marked persistent yet.
        return
    persisted_keys = _state[_PERSIST_STATE_KEY]
    # Snapshot the persisted entries first, then write them back in one call.
    snapshot = {key: value for key, value in _state.items() if key in persisted_keys}
    _state.update(snapshot)
template.md ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ {{ card_data }}
3
+ ---
4
+
5
+ # {{ model_id | default("MyModelName", true)}}
6
+
7
+ ## Table of Contents
8
+ - [Model Details](#model-details)
9
+ - [How To Get Started With the Model](#how-to-get-started-with-the-model)
10
+ - [Uses](#uses)
11
+ - [Direct Use](#direct-use)
12
+ - [Downstream Use](#downstream-use)
13
+ - [Misuse and Out of Scope Use](#misuse-and-out-of-scope-use)
14
+ - [Limitations and Biases](#limitations-and-biases)
15
+ - [Training](#training)
16
+ - [Training Data](#training-data)
17
+ - [Training Procedure](#training-procedure)
18
+ - [Evaluation Results](#evaluation-results)
19
+ - [Environmental Impact](#environmental-impact)
20
+ - [Licensing Information](#licensing-information)
21
+ - [Citation Information](#citation-information)
22
+
23
+
24
+ ## Model Details
25
+
26
+ <!-- Give an overview of your model, the relevant research paper, who trained it, etc. -->
27
+
28
+ {{ model_description if model_description else "[More Information Needed]" }}
29
+
30
+ - Developed by: {{ authors if authors }}
31
+ - Language(s): {{ languages }}
32
+ - License: This model is licensed under the {{ license }}{{ " license" if "license" not in license.lower() }}
33
+ - Resources for more information:
34
+ {{ " - [Research Paper](" + paper_url + ")" if paper_url }}
35
+ {{ " - [GitHub Repo](" + github_url + ")" if github_url }}
36
+
37
+
38
+ ## How to Get Started with the Model
39
+
40
+ Use the code below to get started with the model.
41
+
42
+ ```python
43
+ # A nice code snippet here that describes how to use the model...
44
+ ```
45
+
46
+ ## Uses
47
+
48
+ #### Direct Use
49
+
50
+ <!-- Describe what kind of tasks this model can be used for directly or problems it can solve. -->
51
+
52
+ [More Information Needed]
53
+
54
+ #### Downstream Use
55
+
56
+ <!-- Describe how this model could be leveraged by a downstream model (if applicable) -->
57
+
58
+ [More Information Needed]
59
+
60
+ #### Misuse and Out-of-scope Use
61
+
62
+ <!-- Describe ways in which this model ***should not*** be used. -->
63
+
64
+ [More Information Needed]
65
+
66
+ ## Limitations and Biases
67
+
68
+ <!-- Describe limitations and biases of this model or models of it's type. -->
69
+
70
+ **CONTENT WARNING: Readers should be aware this section contains content that is disturbing, offensive, and can propagate historical and current stereotypes.**
71
+
72
+ [More Information Needed]
73
+
74
+ ## Training
75
+
76
+ #### Training Data
77
+
78
+ <!-- Describe the dataset used to train this model. -->
79
+ <!-- Refer to data card if dataset is provided and exists on the hub -->
80
+
81
+ See the data card for additional information.
82
+
83
+ #### Training Procedure
84
+
85
+ <!-- Describe the preprocessing, hardware used, training hyperparameters, etc. -->
86
+
87
+ [More Information Needed]
88
+
89
+ ## Evaluation Results
90
+
91
+ <!-- Describe evaluation results of this model across any datasets it was evaluated on. -->
92
+
93
+ [More Information Needed]
94
+
95
+ ## Environmental Impact
96
+
97
+ <!-- Provide information to document the environmental impact of this model -->
98
+
99
+ You can estimate carbon emissions using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700)
100
+
101
+ - **Hardware Type:**
102
+ - **Hours used:**
103
+ - **Cloud Provider:**
104
+ - **Compute Region:**
105
+ - **Carbon Emitted:** {{ emissions if emissions }}
106
+
107
+
108
+ ## Citation Information
109
+
110
+ ```bibtex
111
+ {{ bibtex_citations if bibtex_citations }}
112
+ ```