yourusername committed on
Commit
4e3cf4d
·
1 Parent(s): 5a73d89

:sparkles: update app to include new features

Browse files
Files changed (7) hide show
  1. 1_📝_form.py +83 -0
  2. about.md +4 -0
  3. app.py +0 -147
  4. pages/2_👀_view_card.py +99 -0
  5. pages/3_❓_about.py +10 -0
  6. persist.py +26 -0
  7. template.md +112 -0
1_📝_form.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from yaml import load
2
+ from persist import persist, load_widget_state
3
+ import streamlit as st
4
+
5
+ import requests
6
+ import pandas as pd
7
+
8
@st.cache
def get_cached_data():
    """Download the option lists used to populate the model card form.

    Scrapes the language and license tables with ``pandas.read_html`` and
    queries the Hub's metrics and model-tags endpoints with ``requests``.
    The result is cached by ``st.cache`` so the downloads are not repeated
    on every rerun of the script.

    Returns:
        tuple: ``(languages_map, license_map, available_metrics, libraries,
        tasks)`` where ``languages_map`` maps ISO code -> language name,
        ``license_map`` maps license full name -> license identifier, and the
        remaining three items are lists of ``id`` strings.

    Raises:
        requests.HTTPError: If one of the Hub API endpoints answers with an
            error status.
        requests.Timeout: If one of the Hub API endpoints does not answer
            within the timeout.
    """
    # Without a timeout ``requests`` waits forever on a stalled connection,
    # which would freeze the whole page while the cache is being filled.
    request_timeout = 30

    languages_df = pd.read_html("https://hf.co/languages")[0]
    languages_map = pd.Series(languages_df["Language"].values, index=languages_df["ISO code"]).to_dict()

    license_df = pd.read_html("https://huggingface.co/docs/hub/repositories-licenses")[0]
    license_map = pd.Series(
        license_df["License identifier (to use in model card)"].values, index=license_df.Fullname
    ).to_dict()

    metrics_response = requests.get('https://huggingface.co/api/metrics', timeout=request_timeout)
    # Fail loudly on an HTTP error instead of trying to index an error payload.
    metrics_response.raise_for_status()
    available_metrics = [x['id'] for x in metrics_response.json()]

    r = requests.get('https://huggingface.co/api/models-tags-by-type', timeout=request_timeout)
    r.raise_for_status()
    tags_data = r.json()
    libraries = [x['id'] for x in tags_data['library']]
    tasks = [x['id'] for x in tags_data['pipeline_tag']]
    return languages_map, license_map, available_metrics, libraries, tasks
25
+
26
+
27
def main():
    """Render the model card form page.

    Seeds ``st.session_state`` with empty defaults the first time the page is
    run, draws one persisted widget per model card field, and shows an error
    banner above the form listing the required fields (languages, license)
    that are still empty.
    """
    if "model_name" not in st.session_state:
        # Initialize session state.
        defaults = dict(
            model_name="",
            languages=[],
            license="",
            library_name="",
            datasets="",
            metrics=[],
            task="",
            tags="",
            model_description="Some cool model...",
            authors="",
            paper_url="",
            github_url="",
            bibtex_citations="",
            emissions="",
        )
        st.session_state.update(defaults)

    languages_map, license_map, available_metrics, libraries, tasks = get_cached_data()

    st.header("Model Card Form")

    # Reserved here so the warning is displayed above the widgets even though
    # it is only filled in after they have been drawn.
    warning_placeholder = st.empty()

    st.text_input("Model Name", key=persist("model_name"))
    st.text_area(
        "Model Description",
        help="The model description provides basic details about the model. This includes the architecture, version, if it was introduced in a paper, if an original implementation is available, the author, and general information about the model. Any copyright should be attributed here. General information about training procedures, parameters, and important disclaimers can also be mentioned in this section.",
        key=persist("model_description"),
    )
    st.multiselect(
        "Language(s)",
        list(languages_map),
        format_func=lambda code: languages_map[code],
        help="The language(s) associated with this model. If this is not a text-based model, you should specify whatever lanuage is used in the dataset. For instance, if the dataset's labels are in english, you should select English here.",
        key=persist("languages"),
    )
    st.selectbox(
        "License",
        [""] + list(license_map.values()),
        help="The license associated with this model.",
        key=persist("license"),
    )
    st.selectbox(
        "Library Name",
        [""] + libraries,
        help="The name of the library this model came from (Ex. pytorch, timm, spacy, keras, etc.). This is usually automatically detected in model repos, so it is not required.",
        key=persist("library_name"),
    )
    st.text_input(
        "Datasets (comma separated)",
        help="The dataset(s) used to train this model. Use dataset id from https://hf.co/datasets.",
        key=persist("datasets"),
    )
    st.multiselect(
        "Metrics",
        available_metrics,
        help="Metrics used in the training/evaluation of this model. Use metric id from https://hf.co/metrics.",
        key=persist("metrics"),
    )
    st.selectbox(
        "Task",
        [""] + tasks,
        help="What task does this model aim to solve?",
        key=persist("task"),
    )
    st.text_input(
        "Tags (comma separated)",
        help="Additional tags to add which will be filterable on https://hf.co/models. (Ex. image-classification, vision, resnet)",
        key=persist("tags"),
    )
    st.text_input(
        "Author(s) (comma separated)",
        help="The authors who developed this model. If you trained this model, the author is you.",
        key=persist("authors"),
    )
    st.text_input(
        "Related Research Paper",
        help="Research paper related to this model.",
        key=persist("paper_url"),
    )
    st.text_input(
        "Related GitHub Repository",
        help="Link to a GitHub repository used in the development of this model",
        key=persist("github_url"),
    )
    st.text_area(
        "Bibtex Citation",
        help="Bibtex citations for related work",
        key=persist("bibtex_citations"),
    )
    st.text_input(
        "Carbon Emitted:",
        help="You can estimate carbon emissions using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700)",
        key=persist("emissions"),
    )

    # Handle any warnings...
    required_fields = (
        ("Languages", st.session_state.languages),
        ("License", st.session_state.license),
    )
    missing = [label for label, value in required_fields if not value]
    if missing:
        bullet_lines = "".join(f"\n- {label}" for label in missing)
        warning_placeholder.error(
            "Warning: The following fields are required but have not been filled in: " + bullet_lines
        )
81
+ if __name__ == '__main__':
82
+ load_widget_state()
83
+ main()
about.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # About
2
+
3
+ We built this space to make it easier to create _good_ model cards :)
4
+
app.py DELETED
@@ -1,147 +0,0 @@
1
- import pandas as pd
2
- import requests
3
- import streamlit as st
4
- from modelcards import ModelCard
5
-
6
-
7
- @st.cache
8
- def get_cached_data():
9
- languages_df = pd.read_html("https://hf.co/languages")[0]
10
- languages_map = pd.Series(languages_df["Language"].values, index=languages_df["ISO code"]).to_dict()
11
-
12
- license_df = pd.read_html("https://huggingface.co/docs/hub/repositories-licenses")[0]
13
- license_map = pd.Series(
14
- license_df["License identifier (to use in model card)"].values, index=license_df.Fullname
15
- ).to_dict()
16
-
17
- available_metrics = [x['id'] for x in requests.get('https://huggingface.co/api/metrics').json()]
18
-
19
- return languages_map, license_map, available_metrics
20
-
21
-
22
- languages_map, license_map, available_metrics = get_cached_data()
23
-
24
- with st.sidebar:
25
- st.markdown('''
26
- <div align="center">
27
- <h1>Model Card Creator</h1>
28
-
29
- [![Github Badge](https://img.shields.io/github/stars/nateraw/modelcards?style=social)](https://github.com/nateraw/modelcards)
30
- </div>
31
- ''', unsafe_allow_html=True)
32
- st.markdown("This app lets you generate model cards for your 🤗 Hub model repo!")
33
- view = st.selectbox("View", ["Markdown", "Raw Text", "How to Programmatically Generate"])
34
- warning_placeholder = st.empty()
35
- placeholder = st.empty()
36
- st.markdown('---')
37
-
38
- model_name = st.text_input(
39
- "Model Name", "my-cool-model", help="The name of your model. (Ex. my-cool-model, bert-base-uncased, etc.)"
40
- )
41
- languages = (
42
- st.multiselect(
43
- "Language",
44
- languages_map.keys(),
45
- format_func=lambda x: languages_map[x],
46
- help="The language(s) associated with this model. If this is not a text-based model, you should specify whatever lanuage is used in the dataset. For instance, if the dataset's labels are in english, you should select English here.",
47
- )
48
- or None
49
- )
50
- license = st.selectbox("License", license_map.keys(), 33, help="The license associated with this model.")
51
- library_name = (
52
- st.text_input(
53
- "Library Name", help="The name of the library this model came from (Ex. pytorch, timm, spacy, keras, etc.)"
54
- )
55
- or None
56
- )
57
- tags = [
58
- x.strip()
59
- for x in st.text_input(
60
- "Tags (comma separated)",
61
- help="Additional tags to add which will be filterable on https://hf.co/models. (Ex. image-classification, vision, resnet)",
62
- ).split(',')
63
- if x.strip()
64
- ] or None
65
- dataset = (
66
- st.text_input(
67
- "Dataset", help="The dataset used to train this model. Use dataset id from https://hf.co/datasets"
68
- )
69
- or None
70
- )
71
- metrics = (
72
- st.multiselect(
73
- "Metrics",
74
- available_metrics,
75
- help="Metrics used in the training/evaluation of this model. Use metric id from https://hf.co/metrics.",
76
- )
77
- or None
78
- )
79
- model_description = st.text_area(
80
- "Model Description",
81
- "Describe your model here...",
82
- help="The model description provides basic details about the model. This includes the architecture, version, if it was introduced in a paper, if an original implementation is available, the author, and general information about the model. Any copyright should be attributed here. General information about training procedures, parameters, and important disclaimers can also be mentioned in this section.",
83
- )
84
-
85
- do_warn = False
86
- warning_msg = "Warning: The following fields are required but have not been filled in: "
87
- if not languages:
88
- warning_msg += "\n- Languages"
89
- do_warn = True
90
- if not license:
91
- warning_msg += "\n- License"
92
- do_warn = True
93
-
94
- if do_warn:
95
- warning_placeholder.warning(warning_msg)
96
-
97
- card = ModelCard.from_template(
98
- language=languages,
99
- license=license_map[license],
100
- library_name=library_name,
101
- tags=tags,
102
- datasets=dataset,
103
- metrics=metrics,
104
- model_id=model_name,
105
- model_description=model_description,
106
- )
107
-
108
- placeholder.download_button(
109
- label="Download Model Card", data=str(card), file_name='README.md', mime='text/plain', disabled=do_warn
110
- )
111
-
112
- markdown_text = f"""
113
- Card metadata...this should be at the beginning of your readme file.
114
-
115
- ```
116
- ---
117
- {card.data.to_yaml()}
118
- ---
119
- ```
120
-
121
- {card.text}
122
- """
123
-
124
- creation_code = f'''
125
- # Make sure you have modelcards installed!
126
- # pip install modelcards==0.0.4
127
-
128
- from modelcards import ModelCard
129
-
130
- card = ModelCard.from_template(
131
- language={languages},
132
- license={"'" + license_map[license] + "'" if license else None},
133
- library_name={"'" + library_name + "'" if library_name else None},
134
- tags={tags},
135
- datasets={"'" + dataset + "'" if dataset else None},
136
- metrics={metrics},
137
- model_id={"'" + model_name + "'" if model_name else None},
138
- model_description={"'" + model_description + "'" if model_description else None},
139
- )
140
- '''
141
-
142
- if view == 'Raw Text':
143
- st.text(str(card))
144
- elif view == "Markdown":
145
- st.markdown(markdown_text)
146
- else:
147
- st.code(creation_code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/2_👀_view_card.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from persist import persist, load_widget_state
3
+ from modelcards import CardData, ModelCard
4
+ from huggingface_hub import create_repo
5
+
6
+
7
def is_float(value):
    """Return True if ``float(value)`` succeeds, False otherwise.

    Only the errors ``float()`` raises for unconvertible input are treated as
    "not a float"; unrelated exceptions such as ``KeyboardInterrupt`` are no
    longer swallowed.
    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: wrong type (e.g. None); ValueError: unparsable text;
        # OverflowError: an int too large to be represented as a float.
        return False
    return True
13
+
14
def get_card():
    """Build a ``ModelCard`` from the form values held in ``st.session_state``.

    Empty form values are converted to ``None``. The comma separated
    ``tags`` and ``datasets`` fields are split into lists, and the tag
    ``"autogenerated-modelcard"`` is always appended. If the required
    languages or license are missing, an error is displayed and the script
    run is halted with ``st.stop()``.

    Returns:
        The card produced by ``ModelCard.from_template`` using
        ``template.md``.
    """
    state = st.session_state

    def split_csv(raw):
        # "a, b,,c " -> ["a", "b", "c"]
        return [item.strip() for item in raw.split(',') if item.strip()]

    languages = state.languages or None
    # Named ``license_id`` so the ``license`` builtin is not shadowed.
    license_id = state.license or None
    library_name = state.library_name or None
    tags = split_csv(state.tags)
    tags.append("autogenerated-modelcard")
    datasets = split_csv(state.datasets) or None
    metrics = state.metrics or None
    model_name = state.model_name or None
    model_description = state.model_description or None
    authors = state.authors or None
    paper_url = state.paper_url or None
    github_url = state.github_url or None
    bibtex_citations = state.bibtex_citations or None
    # NOTE: input that cannot be parsed as a number is silently ignored.
    emissions = float(state.emissions) if is_float(state.emissions) else None

    # Handle any warnings...
    missing = []
    if not languages:
        missing.append("Languages")
    if not license_id:
        missing.append("License")
    if missing:
        st.error(
            "Warning: The following fields are required but have not been filled in: "
            + "".join(f"\n- {label}" for label in missing)
        )
        st.stop()

    # Generate and display card
    card_data = CardData(
        language=languages,
        license=license_id,
        library_name=library_name,
        tags=tags,
        datasets=datasets,
        metrics=metrics,
    )
    # Compare against None so an explicit value of 0 is still recorded.
    if emissions is not None:
        card_data.co2_eq_emissions = {'emissions': emissions}

    card = ModelCard.from_template(
        card_data,
        template_path='template.md',
        model_id=model_name,
        # Template kwargs:
        model_description=model_description,
        # template.md renders ``{{ languages }}``; without this keyword the
        # "Language(s):" line of the card is left blank.
        languages=", ".join(str(code) for code in languages),
        license=license_id,
        authors=authors,
        paper_url=paper_url,
        github_url=github_url,
        bibtex_citations=bibtex_citations,
        emissions=emissions,
    )
    return card
69
+
70
+
71
def main():
    """Show the generated model card and a sidebar form to push it to the Hub.

    The card is built by ``get_card()``, written to ``current_card.md`` and
    displayed either as raw text or as rendered Markdown depending on the
    "View Raw" checkbox. When the sidebar form is submitted with a repo id
    of the form ``username/repo-name``, the repo is created if needed and
    the card is pushed to it using the token entered in the form.
    """
    card = get_card()
    card.save('current_card.md')
    view_raw = st.sidebar.checkbox("View Raw")
    if view_raw:
        st.text(card)
    else:
        st.markdown(card.text, unsafe_allow_html=True)

    with st.sidebar:
        with st.form("Upload to 🤗 Hub"):
            st.markdown("Use a token with write access from [here](https://hf.co/settings/tokens)")
            token = st.text_input("Token", type='password')
            repo_id = st.text_input("Repo ID")
            submit = st.form_submit_button('Upload to 🤗 Hub')

    if submit:
        repo_parts = [part.strip() for part in repo_id.split('/')]
        # Require exactly "owner/name" with both segments non-empty.
        if len(repo_parts) == 2 and all(repo_parts):
            # Forward the token typed into the form; previously it was
            # collected but never used. A blank field falls back to None so
            # the libraries' own credential lookup still applies.
            hub_token = token or None
            repo_url = create_repo(repo_id, exist_ok=True, token=hub_token)
            card.push_to_hub(repo_id, token=hub_token)
            # The closing parenthesis of the Markdown link was missing.
            st.success(f"Pushed the card to the repo [here]({repo_url})!")
        else:
            st.error("Repo ID invalid. It should be username/repo-name. For example: nateraw/food")
95
+
96
+
97
+ if __name__ == "__main__":
98
+ load_widget_state()
99
+ main()
pages/3_❓_about.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from persist import load_widget_state
2
+ from pathlib import Path
3
+ import streamlit as st
4
+
5
def main():
    """Render the contents of ``about.md`` as the page body."""
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale's default encoding.
    about_text = Path('about.md').read_text(encoding='utf-8')
    st.markdown(about_text, unsafe_allow_html=True)
7
+
8
+ if __name__ == '__main__':
9
+ load_widget_state()
10
+ main()
persist.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Thank god this existed.
2
+ # https://gist.github.com/okld/0aba4869ba6fdc8d49132e6974e2e662
3
+
4
+ from streamlit import session_state as _state
5
+
6
+ _PERSIST_STATE_KEY = f"{__name__}_PERSIST"
7
+
8
+
9
def persist(key: str) -> str:
    """Register ``key`` as a persistent widget key and return it unchanged.

    The registered keys are kept in a set stored in session state under
    ``_PERSIST_STATE_KEY``; the set is created on first use.
    """
    already_initialised = _PERSIST_STATE_KEY in _state
    if not already_initialised:
        _state[_PERSIST_STATE_KEY] = set()

    registered_keys = _state[_PERSIST_STATE_KEY]
    registered_keys.add(key)
    return key
17
+
18
+
19
def load_widget_state():
    """Re-assign every persisted key in session state to its current value.

    Does nothing when no key has been registered through ``persist`` yet.
    """
    if _PERSIST_STATE_KEY not in _state:
        return

    snapshot = {}
    for name, current in _state.items():
        if name in _state[_PERSIST_STATE_KEY]:
            snapshot[name] = current
    _state.update(snapshot)
template.md ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ {{ card_data }}
3
+ ---
4
+
5
+ # {{ model_id | default("MyModelName", true)}}
6
+
7
+ ## Table of Contents
8
+ - [Model Details](#model-details)
9
+ - [How To Get Started With the Model](#how-to-get-started-with-the-model)
10
+ - [Uses](#uses)
11
+ - [Direct Use](#direct-use)
12
+ - [Downstream Use](#downstream-use)
13
+ - [Misuse and Out of Scope Use](#misuse-and-out-of-scope-use)
14
+ - [Limitations and Biases](#limitations-and-biases)
15
+ - [Training](#training)
16
+ - [Training Data](#training-data)
17
+ - [Training Procedure](#training-procedure)
18
+ - [Evaluation Results](#evaluation-results)
19
+ - [Environmental Impact](#environmental-impact)
20
+ - [Licensing Information](#licensing-information)
21
+ - [Citation Information](#citation-information)
22
+
23
+
24
+ ## Model Details
25
+
26
+ <!-- Give an overview of your model, the relevant research paper, who trained it, etc. -->
27
+
28
+ {{ model_description if model_description else "[More Information Needed]" }}
29
+
30
+ - Developed by: {{ authors if authors }}
31
+ - Language(s): {{ languages }}
32
+ - License: This model is licensed under the {{ license }}{{ " license" if "license" not in license.lower() }}
33
+ - Resources for more information:
34
+ {{ " - [Research Paper](" + paper_url + ")" if paper_url }}
35
+ {{ " - [GitHub Repo](" + github_url + ")" if github_url }}
36
+
37
+
38
+ ## How to Get Started with the Model
39
+
40
+ Use the code below to get started with the model.
41
+
42
+ ```python
43
+ # A nice code snippet here that describes how to use the model...
44
+ ```
45
+
46
+ ## Uses
47
+
48
+ #### Direct Use
49
+
50
+ <!-- Describe what kind of tasks this model can be used for directly or problems it can solve. -->
51
+
52
+ [More Information Needed]
53
+
54
+ #### Downstream Use
55
+
56
+ <!-- Describe how this model could be leveraged by a downstream model (if applicable) -->
57
+
58
+ [More Information Needed]
59
+
60
+ #### Misuse and Out-of-scope Use
61
+
62
+ <!-- Describe ways in which this model ***should not*** be used. -->
63
+
64
+ [More Information Needed]
65
+
66
+ ## Limitations and Biases
67
+
68
+ <!-- Describe limitations and biases of this model or models of its type. -->
69
+
70
+ **CONTENT WARNING: Readers should be aware this section contains content that is disturbing, offensive, and can propagate historical and current stereotypes.**
71
+
72
+ [More Information Needed]
73
+
74
+ ## Training
75
+
76
+ #### Training Data
77
+
78
+ <!-- Describe the dataset used to train this model. -->
79
+ <!-- Refer to data card if dataset is provided and exists on the hub -->
80
+
81
+ See the data card for additional information.
82
+
83
+ #### Training Procedure
84
+
85
+ <!-- Describe the preprocessing, hardware used, training hyperparameters, etc. -->
86
+
87
+ [More Information Needed]
88
+
89
+ ## Evaluation Results
90
+
91
+ <!-- Describe evaluation results of this model across any datasets it was evaluated on. -->
92
+
93
+ [More Information Needed]
94
+
95
+ ## Environmental Impact
96
+
97
+ <!-- Provide information to document the environmental impact of this model -->
98
+
99
+ You can estimate carbon emissions using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700)
100
+
101
+ - **Hardware Type:**
102
+ - **Hours used:**
103
+ - **Cloud Provider:**
104
+ - **Compute Region:**
105
+ - **Carbon Emitted:** {{ emissions if emissions }}
106
+
107
+
108
+ ## Citation Information
109
+
110
+ ```bibtex
111
+ {{ bibtex_citations if bibtex_citations }}
112
+ ```