theo committed on
Commit
1cc3978
•
1 Parent(s): 326ad7e

add a validator input

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -0
  2. tagging_app.py +29 -19
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  pyyaml
2
  datasets
3
  streamlit
 
 
1
  pyyaml
2
  datasets
3
  streamlit
4
+ langcodes[data]
tagging_app.py CHANGED
@@ -1,6 +1,6 @@
1
  import json
2
  from pathlib import Path
3
- from typing import Callable, List, Tuple
4
 
5
  import streamlit as st
6
  import yaml
@@ -85,6 +85,21 @@ def multiselect(
85
  return w.multiselect(markdown, valid_set, default=valid_values, format_func=format_func)
86
 
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  def new_state():
89
  return {
90
  "task_categories": [],
@@ -155,17 +170,7 @@ if rightbtn.button("flush state"):
155
  st.experimental_set_query_params()
156
 
157
  if preloaded_id is not None and initial_state is not None:
158
- try:
159
- DatasetMetadata(**initial_state)
160
- valid = "✔️ This is a valid tagset!"
161
- except Exception as e:
162
- valid = f"""
163
- πŸ™ This is an invalid tagset, here are the errors in it:
164
- ```
165
- {e}
166
- ```
167
- You're _very_ welcome to fix these issues and submit a new PR on [`datasets`](https://github.com/huggingface/datasets/)
168
- """
169
  st.sidebar.markdown(
170
  f"""
171
  ---
@@ -323,13 +328,8 @@ state["size_categories"] = [
323
  ########################
324
  ## Show results
325
  ########################
326
- try:
327
- DatasetMetadata(**state)
328
- valid = "✔ Validated! Copy it into your dataset's `README.md` header! 🤗 "
329
- except Exception as e:
330
- valid = f"""🙁 Could not validate:
331
- ```{e}```
332
- """
333
  rightcol.markdown(
334
  f"""
335
  ### Finalized tag set
@@ -339,5 +339,15 @@ rightcol.markdown(
339
  ```yaml
340
  {yaml.dump(state)}
341
  ```
 
 
 
 
342
  """,
343
  )
 
 
 
 
 
 
 
1
  import json
2
  from pathlib import Path
3
+ from typing import Callable, Dict, List, Tuple
4
 
5
  import streamlit as st
6
  import yaml
 
85
  return w.multiselect(markdown, valid_set, default=valid_values, format_func=format_func)
86
 
87
 
88
+ def validate_dict(state_dict: Dict) -> str:
89
+ try:
90
+ DatasetMetadata(**state_dict)
91
+ valid = "✔️ This is a valid tagset! 🤗"
92
+ except Exception as e:
93
+ valid = f"""
94
+ πŸ™ This is an invalid tagset, here are the errors in it:
95
+ ```
96
+ {e}
97
+ ```
98
+ You're _very_ welcome to fix these issues and submit a new PR on [`datasets`](https://github.com/huggingface/datasets/)
99
+ """
100
+ return valid
101
+
102
+
103
  def new_state():
104
  return {
105
  "task_categories": [],
 
170
  st.experimental_set_query_params()
171
 
172
  if preloaded_id is not None and initial_state is not None:
173
+ valid = validate_dict(initial_state)
 
 
 
 
 
 
 
 
 
 
174
  st.sidebar.markdown(
175
  f"""
176
  ---
 
328
  ########################
329
  ## Show results
330
  ########################
331
+
332
+ valid = validate_dict(state)
 
 
 
 
 
333
  rightcol.markdown(
334
  f"""
335
  ### Finalized tag set
 
339
  ```yaml
340
  {yaml.dump(state)}
341
  ```
342
+ ---
343
+ #### Arbitrary yaml validator
344
+
345
+ This is a standalone tool, it is useful to check for errors on an existing tagset or modifying directly the text rather than the UI on the left.
346
  """,
347
  )
348
+
349
+ yamlblock = rightcol.text_area("Input your yaml here")
350
+ if yamlblock.strip() != "":
351
+ inputdict = yaml.safe_load(yamlblock)
352
+ valid = validate_dict(inputdict)
353
+ rightcol.markdown(valid)