Quentin Lhoest committed on
Commit
bdf93e0
•
1 Parent(s): c2de6fa

update tags

Browse files
.gitattributes CHANGED
@@ -26,3 +26,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
  *.json filter=lfs diff=lfs merge=lfs -text
 
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
  *.json filter=lfs diff=lfs merge=lfs -text
29
+ metadata_942418870f35246ce32d98caf3f230024a58428b.json filter=lfs diff=lfs merge=lfs -text
metadata_526dba9dce6f983ebcacd65bef35a868a5735df6.json → metadata_942418870f35246ce32d98caf3f230024a58428b.json RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5232c816fea8dbdca0878f9520848496cacb64e5d28e375d46471cbdcc81fb11
3
- size 30119915
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7052da9dcb564799fdfb47ac8f50232fd3022e8d14023d76c280c51eb613e405
3
+ size 30721389
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  pyyaml
2
  streamlit>=0.88.0
3
  langcodes[data]
4
- datasets~=2.1
1
  pyyaml
2
  streamlit>=0.88.0
3
  langcodes[data]
4
+ git+https://github.com/huggingface/datasets@7e514c3#egg=datasets
tagging_app.py CHANGED
@@ -76,10 +76,10 @@ def multiselect(
76
  return w.multiselect(markdown, valid_set, default=valid_values, format_func=format_func, key=title)
77
 
78
 
79
- def validate_dict(w: st.delta_generator.DeltaGenerator, state_dict: Dict):
80
  try:
81
- DatasetMetadata(**state_dict)
82
- if not state_dict.get("pretty_name"):
83
  raise ValueError("Please specify a non-empty Dataset name.")
84
  w.markdown("✅ This is a valid tagset! 🤗")
85
  except Exception as e:
@@ -168,10 +168,11 @@ if leftbtn.button("pre-load"):
168
  state = initial_state or new_state()
169
  st.experimental_set_query_params(preload_dataset=preloaded_id)
170
  if not is_state_empty(state):
171
- if rightbtn.button("flush state"):
172
  state = new_state()
173
  initial_state = None
174
  preloaded_id = None
 
175
  st.experimental_set_query_params()
176
 
177
  if preloaded_id is not None and initial_state is not None:
@@ -181,7 +182,7 @@ if preloaded_id is not None and initial_state is not None:
181
  The current base tagset is [`{preloaded_id}`](https://huggingface.co/datasets/{preloaded_id})
182
  """
183
  )
184
- validate_dict(st.sidebar, initial_state)
185
  st.sidebar.markdown(
186
  f"""
187
  Here is the matching yaml block:
@@ -382,7 +383,7 @@ rightcol.markdown(
382
  if is_state_empty(state):
383
  rightcol.markdown("❌ This is an invalid tagset: it's empty!")
384
  else:
385
- validate_dict(rightcol, state)
386
 
387
 
388
  rightcol.markdown(
@@ -400,5 +401,4 @@ This is a standalone tool, it is useful to check for errors on an existing tagse
400
 
401
  yamlblock = rightcol.text_area("Input your yaml here")
402
  if yamlblock.strip() != "":
403
- inputdict = yaml.safe_load(yamlblock)
404
- validate_dict(rightcol, inputdict)
76
  return w.multiselect(markdown, valid_set, default=valid_values, format_func=format_func, key=title)
77
 
78
 
79
+ def validate_yaml(w: st.delta_generator.DeltaGenerator, yamlblock: str):
80
  try:
81
+ DatasetMetadata.from_yaml_string(yamlblock)
82
+ if "pretty_name: " not in yamlblock or "pretty_name: ''" in yamlblock:
83
  raise ValueError("Please specify a non-empty Dataset name.")
84
  w.markdown("✅ This is a valid tagset! 🤗")
85
  except Exception as e:
168
  state = initial_state or new_state()
169
  st.experimental_set_query_params(preload_dataset=preloaded_id)
170
  if not is_state_empty(state):
171
+ if rightbtn.button("reset"):
172
  state = new_state()
173
  initial_state = None
174
  preloaded_id = None
175
+ initial_infos = None
176
  st.experimental_set_query_params()
177
 
178
  if preloaded_id is not None and initial_state is not None:
182
  The current base tagset is [`{preloaded_id}`](https://huggingface.co/datasets/{preloaded_id})
183
  """
184
  )
185
+ validate_yaml(st.sidebar, yaml.dump(initial_state))
186
  st.sidebar.markdown(
187
  f"""
188
  Here is the matching yaml block:
383
  if is_state_empty(state):
384
  rightcol.markdown("❌ This is an invalid tagset: it's empty!")
385
  else:
386
+ validate_yaml(rightcol, yaml.dump(state))
387
 
388
 
389
  rightcol.markdown(
401
 
402
  yamlblock = rightcol.text_area("Input your yaml here")
403
  if yamlblock.strip() != "":
404
+ validate_yaml(rightcol, yamlblock)