Spaces:

huggingface
/

datasets-tagging

Running

lhoestq HF Staff

julien-c HF Staff committed on Jul 1, 2022

Commit

c2de6fa

1 Parent(s): f208d84

Align more metadata with other repo types (models,spaces) (#2)

- rm GitHub Action (4a3daec80d4aed1eaee735aec7c87dd38354f998)
- `licenses:` => `license:` (35237ea94ff6c278cca5b56e1fa3c92b28564c35)
- `languages:` => `language:` (560591921d23cfc84360565ab9bc17be8da9f0f8)

Co-authored-by: Julien Chaumond <julien-c@users.noreply.huggingface.co>

Files changed (3) hide show

.github/workflows/deploy.yml DELETED Viewed

@@ -1,37 +0,0 @@
-name: Deployment
-on:
-  push:
-    branches:
-      - main
-  workflow_dispatch:
-jobs:
-  deployment:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out
-        uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: "3.6"
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements.txt
-      - name: Build metadata file
-        run: |
-          python build_metadata_file.py
-      - name: Commit metadata file
-        run: |
-          git config user.name github-actions
-          git config user.email github-actions@github.com
-          git add -f metadata_*
-          git commit -m "Add metadata file"
-      - name: Push to Hub
-        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        run: git push --force https://albertvillanova:$HF_TOKEN@huggingface.co/spaces/huggingface/datasets-tagging main

apputils.py CHANGED Viewed

@@ -6,11 +6,11 @@ def new_state() -> Dict[str, List]:
         "task_categories": [],
         "task_ids": [],
         "multilinguality": [],
-        "languages": [],
         "language_creators": [],
         "annotations_creators": [],
         "source_datasets": [],
         "size_categories": [],
-        "licenses": [],
         "pretty_name": None,
     }

         "task_categories": [],
         "task_ids": [],
         "multilinguality": [],
+        "language": [],
         "language_creators": [],
         "annotations_creators": [],
         "source_datasets": [],
         "size_categories": [],
+        "license": [],
         "pretty_name": None,
     }

tagging_app.py CHANGED Viewed

@@ -258,7 +258,7 @@ if "other" in state["multilinguality"]:
     state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
 valid_values, invalid_values = list(), list()
-for langtag in state["languages"]:
     try:
         lc.get(langtag)
         valid_values.append(langtag)
@@ -273,7 +273,7 @@ langtags = leftcol.text_area(
     "What languages are represented in the dataset? expected format is BCP47 tags separated for ';' e.g. 'en;fr'",
     value=";".join(valid_values),
 )
-state["languages"] = langtags.strip().split(";") if langtags.strip() != "" else []
 #
@@ -297,23 +297,16 @@ state["annotations_creators"] = multiselect(
 #
-# LICENSES
 #
-state["licenses"] = multiselect(
     leftcol,
-    "Licenses",
-    "What licenses is the dataset under?",
     valid_set=list(known_licenses.keys()),
-    values=state["licenses"],
     format_func=lambda l: f"{l} : {known_licenses[l]}",
 )
-if "other" in state["licenses"]:
-    other_license = st.text_input(
-        "You selected 'other' type of license. Please enter a short hyphen-separated description:",
-        value="my-license",
-    )
-    st.write(f"Registering other-{other_license} license")
-    state["licenses"][state["licenses"].index("other")] = f"other-{other_license}"
 #

     state["multilinguality"][state["multilinguality"].index("other")] = f"other-{other_multilinguality}"
 valid_values, invalid_values = list(), list()
+for langtag in state["language"]:
     try:
         lc.get(langtag)
         valid_values.append(langtag)
     "What languages are represented in the dataset? expected format is BCP47 tags separated for ';' e.g. 'en;fr'",
     value=";".join(valid_values),
 )
+state["language"] = langtags.strip().split(";") if langtags.strip() != "" else []
 #
 #
+# LICENSE
 #
+state["license"] = multiselect(
     leftcol,
+    "License",
+    "What license(s) is the dataset under?",
     valid_set=list(known_licenses.keys()),
+    values=state["license"],
     format_func=lambda l: f"{l} : {known_licenses[l]}",
 )
 #