Spaces:

LukasGe
/

Master_Thesis

Runtime error

LukasGe committed on Nov 1, 2023

Commit

9a11954

•

1 Parent(s): b60dfbe

Update load_data.py

Files changed (1) hide show

load_data.py CHANGED Viewed

@@ -12,11 +12,11 @@ from datasets import load_dataset, concatenate_datasets
 from argilla.listeners import listener
 HF_TOKEN = os.environ.get("HF_TOKEN") #get HF_TOKEN from space env variables
-HUB_DATASET_NAME = os.environ.get('HUB_DATASET_NAME') #get dataset name
 @listener(
-    dataset='job_tags_save',
     query="status:Validated", # https://docs.argilla.io/en/latest/guides/features/queries.html
     execution_interval_in_seconds=10, # interval to check the execution of `save_validated_to_hub`
 )
@@ -26,7 +26,7 @@ def save_validated_to_hub(records, ctx):
         if HF_TOKEN:
             print("Pushing the dataset")
             print(ds)
-            ds.push_to_hub(HUB_DATASET_NAME, token=HF_TOKEN, private=True)
         else:
             print("SET HF_TOKEN and HUB_DATASET_NAME TO SYNC YOUR DATASET!!!")
     else:
@@ -62,12 +62,12 @@ class LoadDatasets:
         settings = rg.TokenClassificationSettings(
             label_schema=["B-FUN", "I-FUN", "B-RES", "I-RES", "B-LEVEL", "I-LEVEL"]
         )
-        rg.configure_dataset(name='job_tags_save', settings=settings, workspace="admin")
         # Log the dataset
         rg.log(
             records,
-            name='job_tags_save',
             tags={"description": "Tagging of the DVS Job Titles"},
             batch_size=200
         )

 from argilla.listeners import listener
 HF_TOKEN = os.environ.get("HF_TOKEN") #get HF_TOKEN from space env variables
+HUB_DATASET_NAME = os.environ.get('HUB_DATASET_NAME') #get dataset name with annotated titles
 @listener(
+    dataset='job titles DV', #name of the dataset in argilla frontend
     query="status:Validated", # https://docs.argilla.io/en/latest/guides/features/queries.html
     execution_interval_in_seconds=10, # interval to check the execution of `save_validated_to_hub`
 )
         if HF_TOKEN:
             print("Pushing the dataset")
             print(ds)
+            ds.push_to_hub(HUB_DATASET_NAME, token=HF_TOKEN, private=True)
         else:
             print("SET HF_TOKEN and HUB_DATASET_NAME TO SYNC YOUR DATASET!!!")
     else:
         settings = rg.TokenClassificationSettings(
             label_schema=["B-FUN", "I-FUN", "B-RES", "I-RES", "B-LEVEL", "I-LEVEL"]
         )
+        rg.configure_dataset(name='job titles DV', settings=settings, workspace="admin")
         # Log the dataset
         rg.log(
             records,
+            name='job titles DV',
             tags={"description": "Tagging of the DVS Job Titles"},
             batch_size=200
         )