Spaces:
Runtime error
Runtime error
Update load_data.py
Browse files
- load_data.py +5 -5
load_data.py
CHANGED
@@ -12,11 +12,11 @@ from datasets import load_dataset, concatenate_datasets
|
|
12 |
from argilla.listeners import listener
|
13 |
|
14 |
HF_TOKEN = os.environ.get("HF_TOKEN") #get HF_TOKEN from space env variables
|
15 |
-
HUB_DATASET_NAME = os.environ.get('HUB_DATASET_NAME') #get dataset name
|
16 |
|
17 |
|
18 |
@listener(
|
19 |
-
dataset='
|
20 |
query="status:Validated", # https://docs.argilla.io/en/latest/guides/features/queries.html
|
21 |
execution_interval_in_seconds=10, # interval to check the execution of `save_validated_to_hub`
|
22 |
)
|
@@ -26,7 +26,7 @@ def save_validated_to_hub(records, ctx):
|
|
26 |
if HF_TOKEN:
|
27 |
print("Pushing the dataset")
|
28 |
print(ds)
|
29 |
-
ds.push_to_hub(HUB_DATASET_NAME, token=HF_TOKEN, private=True)
|
30 |
else:
|
31 |
print("SET HF_TOKEN and HUB_DATASET_NAME TO SYNC YOUR DATASET!!!")
|
32 |
else:
|
@@ -62,12 +62,12 @@ class LoadDatasets:
|
|
62 |
settings = rg.TokenClassificationSettings(
|
63 |
label_schema=["B-FUN", "I-FUN", "B-RES", "I-RES", "B-LEVEL", "I-LEVEL"]
|
64 |
)
|
65 |
-
rg.configure_dataset(name='
|
66 |
|
67 |
# Log the dataset
|
68 |
rg.log(
|
69 |
records,
|
70 |
-
name='
|
71 |
tags={"description": "Tagging of the DVS Job Titles"},
|
72 |
batch_size=200
|
73 |
)
|
|
|
12 |
from argilla.listeners import listener
|
13 |
|
14 |
HF_TOKEN = os.environ.get("HF_TOKEN") #get HF_TOKEN from space env variables
|
15 |
+
HUB_DATASET_NAME = os.environ.get('HUB_DATASET_NAME') #get dataset name with annotated titles
|
16 |
|
17 |
|
18 |
@listener(
|
19 |
+
dataset='job titles DV', #name of the dataset in argilla frontend
|
20 |
query="status:Validated", # https://docs.argilla.io/en/latest/guides/features/queries.html
|
21 |
execution_interval_in_seconds=10, # interval to check the execution of `save_validated_to_hub`
|
22 |
)
|
|
|
26 |
if HF_TOKEN:
|
27 |
print("Pushing the dataset")
|
28 |
print(ds)
|
29 |
+
ds.push_to_hub(HUB_DATASET_NAME, token=HF_TOKEN, private=True)
|
30 |
else:
|
31 |
print("SET HF_TOKEN and HUB_DATASET_NAME TO SYNC YOUR DATASET!!!")
|
32 |
else:
|
|
|
62 |
settings = rg.TokenClassificationSettings(
|
63 |
label_schema=["B-FUN", "I-FUN", "B-RES", "I-RES", "B-LEVEL", "I-LEVEL"]
|
64 |
)
|
65 |
+
rg.configure_dataset(name='job titles DV', settings=settings, workspace="admin")
|
66 |
|
67 |
# Log the dataset
|
68 |
rg.log(
|
69 |
records,
|
70 |
+
name='job titles DV',
|
71 |
tags={"description": "Tagging of the DVS Job Titles"},
|
72 |
batch_size=200
|
73 |
)
|