LukasGe committed on
Commit
9a11954
1 Parent(s): b60dfbe

Update load_data.py

Browse files
Files changed (1) hide show
  1. load_data.py +5 -5
load_data.py CHANGED
@@ -12,11 +12,11 @@ from datasets import load_dataset, concatenate_datasets
12
  from argilla.listeners import listener
13
 
14
  HF_TOKEN = os.environ.get("HF_TOKEN") #get HF_TOKEN from space env variables
15
- HUB_DATASET_NAME = os.environ.get('HUB_DATASET_NAME') #get dataset name
16
 
17
 
18
  @listener(
19
- dataset='job_tags_save',
20
  query="status:Validated", # https://docs.argilla.io/en/latest/guides/features/queries.html
21
  execution_interval_in_seconds=10, # interval to check the execution of `save_validated_to_hub`
22
  )
@@ -26,7 +26,7 @@ def save_validated_to_hub(records, ctx):
26
  if HF_TOKEN:
27
  print("Pushing the dataset")
28
  print(ds)
29
- ds.push_to_hub(HUB_DATASET_NAME, token=HF_TOKEN, private=True)
30
  else:
31
  print("SET HF_TOKEN and HUB_DATASET_NAME TO SYNC YOUR DATASET!!!")
32
  else:
@@ -62,12 +62,12 @@ class LoadDatasets:
62
  settings = rg.TokenClassificationSettings(
63
  label_schema=["B-FUN", "I-FUN", "B-RES", "I-RES", "B-LEVEL", "I-LEVEL"]
64
  )
65
- rg.configure_dataset(name='job_tags_save', settings=settings, workspace="admin")
66
 
67
  # Log the dataset
68
  rg.log(
69
  records,
70
- name='job_tags_save',
71
  tags={"description": "Tagging of the DVS Job Titles"},
72
  batch_size=200
73
  )
 
12
  from argilla.listeners import listener
13
 
14
  HF_TOKEN = os.environ.get("HF_TOKEN") #get HF_TOKEN from space env variables
15
+ HUB_DATASET_NAME = os.environ.get('HUB_DATASET_NAME') #get dataset name with annotated titles
16
 
17
 
18
  @listener(
19
+ dataset='job titles DV', #name of the dataset in argilla frontend
20
  query="status:Validated", # https://docs.argilla.io/en/latest/guides/features/queries.html
21
  execution_interval_in_seconds=10, # interval to check the execution of `save_validated_to_hub`
22
  )
 
26
  if HF_TOKEN:
27
  print("Pushing the dataset")
28
  print(ds)
29
+ ds.push_to_hub(HUB_DATASET_NAME, token=HF_TOKEN, private=True)
30
  else:
31
  print("SET HF_TOKEN and HUB_DATASET_NAME TO SYNC YOUR DATASET!!!")
32
  else:
 
62
  settings = rg.TokenClassificationSettings(
63
  label_schema=["B-FUN", "I-FUN", "B-RES", "I-RES", "B-LEVEL", "I-LEVEL"]
64
  )
65
+ rg.configure_dataset(name='job titles DV', settings=settings, workspace="admin")
66
 
67
  # Log the dataset
68
  rg.log(
69
  records,
70
+ name='job titles DV',
71
  tags={"description": "Tagging of the DVS Job Titles"},
72
  batch_size=200
73
  )