LukasGe committed on
Commit
577daa9
1 Parent(s): 848ab28

Update load_data.py

Browse files
Files changed (1) hide show
  1. load_data.py +7 -6
load_data.py CHANGED
@@ -9,11 +9,12 @@ from datasets import load_dataset, concatenate_datasets
9
 
10
  from argilla.listeners import listener
11
 
12
- HF_TOKEN = os.environ.get("HF_TOKEN") #set HF_TOKEN
13
- HUB_DATASET_NAME = os.environ.get('HUB_DATASET_NAME') #set dataset name
 
14
 
15
  @listener(
16
- dataset="somos-alpaca-es",
17
  query="status:Validated", # https://docs.argilla.io/en/latest/guides/features/queries.html
18
  execution_interval_in_seconds=1200, # interval to check the execution of `save_validated_to_hub`
19
  )
@@ -44,7 +45,7 @@ class LoadDatasets:
44
  print(e)
45
  old_ds = None
46
 
47
- dataset = load_dataset("somosnlp/somos-clean-alpaca-es", split="train")
48
 
49
 
50
  if old_ds:
@@ -64,8 +65,8 @@ class LoadDatasets:
64
  # Log the dataset
65
  rg.log(
66
  records,
67
- name="somos-alpaca-es",
68
- tags={"description": "SomosNLP Hackathon dataset"},
69
  batch_size=200
70
  )
71
 
 
9
 
10
  from argilla.listeners import listener
11
 
12
+ HF_TOKEN = os.environ.get("HF_TOKEN") #get HF_TOKEN
13
+ HUB_DATASET_NAME = os.environ.get('HUB_DATASET_NAME') #get dataset name
14
+
15
 
16
  @listener(
17
+ dataset="job-title-tagging",
18
  query="status:Validated", # https://docs.argilla.io/en/latest/guides/features/queries.html
19
  execution_interval_in_seconds=1200, # interval to check the execution of `save_validated_to_hub`
20
  )
 
45
  print(e)
46
  old_ds = None
47
 
48
+ dataset = load_dataset("LukasGe/argilla-test", split="train")
49
 
50
 
51
  if old_ds:
 
65
  # Log the dataset
66
  rg.log(
67
  records,
68
+ name="job-title-tagging",
69
+ tags={"description": "Tagging of the DVS Job Titles"},
70
  batch_size=200
71
  )
72