Spaces:
Sleeping
Sleeping
Update load_data.py
Browse files
- load_data.py +7 -7
load_data.py
CHANGED
@@ -9,7 +9,7 @@ from datasets import load_dataset, concatenate_datasets
|
|
9 |
|
10 |
from argilla.listeners import listener
|
11 |
|
12 |
-
HF_TOKEN = os.environ.get("HF_TOKEN") #get HF_TOKEN
|
13 |
HUB_DATASET_NAME = os.environ.get('HUB_DATASET_NAME') #get dataset name
|
14 |
|
15 |
|
@@ -24,7 +24,7 @@ def save_validated_to_hub(records, ctx):
|
|
24 |
if HF_TOKEN:
|
25 |
print("Pushing the dataset")
|
26 |
print(ds)
|
27 |
-
ds.push_to_hub(HUB_DATASET_NAME, token=HF_TOKEN)
|
28 |
else:
|
29 |
print("SET HF_TOKEN and HUB_DATASET_NAME TO SYNC YOUR DATASET!!!")
|
30 |
else:
|
@@ -35,17 +35,17 @@ class LoadDatasets:
|
|
35 |
rg.init(api_key=api_key, workspace=workspace)
|
36 |
|
37 |
@staticmethod
|
38 |
-
def
|
39 |
-
#
|
40 |
try:
|
41 |
print(f"Trying to sync with {HUB_DATASET_NAME}")
|
42 |
-
old_ds = load_dataset(HUB_DATASET_NAME, split="train")
|
43 |
except Exception as e:
|
44 |
print(f"Not possible to sync with {HUB_DATASET_NAME}")
|
45 |
print(e)
|
46 |
old_ds = None
|
47 |
|
48 |
-
dataset = load_dataset(
|
49 |
|
50 |
|
51 |
if old_ds:
|
@@ -85,7 +85,7 @@ if __name__ == "__main__":
|
|
85 |
response = requests.get("http://0.0.0.0:6900/")
|
86 |
if response.status_code == 200:
|
87 |
ld = LoadDatasets(API_KEY)
|
88 |
-
ld.
|
89 |
break
|
90 |
|
91 |
except requests.exceptions.ConnectionError:
|
|
|
9 |
|
10 |
from argilla.listeners import listener
|
11 |
|
12 |
+
HF_TOKEN = os.environ.get("HF_TOKEN") #get HF_TOKEN from space env variables
|
13 |
HUB_DATASET_NAME = os.environ.get('HUB_DATASET_NAME') #get dataset name
|
14 |
|
15 |
|
|
|
24 |
if HF_TOKEN:
|
25 |
print("Pushing the dataset")
|
26 |
print(ds)
|
27 |
+
ds.push_to_hub(HUB_DATASET_NAME, token=HF_TOKEN, private=True, revision = 'validated') #push annotations on the validated branch of the dataset
|
28 |
else:
|
29 |
print("SET HF_TOKEN and HUB_DATASET_NAME TO SYNC YOUR DATASET!!!")
|
30 |
else:
|
|
|
35 |
rg.init(api_key=api_key, workspace=workspace)
|
36 |
|
37 |
@staticmethod
|
38 |
+
def load_saved_data():
|
39 |
+
# load data from validated branch
|
40 |
try:
|
41 |
print(f"Trying to sync with {HUB_DATASET_NAME}")
|
42 |
+
old_ds = load_dataset(HUB_DATASET_NAME, split="train", use_auth_token=HF_TOKEN, revision='validated')
|
43 |
except Exception as e:
|
44 |
print(f"Not possible to sync with {HUB_DATASET_NAME}")
|
45 |
print(e)
|
46 |
old_ds = None
|
47 |
|
48 |
+
dataset = load_dataset({HUB_DATASET_NAME}, split="train", use_auth_token=HF_TOKEN, revision='main') # get starting file from the main branch of the dataset
|
49 |
|
50 |
|
51 |
if old_ds:
|
|
|
85 |
response = requests.get("http://0.0.0.0:6900/")
|
86 |
if response.status_code == 200:
|
87 |
ld = LoadDatasets(API_KEY)
|
88 |
+
ld.load_saved_data()
|
89 |
break
|
90 |
|
91 |
except requests.exceptions.ConnectionError:
|