File size: 1,819 Bytes
839621c
3c9d064
 
839621c
cdb761d
839621c
 
 
 
 
 
 
 
 
 
 
 
 
cdb761d
8c543d4
cdb761d
8c543d4
 
 
cdb761d
8c543d4
 
cdb761d
 
 
 
 
8c543d4
cdb761d
 
839621c
 
 
cdb761d
 
 
839621c
cdb761d
 
839621c
cdb761d
f92d1a9
cdb761d
 
 
 
 
 
 
 
 
f92d1a9
 
cdb761d
 
 
 
 
 
839621c
3c9d064
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import json
from tempfile import mktemp


from huggingface_hub import duplicate_space, HfApi


# Module-level Hub API client shared by the helper functions in this file.
hf_api = HfApi()


def setup_dataset_on_hub(repo_id, hub_token):
    """Create a dataset repo on the Hub and push the seed data file into it.

    Args:
        repo_id: Identifier of the dataset repo to create.
        hub_token: Token passed to both Hub calls.

    The seed file is read from the relative path ``seed_data.json`` and is
    stored under the same name at the root of the repo. ``exist_ok`` is not
    passed to ``create_repo``, so the library default applies when the repo
    already exists.
    """
    repo_kind = "dataset"
    seed_file = "seed_data.json"

    # Step 1: the repo has to exist before anything can be uploaded to it.
    hf_api.create_repo(repo_id=repo_id, token=hub_token, repo_type=repo_kind)

    # Step 2: push the local seed file, keeping its name inside the repo.
    hf_api.upload_file(
        path_or_fileobj=seed_file,
        path_in_repo=seed_file,
        repo_id=repo_id,
        repo_type=repo_kind,
        token=hub_token,
    )


def duplicate_space_on_hub(source_repo, target_repo, hub_token, private=False):
    """Duplicate the Space ``source_repo`` into ``target_repo``.

    Args:
        source_repo: Id of the Space to copy from.
        target_repo: Id of the Space to create.
        hub_token: Token forwarded to ``duplicate_space``.
        private: Forwarded as the ``private`` flag of the new Space.

    ``exist_ok=True`` is always passed to ``duplicate_space``.
    """
    duplication_options = {
        "from_id": source_repo,
        "to_id": target_repo,
        "token": hub_token,
        "private": private,
        "exist_ok": True,
    }
    duplicate_space(**duplication_options)


def add_project_config_to_space_repo(
    dataset_repo_id,
    hub_token,
    project_name,
    argilla_space_repo_id,
    project_space_repo_id,
):
    """Write ``project_config.json`` locally and upload it to the project Space.

    Args:
        dataset_repo_id: Stored under the ``dataset_repo_id`` key.
        hub_token: Token used for the upload.
        project_name: Stored under the ``project_name`` key.
        argilla_space_repo_id: Stored under the ``argilla_space_repo_id`` key.
        project_space_repo_id: Stored under the ``project_space_repo_id`` key;
            also the Space repo that receives the file.

    The JSON file is written to the relative path ``project_config.json``
    (overwriting any existing file there) and is left on disk after the upload.
    """
    config_filename = "project_config.json"
    project_config = {
        "project_name": project_name,
        "argilla_space_repo_id": argilla_space_repo_id,
        "project_space_repo_id": project_space_repo_id,
        "dataset_repo_id": dataset_repo_id,
    }

    # Serialise the settings to a local file first; the upload reads from it.
    with open(config_filename, "w") as config_file:
        json.dump(project_config, config_file)

    hf_api.upload_file(
        path_or_fileobj=config_filename,
        path_in_repo=config_filename,
        repo_id=project_space_repo_id,
        repo_type="space",
        token=hub_token,
    )


def pull_seed_data_from_repo(repo_id, hub_token):
    """Download ``seed_data.json`` from a dataset repo and return it parsed.

    Args:
        repo_id: Identifier of the dataset repo to read from.
        hub_token: Token used for the download.

    Returns:
        The deserialised JSON content of ``seed_data.json``.

    Raises:
        Whatever ``hf_hub_download`` raises when the repo or file cannot be
        fetched, and ``json.JSONDecodeError`` if the file is not valid JSON.
    """
    # ``filename`` is the path of the file *inside the repo* (the name used
    # when the seed data is uploaded), not a local destination. The call
    # returns the local path of the downloaded copy, so no temp path is needed.
    local_path = hf_api.hf_hub_download(
        repo_id=repo_id,
        filename="seed_data.json",
        repo_type="dataset",
        token=hub_token,
    )
    # Use a context manager so the file handle is closed deterministically.
    with open(local_path, encoding="utf-8") as seed_file:
        return json.load(seed_file)