File size: 3,944 Bytes
626eca0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import argparse
import json
import logging
import os
import shutil
import tempfile
import zipfile
from datetime import datetime
from pathlib import Path
from typing import Optional, Union

import huggingface_hub

from relik.common.log import get_logger
from relik.common.utils import SAPIENZANLP_DATE_FORMAT, get_md5

# Module-wide logger, requested at DEBUG level so the progress messages
# emitted by the functions below are not filtered out.
logger = get_logger(level=logging.DEBUG)


def create_info_file(tmpdir: Path):
    """Write ``info.json`` next to ``model.zip`` inside ``tmpdir``.

    The JSON document holds two keys: ``md5`` (the value returned by
    ``get_md5`` for ``tmpdir / "model.zip"``) and ``upload_date`` (the
    current local time formatted with ``SAPIENZANLP_DATE_FORMAT``).

    Args:
        tmpdir: Directory that already contains ``model.zip`` and that
            will receive ``info.json``.
    """
    archive_path = tmpdir / "model.zip"
    info_path = tmpdir / "info.json"

    logger.debug("Computing md5 of model.zip")
    checksum = get_md5(archive_path)
    timestamp = datetime.now().strftime(SAPIENZANLP_DATE_FORMAT)

    logger.debug("Dumping info.json file")
    payload = {"md5": checksum, "upload_date": timestamp}
    info_path.write_text(json.dumps(payload, indent=2))


def zip_run(
    dir_path: Union[str, os.PathLike],
    tmpdir: Union[str, os.PathLike],
    zip_name: str = "model.zip",
) -> Path:
    """Archive every file under ``dir_path`` into ``tmpdir / zip_name``.

    The directory layout is preserved: each file is stored under its path
    relative to ``dir_path``. Directories themselves are not stored as
    separate entries.

    Args:
        dir_path: Directory whose content must be archived.
        tmpdir: Directory in which the archive is created. Both plain
            strings and path-like objects are accepted.
        zip_name: File name of the archive inside ``tmpdir``.

    Returns:
        The path of the archive that was written.
    """
    logger.debug(f"zipping {dir_path} to {tmpdir}")
    run_dir = Path(dir_path)
    # Normalise to Path: the signature allows a plain str, for which the
    # `/` operator is not defined.
    zip_path = Path(tmpdir) / zip_name

    with zipfile.ZipFile(zip_path, "w") as zip_file:
        # "*" (rather than "*.*") so that files without a dot in their
        # name, e.g. LICENSE or vocab, are archived too.
        for entry in run_dir.rglob("*"):
            if entry.is_dir():
                continue

            zip_file.write(entry, arcname=entry.relative_to(run_dir))

    return zip_path


def upload(
    model_dir: Union[str, os.PathLike],
    model_name: str,
    organization: Optional[str] = None,
    repo_name: Optional[str] = None,
    commit: Optional[str] = None,
    archive: bool = False,
):
    """Push the content of ``model_dir`` to a HuggingFace Hub repository.

    The repository is created when missing (``exist_ok=True``) and cloned
    into a temporary directory. That directory is then filled either with
    ``model.zip`` plus ``info.json`` (``archive=True``) or with a plain
    copy of the files in ``model_dir``, and finally pushed.

    If no HuggingFace token is stored locally, a message is printed and
    the function returns without doing anything.

    Args:
        model_dir: Local directory containing the model files.
        model_name: Name of the model; used as repository name when
            ``repo_name`` is not given.
        organization: Optional namespace; when set the repository id is
            ``"{organization}/{repo}"``.
        repo_name: Optional repository name overriding ``model_name``.
        commit: Commit message; a default one is used when empty or None.
        archive: When True upload a zipped archive instead of raw files.

    Raises:
        OSError: In non-archive mode, when ``model_dir`` cannot be listed
            or one of its entries cannot be copied.
    """
    token = huggingface_hub.HfFolder.get_token()
    if token is None:
        print(
            "No HuggingFace token found. You need to execute `huggingface-cli login` first!"
        )
        return

    repo_id = repo_name or model_name
    if organization is not None:
        repo_id = f"{organization}/{repo_id}"

    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)

        api = huggingface_hub.HfApi()
        repo_url = api.create_repo(
            token=token,
            repo_id=repo_id,
            exist_ok=True,
        )
        # NOTE(review): `Repository` and `HfFolder` are flagged as deprecated
        # in recent huggingface_hub releases -- confirm against the pinned
        # version before upgrading the dependency.
        repo = huggingface_hub.Repository(
            str(tmpdir), clone_from=repo_url, use_auth_token=token
        )

        if archive:
            # the user asked for an archive: zip model_dir and describe the
            # archive in info.json
            logger.debug(f"Zipping {model_dir} to {tmp_path}")
            zip_run(model_dir, tmp_path)
            create_info_file(tmp_path)
        else:
            # plain upload (e.g. a transformers model): no zipping, the
            # files are copied into the cloned repository as they are
            logger.debug(f"Copying {model_dir} to {tmpdir}")
            for entry in Path(model_dir).iterdir():
                # Top-level dot-entries are skipped, as the previous
                # `cp -r model_dir/*` shell glob did; this also keeps a
                # `.git` folder in model_dir from clobbering the clone.
                if entry.name.startswith("."):
                    continue
                destination = tmp_path / entry.name
                if entry.is_dir():
                    shutil.copytree(entry, destination, dirs_exist_ok=True)
                else:
                    shutil.copy(entry, destination)

        # this method automatically puts large files (>10MB) into git lfs
        repo.push_to_hub(commit_message=commit or "Automatic push from sapienzanlp")


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of the upload script.

    Two positionals (``model_dir``, ``model_name``), three optional string
    options (``--organization``, ``--repo_name``, ``--commit``) and one
    boolean switch (``--archive``) are declared.

    Returns:
        The namespace produced by ``argparse`` from ``sys.argv``.
    """
    # (name-or-flag, help text) pairs, in the order they are registered
    text_arguments = (
        ("model_dir", "The directory of the model you want to upload"),
        ("model_name", "The model you want to upload"),
        (
            "--organization",
            "the name of the organization where you want to upload the model",
        ),
        (
            "--repo_name",
            "Optional name to use when uploading to the HuggingFace repository",
        ),
        ("--commit", "Commit message to use when pushing to the HuggingFace Hub"),
    )
    archive_help = (
        "\n"
        "            Whether to compress the model directory before uploading it.\n"
        "            If True, the model directory will be zipped and the zip file will be uploaded.\n"
        "            If False, the model directory will be uploaded as is."
    )

    cli = argparse.ArgumentParser()
    for name_or_flag, help_text in text_arguments:
        cli.add_argument(name_or_flag, help=help_text)
    cli.add_argument("--archive", action="store_true", help=archive_help)
    return cli.parse_args()


def main():
    """Command-line entry point: parse the arguments and run ``upload``."""
    cli_options = parse_args()
    # every parsed option maps one-to-one onto a keyword argument of upload
    upload(**vars(cli_options))


# Run the upload only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()