import os, shutil
os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
import time
from fastcore.utils import *
from datetime import datetime
from huggingface_hub import snapshot_download, upload_folder, create_repo, repo_exists, whoami

__all__ = ['download', 'upload', 'setup_hf_backup']
def _token(): return os.getenv("HF_TOKEN")

def get_cfg():
    "Load backup settings from ./config.ini, creating it with defaults if missing."
    # Defaults used the first time, when config.ini does not yet exist.
    defaults = dict(dataset_id='space-backup', db_dir='data', private_backup=True, interval=15)
    # Coercions applied when reading values back out of the ini file.
    kinds = dict(dataset_id=str, db_dir=str, private_backup=bool, interval=int)
    return Config('.', 'config.ini', types=kinds, create=defaults)

def get_dataset_id(cfg):
    "Return cfg.dataset_id, prefixed with the token owner's namespace when possible."
    dataset = cfg.dataset_id
    tok = _token()
    # Already fully qualified, or no token to resolve the user with: use as-is.
    if "/" in dataset or tok is None: return dataset
    user = whoami(tok)['name']
    return f"{user}/{dataset}"

def download():
    "Restore cfg.db_dir from the backup dataset and start the periodic upload loop."
    cfg = get_cfg()
    dataset = get_dataset_id(cfg)
    # Kick off the background backup thread regardless of restore outcome.
    upload_on_schedule()
    # Only restore when running inside a Space and a backup repo already exists.
    if os.getenv("SPACE_ID") and repo_exists(dataset, repo_type="dataset", token=_token()):
        snapshot = snapshot_download(repo_id=dataset, repo_type='dataset', token=_token())
        # Merge the snapshot into db_dir, overwriting files that already exist.
        shutil.copytree(snapshot, cfg.db_dir, dirs_exist_ok=True)

def upload():
    "Push cfg.db_dir to the backup dataset repo; no-op outside a HF Space."
    cfg = get_cfg()
    # Outside a Space there is nothing to back up (but config.ini is still ensured above).
    if not os.getenv("SPACE_ID"): return
    dataset = get_dataset_id(cfg)
    # Idempotently make sure the target dataset repo exists before pushing.
    create_repo(dataset, token=_token(), private=cfg.private_backup, repo_type='dataset', exist_ok=True)
    upload_folder(
        folder_path=cfg.db_dir,
        token=_token(),
        repo_id=dataset,
        repo_type='dataset',
        commit_message=f"backup {datetime.now()}",
    )


@threaded
def upload_on_schedule():
    "Background thread: wait cfg.interval minutes between upload() calls, forever."
    cfg = get_cfg()
    # iter(int, 1) never yields 1, so this is an infinite loop.
    for _ in iter(int, 1):
        # Re-read interval each pass so the sleep tracks the config object.
        time.sleep(60 * cfg.interval)
        upload()


def setup_hf_backup(app):
    "Wire backup lifecycle onto a FastAPI-style app: restore on startup, push on shutdown."
    for event, handler in (("startup", download), ("shutdown", upload)):
        app.on_event(event)(handler)