File size: 1,956 Bytes
dcafc9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from pathlib import Path
from threading import Thread
from cryptography.fernet import Fernet
import torch
import io


class HFPetDatasetManager(Thread):
    """Thread that keeps a local clone of a Hugging Face dataset repo in sync.

    Construction creates (or reuses) a private dataset repository on the
    Hub, clones it into ``local_path``, pulls it, and makes sure the
    ``normal/{same,different}`` and ``mistakes/{same,different}``
    sub-directories exist. Once started, every item received on ``queue``
    triggers one pull followed by one push of the local clone.
    """

    def __init__(self, ds_name, hf_token, queue, local_path='collected'):
        """Create/clone the dataset repo and prepare the local folders.

        Args:
            ds_name: Dataset name; expanded to a full repo id for the
                account that owns ``hf_token``.
            hf_token: Hugging Face access token used for every Hub call.
            queue: Object with a blocking ``get()``; each item it yields
                is treated as a "push now" signal by ``run``.
            local_path: Directory that holds the local clone.
        """
        super().__init__()
        self.queue = queue

        # Imported lazily so the module can be imported without the package.
        import huggingface_hub as hf_hub

        full_repo_id = hf_hub.get_full_repo_name(ds_name, token=hf_token)
        self.path_to_dataset_repo = hf_hub.create_repo(
            repo_id=full_repo_id,
            token=hf_token,
            private=True,
            repo_type="dataset",
            exist_ok=True,  # reuse the repo when it already exists
        )
        self.repo = hf_hub.Repository(
            local_dir=local_path,
            clone_from=self.path_to_dataset_repo,
            use_auth_token=hf_token,
        )
        self.repo.git_pull()

        # Folder layout inside the clone: <root>/{normal,mistakes}/{same,different}
        root = Path(local_path)
        self.mistakes_dir = root / "mistakes"
        self.normal_dir = root / "normal"

        self.true_different_dir = self.normal_dir / "different"
        self.true_same_dir = self.normal_dir / "same"

        self.false_different_dir = self.mistakes_dir / "different"
        self.false_same_dir = self.mistakes_dir / "same"

        leaf_dirs = (
            self.true_same_dir,
            self.true_different_dir,
            self.false_same_dir,
            self.false_different_dir,
        )
        for leaf in leaf_dirs:
            leaf.mkdir(parents=True, exist_ok=True)

    def run(self):
        """Loop forever: wait for a queue item, then pull and push the repo."""
        while True:
            # The item's value is ignored; its arrival is the trigger.
            self.queue.get()
            repo = self.repo
            repo.git_pull()
            repo.push_to_hub(commit_message="Upload data changes...")
            print('Changes pushed to dataset!')


def load_enc_cls_model(file_name, key, map_location=None):
    """Decrypt a Fernet-encrypted TorchScript file and load it.

    Args:
        file_name: Path of the encrypted file; it is read in binary mode.
        key: Key passed to ``Fernet`` to decrypt the file contents.
        map_location: Optional device remapping forwarded unchanged to
            ``torch.jit.load`` (e.g. ``"cpu"`` to load a model that was
            saved on a GPU onto a CPU-only host). The default ``None``
            matches calling ``torch.jit.load`` without the argument.

    Returns:
        Whatever ``torch.jit.load`` returns for the decrypted bytes.

    Errors raised by ``open``, ``Fernet``/``decrypt`` or ``torch.jit.load``
    are not caught here and propagate to the caller.
    """
    with open(file_name, "rb") as f:
        encrypted = f.read()
    decrypted = Fernet(key).decrypt(encrypted)
    # Load from an in-memory buffer so the decrypted model is not written
    # to a temporary file.
    return torch.jit.load(io.BytesIO(decrypted), map_location=map_location)