import os
from pathlib import Path

from huggingface_hub import HfApi, Repository

from constants import EVAL_REQUESTS_PATH

TOKEN_HUB = os.environ.get("TOKEN_HUB", None)
QUEUE_REPO = os.environ.get("QUEUE_REPO", None)
QUEUE_PATH = os.environ.get("QUEUE_PATH", None)

hf_api = HfApi(
    endpoint="https://huggingface.co",
    token=TOKEN_HUB,
)

# Language code for Persian
PERSIAN_LANGUAGE_CODE = "fa"

def load_all_info_from_dataset_hub():
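    """Collect the evaluation queue info: the names of already-requested models
    (stems of the .txt files under EVAL_REQUESTS_PATH) and the latest results CSV.

    Returns (eval_queue_repo, requested_models, csv_results); eval_queue_repo stays
    None while the Repository clone below is commented out.
    """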
    eval_queue_repo = None
    requested_models = []

    if TOKEN_HUB is None:
        raise ValueError("No Hugging Face token provided; cannot pull evaluation requests and results.")

    print("Pulling evaluation requests and results.")

    # eval_queue_repo = Repository(
    #     local_dir=QUEUE_PATH,
    #     clone_from=QUEUE_REPO,
    #     use_auth_token=TOKEN_HUB,
    #     repo_type="dataset",
    # )
    # eval_queue_repo.git_pull()

    # Local directory where dataset repo is cloned + folder with eval requests
    directory = Path(QUEUE_PATH) / EVAL_REQUESTS_PATH
    requested_models = get_all_requested_models(directory)
    requested_models = [p.stem for p in requested_models]

    # Local directory where dataset repo is cloned
    csv_results = get_csv_with_results(QUEUE_PATH)
    if csv_results is None:
        raise ValueError("CSV results file not found.")

    return eval_queue_repo, requested_models, csv_results

def upload_file(requested_model_name, path_or_fileobj):
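    """Upload a single eval-request file into EVAL_REQUESTS_PATH of the QUEUE_REPO dataset."""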
    dest_repo_file = Path(EVAL_REQUESTS_PATH) / path_or_fileobj.name
    hf_api.upload_file(
        path_or_fileobj=path_or_fileobj,
        path_in_repo=str(dest_repo_file),
        repo_id=QUEUE_REPO,
        token=TOKEN_HUB,
        repo_type="dataset",
        commit_message=f"Add {requested_model_name} to eval queue")

def get_all_requested_models(directory):
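    """Return every .txt eval-request file found in `directory`."""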
    directory = Path(directory)
    all_requested_models = list(directory.glob("*.txt"))
    return all_requested_models

def get_csv_with_results(directory):
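    """Return the single results CSV whose name ends with 'latest', or None if none or more than one is found."""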
    directory = Path(directory)
    all_csv_files = list(directory.glob("*.csv"))
    latest = [f for f in all_csv_files if f.stem.endswith("latest")]
    if len(latest) != 1:
        return None
    return latest[0]

def is_model_on_hub(model_name, revision="main") -> tuple[bool, str | None]:
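    """Validate that `model_name` has the `author/model_name` format and exists on the Hugging Face Hub.

    Returns (True, None) when exactly one matching model is found,
    otherwise (False, <error message fragment>).
    """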
    try:
        model_name = model_name.replace(" ", "")
        author = model_name.split("/")[0]
        model_id = model_name.split("/")[1]
        if len(author) == 0 or len(model_id) == 0:
            return False, "is not a valid model name. Please use the format `author/model_name`."
    except Exception:
        return False, "is not a valid model name. Please use the format `author/model_name`."

    try:
        models = list(hf_api.list_models(author=author, search=model_id))
        matched = [m.modelId for m in models if m.modelId == model_name]
        if len(matched) != 1:
            return False, "was not found on the hub!"
        else:
            return True, None
    except Exception as e:
        print(f"Could not get the model from the hub: {e}")
        return False, "was not found on the hub!"
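
# A minimal usage sketch, not part of the module's public behavior: it assumes the
# TOKEN_HUB / QUEUE_REPO / QUEUE_PATH environment variables are set and that a local
# clone of the queue dataset already exists at QUEUE_PATH. "author/model_name" below
# is a hypothetical model id used only for illustration.
if __name__ == "__main__":
    ok, error = is_model_on_hub("author/model_name")
    if not ok:
        # The error fragments are written to be appended after the model name.
        print(f"author/model_name {error}")
    else:
        _, requested_models, csv_results = load_all_info_from_dataset_hub()
        print(f"{len(requested_models)} models already requested; latest results: {csv_results}")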