Upload catalog.py with huggingface_hub
Browse files- catalog.py +39 -1
catalog.py
CHANGED
@@ -1,12 +1,14 @@
|
|
1 |
import os
|
2 |
import re
|
|
|
3 |
from pathlib import Path
|
4 |
from typing import Optional
|
5 |
|
6 |
import requests
|
7 |
|
8 |
-
from .artifact import Artifact, Artifactory, reset_artifacts_cache
|
9 |
from .logging_utils import get_logger
|
|
|
10 |
from .version import version
|
11 |
|
12 |
logger = get_logger()
|
@@ -35,6 +37,7 @@ default_catalog_path = os.path.join(lib_dir, "catalog")
|
|
35 |
class LocalCatalog(Catalog):
|
36 |
name: str = "local"
|
37 |
location: str = default_catalog_path
|
|
|
38 |
|
39 |
def path(self, artifact_identifier: str):
|
40 |
assert (
|
@@ -92,6 +95,7 @@ class GithubCatalog(LocalCatalog):
|
|
92 |
repo = "unitxt"
|
93 |
repo_dir = "src/unitxt/catalog"
|
94 |
user = "IBM"
|
|
|
95 |
|
96 |
def prepare(self):
|
97 |
tag = version
|
@@ -133,3 +137,37 @@ def add_to_catalog(
|
|
133 |
artifact, name, overwrite=overwrite, verbose=verbose
|
134 |
) # remove collection (its actually the dir).
|
135 |
# verify name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import re
|
3 |
+
from collections import Counter
|
4 |
from pathlib import Path
|
5 |
from typing import Optional
|
6 |
|
7 |
import requests
|
8 |
|
9 |
+
from .artifact import Artifact, Artifactories, Artifactory, reset_artifacts_cache
|
10 |
from .logging_utils import get_logger
|
11 |
+
from .text_utils import print_dict
|
12 |
from .version import version
|
13 |
|
14 |
logger = get_logger()
|
|
|
37 |
class LocalCatalog(Catalog):
|
38 |
name: str = "local"
|
39 |
location: str = default_catalog_path
|
40 |
+
is_local: bool = True
|
41 |
|
42 |
def path(self, artifact_identifier: str):
|
43 |
assert (
|
|
|
95 |
repo = "unitxt"
|
96 |
repo_dir = "src/unitxt/catalog"
|
97 |
user = "IBM"
|
98 |
+
is_local: bool = False
|
99 |
|
100 |
def prepare(self):
|
101 |
tag = version
|
|
|
137 |
artifact, name, overwrite=overwrite, verbose=verbose
|
138 |
) # remove collection (its actually the dir).
|
139 |
# verify name
|
140 |
+
|
141 |
+
|
142 |
+
def get_local_catalogs_paths():
    """Collect the locations of all registered on-disk catalogs.

    Iterates over ``Artifactories()`` and keeps the ``location`` of every
    entry that is a ``LocalCatalog`` instance whose ``is_local`` flag is
    truthy. Subclasses that set ``is_local = False`` are therefore excluded
    even though they pass the ``isinstance`` check.

    Returns:
        A list of catalog locations, in the order ``Artifactories()`` yields
        them.
    """
    return [
        catalog.location
        for catalog in Artifactories()
        # isinstance is checked first so is_local is only read on catalogs.
        if isinstance(catalog, LocalCatalog) and catalog.is_local
    ]
|
149 |
+
|
150 |
+
|
151 |
+
def count_files_recursively(folder):
    """Return the number of files found under ``folder``, at any depth.

    Args:
        folder: Path of the directory tree to walk.

    Returns:
        The total count of file entries reported by ``os.walk`` for
        ``folder`` and all of its sub-directories. Directories themselves
        are not counted.
    """
    # Each os.walk step yields (dirpath, dirnames, filenames); only the
    # filenames list contributes to the total.
    return sum(len(filenames) for _, _, filenames in os.walk(folder))
|
156 |
+
|
157 |
+
|
158 |
+
def local_catalog_summary(catalog_path):
    """Count the files under each immediate sub-directory of a catalog.

    Args:
        catalog_path: Path of the catalog directory to inspect.

    Returns:
        A dict mapping each direct sub-directory name of ``catalog_path`` to
        the number of files found recursively beneath it. Plain files that
        sit directly in ``catalog_path`` are ignored.
    """
    # Pair every entry name with its joined path once, so the path is not
    # rebuilt for both the directory check and the count.
    named_paths = (
        (entry, os.path.join(catalog_path, entry))
        for entry in os.listdir(catalog_path)
    )
    return {
        entry: count_files_recursively(entry_path)
        for entry, entry_path in named_paths
        if os.path.isdir(entry_path)
    }
|
166 |
+
|
167 |
+
|
168 |
+
def summary():
    """Print and return per-directory file counts across all local catalogs.

    The per-catalog summaries from ``local_catalog_summary`` are added
    together as ``Counter`` objects, so directories with the same name in
    different catalogs have their counts summed.

    Note: ``Counter`` addition keeps only strictly positive counts, so a
    directory that contains no files does not appear in the result.

    Returns:
        A ``Counter`` mapping directory name to total file count. The same
        mapping is passed to ``print_dict`` before being returned.
    """
    per_catalog_counts = (
        Counter(local_catalog_summary(catalog_path))
        for catalog_path in get_local_catalogs_paths()
    )
    # sum() folds with "+", starting from an empty Counter.
    totals = sum(per_catalog_counts, Counter())
    print_dict(totals)
    return totals
|