testbot committed on
Commit
63f0f28
β€’
1 Parent(s): 526c343
Files changed (1) hide show
  1. app.py +80 -1
app.py CHANGED
@@ -1,10 +1,13 @@
 
1
  import os
2
  import shutil
 
3
  from pathlib import Path
4
  from tempfile import TemporaryDirectory
 
5
 
6
  import gradio as gr
7
- from huggingface_hub import HfApi, ModelCard, scan_cache_dir
8
  from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
9
 
10
  from convert import convert
@@ -15,6 +18,22 @@ try:
15
  except:
16
  MAX_REPO_SIZE = 24 * 1000 * 1000 * 1000
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  class Generator:
20
  # Taken from https://stackoverflow.com/a/34073559
@@ -29,6 +48,14 @@ class Generator:
29
  def run(
30
  token: str, model_id: str, precision: str, quantization: bool, destination: str
31
  ):
 
 
 
 
 
 
 
 
32
  _all_logs = []
33
 
34
  def _log(msg: str):
@@ -38,6 +65,14 @@ def run(
38
 
39
  if token == "" or model_id == "":
40
  yield _log("### Invalid input 🐞\n\nPlease fill a token and model_id.")
 
 
 
 
 
 
 
 
41
  return
42
  if destination == "":
43
  _log("Destination not provided. Will default to the initial repo.")
@@ -52,6 +87,14 @@ def run(
52
  yield _log(
53
  f"\n### Error 😒😒😒\n\nRepository {model_id} not found. Only public models are convertible at the moment."
54
  )
 
 
 
 
 
 
 
 
55
  return
56
 
57
  try:
@@ -64,6 +107,14 @@ def run(
64
  yield _log(
65
  f"### Unprocessable 😒😒😒\n\nModel {model_id} is too big and cannot be processed in this Space. This Space needs to be able to load the model in memory before converting it. To avoid a memory issue, we do not process models bigger than {MAX_REPO_SIZE}b.\n\nYou have 2 options:\n- [Duplicate this Space](https://huggingface.co/spaces/Wauplin/bloomz.cpp-converter?duplicate=true) and assign a bigger machine. You will need to set 'MAX_REPO_SIZE' as a secret to overwrite the default value. Once you are done, remove the upgraded hardware and/or delete the Space.\n- Manually convert the weights by following [this guide](https://github.com/NouamaneTazi/bloomz.cpp#usage)."
66
  )
 
 
 
 
 
 
 
 
67
  return
68
 
69
  with TemporaryDirectory() as cache_folder:
@@ -130,10 +181,26 @@ def run(
130
  yield _log(
131
  f"### Success πŸ”₯\n\nYay! This model was successfully converted! Make sure to let the repo owner know about it and review your PR. You might need to complete the PR manually, especially to add information in the model card."
132
  )
 
 
 
 
 
 
 
 
133
  shutil.rmtree(model_path.parent)
134
  _delete_cache()
135
  return
136
  except Exception as e:
 
 
 
 
 
 
 
 
137
  yield _log(f"### Error 😒😒😒\n\n{e}")
138
  _delete_cache()
139
  return
@@ -147,6 +214,18 @@ def _delete_cache():
147
  )
148
 
149
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  TITLE = """
151
  <h1 style="font-weight: 900; font-size: 32px; margin-bottom: 10px; margin-top: 10px; text-align: center;">
152
  Make any BLOOM-like model compatible with bloomz.cpp
 
1
+ import csv
2
  import os
3
  import shutil
4
+ from datetime import datetime
5
  from pathlib import Path
6
  from tempfile import TemporaryDirectory
7
+ from typing import Optional
8
 
9
  import gradio as gr
10
+ from huggingface_hub import HfApi, ModelCard, Repository, scan_cache_dir
11
  from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
12
 
13
  from convert import convert
 
18
  except:
19
  MAX_REPO_SIZE = 24 * 1000 * 1000 * 1000
20
 
21
+ # Used to log Space usage
22
+ # Taken from https://huggingface.co/spaces/onnx/export
23
+ DATASET_REPO_ID = "Wauplin/bloom.cpp-converters"
24
+ DATASET_LOCAL_DIR = "usage_data"
25
+ DATASET_LOCAL_FILE = Path(DATASET_LOCAL_DIR) / "data" / "data.csv"
26
+ HF_TOKEN = os.environ.get("HF_TOKEN")
27
+
28
+ repo: Optional[Repository] = None
29
+ if HF_TOKEN:
30
+ repo = Repository(
31
+ local_dir=DATASET_LOCAL_DIR,
32
+ clone_from=DATASET_REPO_ID,
33
+ repo_type="dataset",
34
+ token=HF_TOKEN,
35
+ )
36
+
37
 
38
  class Generator:
39
  # Taken from https://stackoverflow.com/a/34073559
 
48
  def run(
49
  token: str, model_id: str, precision: str, quantization: bool, destination: str
50
  ):
51
+ _log_usage(
52
+ status="start",
53
+ model_id=model_id,
54
+ precision=precision,
55
+ quantization=quantization,
56
+ destination=destination,
57
+ pr_url=None,
58
+ )
59
  _all_logs = []
60
 
61
  def _log(msg: str):
 
65
 
66
  if token == "" or model_id == "":
67
  yield _log("### Invalid input 🐞\n\nPlease fill a token and model_id.")
68
+ _log_usage(
69
+ status="invalid input",
70
+ model_id=model_id,
71
+ precision=precision,
72
+ quantization=quantization,
73
+ destination=destination,
74
+ pr_url=None,
75
+ )
76
  return
77
  if destination == "":
78
  _log("Destination not provided. Will default to the initial repo.")
 
87
  yield _log(
88
  f"\n### Error 😒😒😒\n\nRepository {model_id} not found. Only public models are convertible at the moment."
89
  )
90
+ _log_usage(
91
+ status="model not found",
92
+ model_id=model_id,
93
+ precision=precision,
94
+ quantization=quantization,
95
+ destination=destination,
96
+ pr_url=None,
97
+ )
98
  return
99
 
100
  try:
 
107
  yield _log(
108
  f"### Unprocessable 😒😒😒\n\nModel {model_id} is too big and cannot be processed in this Space. This Space needs to be able to load the model in memory before converting it. To avoid a memory issue, we do not process models bigger than {MAX_REPO_SIZE}b.\n\nYou have 2 options:\n- [Duplicate this Space](https://huggingface.co/spaces/Wauplin/bloomz.cpp-converter?duplicate=true) and assign a bigger machine. You will need to set 'MAX_REPO_SIZE' as a secret to overwrite the default value. Once you are done, remove the upgraded hardware and/or delete the Space.\n- Manually convert the weights by following [this guide](https://github.com/NouamaneTazi/bloomz.cpp#usage)."
109
  )
110
+ _log_usage(
111
+ status="unprocessable",
112
+ model_id=model_id,
113
+ precision=precision,
114
+ quantization=quantization,
115
+ destination=destination,
116
+ pr_url=None,
117
+ )
118
  return
119
 
120
  with TemporaryDirectory() as cache_folder:
 
181
  yield _log(
182
  f"### Success πŸ”₯\n\nYay! This model was successfully converted! Make sure to let the repo owner know about it and review your PR. You might need to complete the PR manually, especially to add information in the model card."
183
  )
184
+ _log_usage(
185
+ status="success",
186
+ model_id=model_id,
187
+ precision=precision,
188
+ quantization=quantization,
189
+ destination=destination,
190
+ pr_url=pr_url,
191
+ )
192
  shutil.rmtree(model_path.parent)
193
  _delete_cache()
194
  return
195
  except Exception as e:
196
+ _log_usage(
197
+ status="error",
198
+ model_id=model_id,
199
+ precision=precision,
200
+ quantization=quantization,
201
+ destination=destination,
202
+ pr_url=None,
203
+ )
204
  yield _log(f"### Error 😒😒😒\n\n{e}")
205
  _delete_cache()
206
  return
 
214
  )
215
 
216
 
217
+ def _log_usage(**kwargs):
218
+ # save in a private dataset
219
+ # Taken from https://huggingface.co/spaces/onnx/export
220
+ if repo is not None:
221
+ repo.git_pull(rebase=True)
222
+ with DATASET_LOCAL_FILE.open("a") as csv_file:
223
+ writer = csv.DictWriter(csv_file, fieldnames=["time"] + list(kwargs.keys()))
224
+ writer.writerow({"time": str(datetime.now()), **kwargs})
225
+ commit_url = repo.push_to_hub()
226
+ print("[dataset]", commit_url)
227
+
228
+
229
  TITLE = """
230
  <h1 style="font-weight: 900; font-size: 32px; margin-bottom: 10px; margin-top: 10px; text-align: center;">
231
  Make any BLOOM-like model compatible with bloomz.cpp