Spaces:
Running
on
A100
Running
on
A100
Merge branch apscheduler pr from 'main' into pr/30
Browse files- app.py +23 -2
- requirements.txt +1 -0
app.py
CHANGED
@@ -16,6 +16,8 @@ from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
|
|
16 |
from mergekit.config import MergeConfiguration
|
17 |
|
18 |
from clean_community_org import garbage_collect_empty_models
|
|
|
|
|
19 |
|
20 |
has_gpu = torch.cuda.is_available()
|
21 |
|
@@ -255,10 +257,29 @@ def extract(finetuned_model: str, base_model: str, rank: int, hf_token: str, rep
|
|
255 |
)
|
256 |
yield runner.log(f"Lora successfully uploaded to HF: {repo_url.repo_id}")
|
257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
|
259 |
with gr.Blocks() as demo:
|
260 |
gr.Markdown(MARKDOWN_DESCRIPTION)
|
261 |
-
|
|
|
262 |
with gr.Tabs():
|
263 |
with gr.TabItem("Merge Model"):
|
264 |
with gr.Row():
|
@@ -335,7 +356,7 @@ with gr.Blocks() as demo:
|
|
335 |
|
336 |
|
337 |
# Run garbage collection every hour to keep the community org clean.
|
338 |
-
# Empty models might
|
339 |
def _garbage_collect_every_hour():
|
340 |
while True:
|
341 |
try:
|
|
|
16 |
from mergekit.config import MergeConfiguration
|
17 |
|
18 |
from clean_community_org import garbage_collect_empty_models
|
19 |
+
from apscheduler.schedulers.background import BackgroundScheduler
|
20 |
+
from datetime import datetime, timezone
|
21 |
|
22 |
has_gpu = torch.cuda.is_available()
|
23 |
|
|
|
257 |
)
|
258 |
yield runner.log(f"Lora successfully uploaded to HF: {repo_url.repo_id}")
|
259 |
|
260 |
+
# This is a workaround, as the Space keeps getting stuck.
|
261 |
+
def _restart_space():
|
262 |
+
huggingface_hub.HfApi().restart_space(repo_id="arcee-ai/mergekit-gui", token=COMMUNITY_HF_TOKEN, factory_reboot=False)
|
263 |
+
# Run garbage collection every hour to keep the community org clean.
|
264 |
+
# Empty models might exist if the merge fails abruptly (e.g. if the user leaves the Space).
|
265 |
+
def _garbage_remover():
|
266 |
+
try:
|
267 |
+
garbage_collect_empty_models(token=COMMUNITY_HF_TOKEN)
|
268 |
+
except Exception as e:
|
269 |
+
print("Error running garbage collection", e)
|
270 |
+
|
271 |
+
scheduler = BackgroundScheduler()
|
272 |
+
restart_space_job = scheduler.add_job(_restart_space, "interval", seconds=21600)
|
273 |
+
garbage_remover_job = scheduler.add_job(_garbage_remover, "interval", seconds=3600)
|
274 |
+
scheduler.start()
|
275 |
+
next_run_time_utc = restart_space_job.next_run_time.astimezone(timezone.utc)
|
276 |
+
|
277 |
+
NEXT_RESTART = f"Next Restart: {next_run_time_utc.strftime('%Y-%m-%d %H:%M:%S')} (UTC)"
|
278 |
|
279 |
with gr.Blocks() as demo:
|
280 |
gr.Markdown(MARKDOWN_DESCRIPTION)
|
281 |
+
gr.Markdown(NEXT_RESTART)
|
282 |
+
|
283 |
with gr.Tabs():
|
284 |
with gr.TabItem("Merge Model"):
|
285 |
with gr.Row():
|
|
|
356 |
|
357 |
|
358 |
# Run garbage collection every hour to keep the community org clean.
|
359 |
+
# Empty models might exist if the merge fails abruptly (e.g. if the user leaves the Space).
|
360 |
def _garbage_collect_every_hour():
|
361 |
while True:
|
362 |
try:
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
torch
|
2 |
bitsandbytes
|
3 |
git+https://github.com/arcee-ai/mergekit.git
|
|
|
1 |
+
apscheduler
|
2 |
torch
|
3 |
bitsandbytes
|
4 |
git+https://github.com/arcee-ai/mergekit.git
|