"""OAuth helper callbacks and a health watchdog for a Hugging Face Space.

The first half exposes small helpers that turn Gradio OAuth objects into a
username / organization list.  The second half polls the Space's ``/health``
endpoint in a background thread and restarts the Space when it stops
answering ``"ok"``.
"""
from __future__ import annotations

import json
import logging
import os
import threading
import time

import gradio as gr
import yaml
from gradio_client import Client
from huggingface_hub import HfApi, whoami

from src.envs import REPO_ID, TOKEN

# Public URL of the Space, derived from the repo id ("owner/name" -> "owner-name").
SPACE_APP_URL = f"https://{REPO_ID.replace('/', '-')}.hf.space/"
# Seconds between two health checks when the previous iteration did not raise.
CHECK_INTERVAL = 600
# NOTE(review): not referenced anywhere in the visible code; kept for any
# external importer.
API_TIMEOUT = 300


def get_profile(profile: gr.OAuthProfile | None) -> str:
    """Return the username of *profile*, or ``"Anonymous"`` when it is ``None``."""
    if profile is None:
        return "Anonymous"
    return profile.username


def get_organizations(oauth_token: gr.OAuthToken | None) -> str | list[str]:
    """Return the organization names attached to *oauth_token*.

    Args:
        oauth_token: OAuth token object, or ``None`` when no token is available.

    Returns:
        The string ``"No Organization"`` when *oauth_token* is ``None``;
        otherwise a list with the ``"name"`` of every entry under ``"orgs"`` in
        the ``whoami`` response (possibly empty).
    """
    if oauth_token is None:
        return "No Organization"
    return [org["name"] for org in whoami(oauth_token.token)["orgs"]]


def get_profile_and_organizations(
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> tuple[str, str | list[str]]:
    """Return ``(username, organizations)`` for the given OAuth objects.

    Delegates to :func:`get_profile` and :func:`get_organizations` so the
    fallback values (``"Anonymous"`` / ``"No Organization"``) stay in one place.
    """
    return get_profile(profile), get_organizations(oauth_token)


def download_with_restart(snapshot_download_func, repo_id, local_dir, repo_type, token, restart_func):
    """Run a snapshot download and invoke *restart_func* if it fails.

    Args:
        snapshot_download_func: Callable accepting the keyword arguments
            ``repo_id``, ``local_dir``, ``repo_type``, ``tqdm_class``,
            ``etag_timeout`` and ``token``.
        repo_id: Forwarded to *snapshot_download_func*.
        local_dir: Forwarded to *snapshot_download_func*.
        repo_type: Forwarded to *snapshot_download_func*.
        token: Forwarded to *snapshot_download_func*.
        restart_func: Zero-argument callable invoked when the download raises.

    Any ``Exception`` raised by the download is logged (with traceback) and
    then handled by calling *restart_func*; it is not re-raised.
    """
    try:
        snapshot_download_func(
            repo_id=repo_id,
            local_dir=local_dir,
            repo_type=repo_type,
            tqdm_class=None,
            etag_timeout=30,
            token=token,
        )
    except Exception:
        # Record why the download failed before handing over to the restart
        # callback; previously the error vanished without a trace.
        logging.exception("Snapshot download for %s failed; invoking restart callback", repo_id)
        restart_func()


api = HfApi(token=TOKEN)
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")


def api_ok() -> bool:
    """Return ``True`` when the Space's ``/health`` endpoint answers ``"ok"``.

    A new ``Client`` is created for every call.  The response (or its first
    element when it is a list/tuple) is stringified, stripped and lower-cased
    before the comparison.  Any exception raised while connecting or calling
    the endpoint is logged as a warning and reported as ``False``.
    """
    try:
        client = Client(SPACE_APP_URL, hf_token=TOKEN)
        resp = client.predict(api_name="/health")
        logging.info("/health response: %s", resp)
        val = resp[0] if isinstance(resp, (list, tuple)) else resp
        return str(val).strip().lower() == "ok"
    except Exception as e:
        logging.warning("health API call failed: %s", e)
        return False


def restart_with_fallback(settle_seconds: float = 300) -> None:
    """Restart the Space, escalating to a factory reboot if it stays unhealthy.

    Args:
        settle_seconds: How long to wait after the first restart before
            re-checking health.  Defaults to 300, the previously hard-coded
            value.

    Exceptions raised by ``api.restart_space`` propagate to the caller.
    """
    logging.error("Space looks unhealthy → restarting...")
    api.restart_space(repo_id=REPO_ID, factory_reboot=False)
    # Give the Space time to come back up before judging the restart.
    time.sleep(settle_seconds)
    if not api_ok():
        logging.error("Still unhealthy after restart → factory rebuild...")
        api.restart_space(repo_id=REPO_ID, factory_reboot=True)


def watchdog() -> None:
    """Poll the Space's health forever and restart it when it is unhealthy.

    After an iteration that completes without raising, the loop sleeps
    ``CHECK_INTERVAL`` seconds.  After an iteration that raises, it sleeps a
    backoff that doubles on each consecutive failure (2, 4, 8, ... capped at
    300 seconds) and resets once an iteration succeeds.  This function never
    returns.
    """
    backoff = 1
    while True:
        try:
            if api_ok():
                logging.info("Space healthy via /health.")
            else:
                restart_with_fallback()
            # Reaching this line means the iteration did not raise.
            backoff = 1
        except Exception as e:
            logging.exception(e)
            backoff = min(backoff * 2, 300)
        # backoff == 1 marks "last iteration was clean" -> regular interval.
        # The sleep sits after the try statement (not in ``finally``) so a
        # propagating SystemExit/KeyboardInterrupt is not delayed by it.
        time.sleep(CHECK_INTERVAL if backoff == 1 else backoff)


def start_watchdog_in_background() -> None:
    """Start :func:`watchdog` in a daemon thread and return immediately."""
    t = threading.Thread(target=watchdog, daemon=True)
    t.start()