Spaces:

huggingface-projects
/

auto-retrain

Runtime error

App Files Files Community

auto-retrain / src /main.py

sbrandeis HF staff

✨ Implement Auto-Retrain

55d6386 verified over 1 year ago

raw

history blame contribute delete

No virus

3.7 kB

	import hmac
	import os
	from typing import Optional

	import requests
	from fastapi import FastAPI, Header, HTTPException, BackgroundTasks
	from fastapi.responses import FileResponse
	from huggingface_hub.hf_api import HfApi

	from .models import config, WebhookPayload

	# Secret that incoming webhook requests must present (checked in `post_webhook`).
	# Read from the environment; `None` when the variable is not set.
	WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
	# Token sent as a Bearer credential to the AutoTrain API and handed to `HfApi`.
	HF_ACCESS_TOKEN = os.environ.get("HF_ACCESS_TOKEN")
	# Base URLs of the AutoTrain REST API and of the AutoTrain web UI.
	AUTOTRAIN_API_URL = "https://api.autotrain.huggingface.co"
	AUTOTRAIN_UI_URL = "https://ui.autotrain.huggingface.co"


	# Application object that the route decorators below register handlers on.
	app = FastAPI()

	@app.get("/")
	async def home():
	    """Serve the `home.html` file as the response for the root path."""
	    landing_page = FileResponse("home.html")
	    return landing_page

	@app.post("/webhook")
	async def post_webhook(
	    payload: WebhookPayload,
	    task_queue: BackgroundTasks,
	    x_webhook_secret: Optional[str] = Header(default=None),
	):
	    """Authenticate an incoming webhook and schedule a retrain when relevant.

	    Args:
	        payload: Parsed webhook body.
	        task_queue: Background task queue the retrain job is added to.
	        x_webhook_secret: Secret supplied by the caller in the request headers.

	    Returns:
	        `{"processed": True}` when a retrain was scheduled, otherwise
	        `{"processed": False}`.

	    Raises:
	        HTTPException: 401 when no secret header is present, 403 when the
	            secret does not match `WEBHOOK_SECRET` (or none is configured).
	    """
	    if x_webhook_secret is None:
	        raise HTTPException(401)

	    # Compare in constant time so response timing does not leak how much of
	    # the secret an attacker guessed. Both sides are encoded to bytes because
	    # `hmac.compare_digest` rejects non-ASCII `str` arguments.
	    # An unset WEBHOOK_SECRET rejects every request, as it did before.
	    expected_secret = WEBHOOK_SECRET
	    if expected_secret is None or not hmac.compare_digest(
	        x_webhook_secret.encode("utf-8"),
	        expected_secret.encode("utf-8"),
	    ):
	        raise HTTPException(403)

	    # Only content updates of the configured input dataset trigger a retrain.
	    is_relevant_update = (
	        payload.event.action == "update"
	        and payload.event.scope.startswith("repo.content")
	        and payload.repo.name == config.input_dataset
	        and payload.repo.type == "dataset"
	    )
	    if not is_relevant_update:
	        # no-op
	        return {"processed": False}

	    task_queue.add_task(schedule_retrain, payload)

	    return {"processed": True}


	def schedule_retrain(payload: WebhookPayload):
	    """Create and start an AutoTrain project, then announce it on the dataset repo.

	    Args:
	        payload: Webhook payload that triggered the retrain; forwarded to
	            `AutoTrain.create_project`.

	    Returns:
	        `{"processed": True}` once the project is started and the
	        notification has been posted.

	    Raises:
	        requests.HTTPError: Re-raised after logging when an AutoTrain API
	            call returns an error status.
	    """
	    # Create the autotrain project
	    try:
	        project = AutoTrain.create_project(payload)
	        project_id = project["id"]
	        AutoTrain.add_data(project_id=project_id)
	        AutoTrain.start_processing(project_id=project_id)
	    except requests.HTTPError as err:
	        print("ERROR while requesting AutoTrain API:")
	        print(f" code: {err.response.status_code}")
	        # The error body is not guaranteed to be JSON (e.g. an HTML gateway
	        # page). Fall back to the raw text so a decode failure cannot mask
	        # the original HTTPError being re-raised below.
	        try:
	            details = err.response.json()
	        except ValueError:
	            details = err.response.text
	        print(f" {details}")
	        raise
	    # Notify in the community tab
	    notify_success(project_id)

	    return {"processed": True}


	class AutoTrain:
	    """Small client for the AutoTrain REST API calls used to schedule a retrain.

	    Every call authenticates with `HF_ACCESS_TOKEN` as a Bearer token and
	    raises `requests.HTTPError` when the API answers with an error status.
	    """

	    # Seconds to wait for the API before giving up. `requests` applies no
	    # timeout by default, so without this a stalled connection would block
	    # the background task indefinitely.
	    REQUEST_TIMEOUT = 60

	    @staticmethod
	    def _post(path: str, json: Optional[dict] = None):
	        """POST to `AUTOTRAIN_API_URL + path` and return the response.

	        Args:
	            path: URL path starting with `/`, appended to the API base URL.
	            json: Optional JSON-serializable request body.

	        Returns:
	            The `requests` response object.

	        Raises:
	            requests.HTTPError: When the response has an error status.
	            requests.Timeout: When the API does not answer in time.
	        """
	        response = requests.post(
	            f"{AUTOTRAIN_API_URL}{path}",
	            json=json,
	            headers={"Authorization": f"Bearer {HF_ACCESS_TOKEN}"},
	            timeout=AutoTrain.REQUEST_TIMEOUT,
	        )
	        response.raise_for_status()
	        return response

	    @staticmethod
	    def create_project(payload: WebhookPayload) -> dict:
	        """Create an AutoTrain project and return the decoded JSON response.

	        The project name is the configured prefix followed by the first
	        seven characters of the repo's head SHA from the webhook payload.
	        """
	        short_sha = payload.repo.headSha[:7]
	        project_resp = AutoTrain._post(
	            "/projects/create",
	            json={
	                "username": config.target_namespace,
	                "proj_name": f"{config.autotrain_project_prefix}-{short_sha}",
	                "task": 18,  # image-multi-class-classification
	                "config": {
	                    "hub-model": config.input_model,
	                    "max_models": 1,
	                    "language": "unk",
	                },
	            },
	        )
	        return project_resp.json()

	    @staticmethod
	    def add_data(project_id: int):
	        """Attach the configured input dataset to the given project."""
	        AutoTrain._post(
	            f"/projects/{project_id}/data/dataset",
	            json={
	                "dataset_id": config.input_dataset,
	                "dataset_split": "train",
	                "split": 4,
	                "col_mapping": {
	                    "image": "image",
	                    "label": "target",
	                },
	            },
	        )

	    @staticmethod
	    def start_processing(project_id: int):
	        """Ask AutoTrain to start processing the project's data; return the response."""
	        return AutoTrain._post(f"/projects/{project_id}/data/start_processing")


	def notify_success(project_id: int):
	    """Open a discussion on the input dataset repo announcing the retrain.

	    Args:
	        project_id: Identifier of the AutoTrain project, interpolated into
	            the notification message.

	    Returns:
	        Whatever `HfApi.create_discussion` returns.
	    """
	    template_fields = {
	        "input_model": config.input_model,
	        "input_dataset": config.input_dataset,
	        "project_id": project_id,
	        "ui_url": AUTOTRAIN_UI_URL,
	    }
	    body = NOTIFICATION_TEMPLATE.format(**template_fields)

	    hub_client = HfApi(token=HF_ACCESS_TOKEN)
	    discussion = hub_client.create_discussion(
	        repo_id=config.input_dataset,
	        repo_type="dataset",
	        title="✨ Retraining started!",
	        description=body,
	        token=HF_ACCESS_TOKEN,
	    )
	    return discussion

	# Body of the discussion opened by `notify_success`. It is filled with
	# `str.format`, which supplies the `input_dataset`, `input_model`, `ui_url`
	# and `project_id` placeholders.
	NOTIFICATION_TEMPLATE = """\
	🌸 Hello there!

	Following an update of [{input_dataset}](https://huggingface.co/datasets/{input_dataset}), an automatic re-training of [{input_model}](https://huggingface.co/{input_model}) has been scheduled on AutoTrain!

	Please review and approve the project [here]({ui_url}/{project_id}/trainings) to start the training job.

	(This is an automated message)
	"""