Spaces:

huggingface-projects
/

auto-retrain

Runtime error

App Files Files Community

sbrandeis HF staff committed on Jan 20, 2023

Commit

55d6386

•

1 Parent(s): 583a982

✨ Implement Auto-Retrain

Browse files

Files changed (9) hide show

.gitignore +3 -0
Dockerfile +16 -0
README.md +2 -2
config.json +6 -0
home.html +16 -0
requirements.txt +4 -0
src/main.py +142 -0
src/models.py +28 -0
style.css +28 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+.venv
+.vscode
+__pycache__

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+FROM python:3.9
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+COPY --chown=user requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . .
+CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-title: Actvie Learning Webhook
-emoji: 🏢
 colorFrom: yellow
 colorTo: red
 sdk: docker

 ---
+title: Auto Re-Train
+emoji: ♻
 colorFrom: yellow
 colorTo: red
 sdk: docker

config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+    "target_namespace": "sbrandeis-test-org",
+    "input_dataset": "sbrandeis-test-org/input-dataset",
+    "input_model": "microsoft/resnet-50",
+    "autotrain_project_prefix": "auto-retrain-"
+}

home.html ADDED Viewed

	@@ -0,0 +1,16 @@

+<!DOCTYPE html>
+<html>
+	<head>
+		<meta charset="utf-8" />
+		<meta name="viewport" content="width=device-width" />
+		<title>Auto Re-Train</title>
+		<link rel="stylesheet" href="style.css" />
+	</head>
+    <body>
+        <div class="card">
+            <h1>Auto Re-Train webhook</h1>
+            <p>This is a webhook space to auto-retrain a model when a dataset changes.</p>
+        </div>
+    </body>
+</html>

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+fastapi==0.74.*
+requests==2.27.*
+huggingface_hub==0.11.*
+uvicorn[standard]==0.17.*

src/main.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import os
+import requests
+from typing import Optional
+from fastapi import FastAPI, Header, HTTPException, BackgroundTasks
+from fastapi.responses import FileResponse
+from huggingface_hub.hf_api import HfApi
+from .models import config, WebhookPayload
+WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET")  # value the x_webhook_secret header must match; None when the env var is unset
+HF_ACCESS_TOKEN = os.getenv("HF_ACCESS_TOKEN")  # bearer token sent to the AutoTrain API and used to open the Hub discussion
+AUTOTRAIN_API_URL = "https://api.autotrain.huggingface.co"  # base URL for the AutoTrain HTTP calls below
+AUTOTRAIN_UI_URL = "https://ui.autotrain.huggingface.co"  # base URL used to build the project link in the notification message
+app = FastAPI()  # ASGI application; the Dockerfile launches it as src.main:app
+@app.get("/")
+async def home():
+	return FileResponse("home.html")
+@app.post("/webhook")
+async def post_webhook(
+		payload: WebhookPayload,
+		task_queue: BackgroundTasks,
+		x_webhook_secret:  Optional[str] = Header(default=None),
+	):
+	if x_webhook_secret is None:
+		raise HTTPException(401)
+	if x_webhook_secret != WEBHOOK_SECRET:
+		raise HTTPException(403)
+	if not (
+		payload.event.action == "update"
+		and payload.event.scope.startswith("repo.content")
+		and payload.repo.name == config.input_dataset
+		and payload.repo.type == "dataset"
+	):
+		# no-op
+		return {"processed": False}
+	task_queue.add_task(
+		schedule_retrain,
+		payload
+	)
+	return {"processed": True}
+def schedule_retrain(payload: WebhookPayload):
+	# Create the autotrain project
+	try:
+		project = AutoTrain.create_project(payload)
+		AutoTrain.add_data(project_id=project["id"])
+		AutoTrain.start_processing(project_id=project["id"])
+	except requests.HTTPError as err:
+		print("ERROR while requesting AutoTrain API:")
+		print(f"  code: {err.response.status_code}")
+		print(f"  {err.response.json()}")
+		raise
+	# Notify in the community tab
+	notify_success(project["id"])
+	return {"processed": True}
+class AutoTrain:
+	@staticmethod
+	def create_project(payload: WebhookPayload) -> dict:
+		project_resp = requests.post(
+			f"{AUTOTRAIN_API_URL}/projects/create",
+			json={
+				"username": config.target_namespace,
+				"proj_name": f"{config.autotrain_project_prefix}-{payload.repo.headSha[:7]}",
+				"task": 18, # image-multi-class-classification
+				"config": {
+					"hub-model": config.input_model,
+					"max_models": 1,
+					"language": "unk",
+				}
+			},
+			headers={
+				"Authorization": f"Bearer {HF_ACCESS_TOKEN}"
+			}
+		)
+		project_resp.raise_for_status()
+		return project_resp.json()
+	@staticmethod
+	def add_data(project_id:int):
+		requests.post(
+			f"{AUTOTRAIN_API_URL}/projects/{project_id}/data/dataset",
+			json={
+				"dataset_id": config.input_dataset,
+				"dataset_split": "train",
+				"split": 4,
+				"col_mapping": {
+					"image": "image",
+					"label": "target",
+				}
+			},
+			headers={
+				"Authorization": f"Bearer {HF_ACCESS_TOKEN}",
+			}
+		).raise_for_status()
+	@staticmethod
+	def start_processing(project_id: int):
+		resp = requests.post(
+			f"{AUTOTRAIN_API_URL}/projects/{project_id}/data/start_processing",
+			headers={
+				"Authorization": f"Bearer {HF_ACCESS_TOKEN}",
+			}
+		)
+		resp.raise_for_status()
+		return resp
+def notify_success(project_id: int):
+	message = NOTIFICATION_TEMPLATE.format(
+		input_model=config.input_model,
+		input_dataset=config.input_dataset,
+		project_id=project_id,
+		ui_url=AUTOTRAIN_UI_URL,
+	)
+	return HfApi(token=HF_ACCESS_TOKEN).create_discussion(
+		repo_id=config.input_dataset,
+		repo_type="dataset",
+		title="✨ Retraining started!",
+		description=message,
+		token=HF_ACCESS_TOKEN,
+	)
+NOTIFICATION_TEMPLATE = """\
+🌸 Hello there!
+Following an update of [{input_dataset}](https://huggingface.co/datasets/{input_dataset}), an automatic re-training of [{input_model}](https://huggingface.co/{input_model}) has been scheduled on AutoTrain!
+Please review and approve the project [here]({ui_url}/{project_id}/trainings) to start the training job.
+(This is an automated message)
+"""  # discussion body; notify_success fills input_dataset, input_model, ui_url and project_id via str.format

src/models.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import os
+from pydantic import BaseModel
+from typing import Literal
+class Config(BaseModel):  # application settings, parsed from config.json at import time
+	target_namespace: str  # sent as "username" when creating the AutoTrain project
+	input_dataset: str  # dataset repo name whose updates trigger a retrain; also where the discussion is opened
+	input_model: str  # sent as "hub-model" in the AutoTrain project config
+	autotrain_project_prefix: str  # leading part of the generated AutoTrain project name
+class WebhookPayloadEvent(BaseModel):  # "event" part of the webhook request body
+	action: Literal["create", "update", "delete"]  # any other value fails validation; only "update" triggers a retrain
+	scope: str  # the handler only reacts to scopes starting with "repo.content"
+class WebhookPayloadRepo(BaseModel):  # "repo" part of the webhook request body
+	type: Literal["dataset", "model", "space"]  # the handler only reacts to "dataset"
+	name: str  # compared against config.input_dataset by the handler
+	id: str
+	private: bool
+	headSha: str  # first 7 characters become the AutoTrain project name suffix. NOTE(review): required field, so payloads without it are rejected — confirm every subscribed event type sends it
+class WebhookPayload(BaseModel):  # request body accepted by POST /webhook
+	event: WebhookPayloadEvent  # what happened (action and scope)
+	repo: WebhookPayloadRepo  # which repository the event concerns
+config = Config.parse_file(os.path.join(os.getcwd(), "config.json"))  # loaded once at import; the path is relative to the process working directory

style.css ADDED Viewed

	@@ -0,0 +1,28 @@

+body {
+	padding: 2rem;
+	font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
+}
+h1 {
+	font-size: 16px;
+	margin-top: 0;
+}
+p {
+	color: rgb(107, 114, 128);
+	font-size: 15px;
+	margin-bottom: 10px;
+	margin-top: 5px;
+}
+.card {
+	max-width: 620px;
+	margin: 0 auto;
+	padding: 16px;
+	border: 1px solid lightgray;
+	border-radius: 16px;
+}
+.card p:last-child {
+	margin-bottom: 0;
+}