diffusers-gallery-bot

Sleeping

App Files Files Community

dpredrag

radames HF staff committed on Mar 12, 2023

Commit

e4724c0

•

0 Parent(s):

Duplicate from huggingface-projects/diffusers-gallery-bot

Browse files

Co-authored-by: Radamés Ajna <radames@users.noreply.huggingface.co>

Files changed (10) hide show

.gitattributes +34 -0
.gitignore +22 -0
Dockerfile +25 -0
README.md +12 -0
app.py +330 -0
classifier.py +70 -0
db.py +36 -0
packages.txt +1 -0
requirements.txt +10 -0
schema.sql +13 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,22 @@

+.DS_Store
+node_modules
+/build
+/.svelte-kit
+/package
+.env
+.env.*
+!.env.example
+# Ignore files for PNPM, NPM and YARN
+pnpm-lock.yaml
+package-lock.json
+yarn.lock
+venv/
+__pycache__/
+flagged/
+data
+data.db
+data.json
+rooms_data.db
+sd-multiplayer-data/
+diffusers-gallery-data/

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+# Image for the gallery bot: Python dependencies, Git LFS, then the app run as a non-root user.
+FROM python:3.9
+WORKDIR /code
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Git LFS; the apt package lists are removed in the same layer to keep the image small
+RUN apt-get update && apt-get install -y git-lfs && rm -rf /var/lib/apt/lists/*
+RUN git lfs install
+# User
+RUN useradd -m -u 1000 user
+USER user
+# key=value form; the space-separated ENV form is legacy syntax
+ENV HOME=/home/user
+ENV PATH=$HOME/.local/bin:$PATH
+WORKDIR $HOME
+RUN mkdir app
+WORKDIR $HOME/app
+# --chown so the copied files belong to the runtime user instead of root
+COPY --chown=user . $HOME/app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+title: Diffusers Gallery Bot
+emoji: 🤖
+colorFrom: red
+colorTo: indigo
+sdk: docker
+app_port: 7860
+pinned: false
+duplicated_from: huggingface-projects/diffusers-gallery-bot
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,330 @@

+from enum import Enum
+import os
+import re
+import aiohttp
+import requests
+import json
+import subprocess
+import asyncio
+from io import BytesIO
+import uuid
+from math import ceil
+from tqdm import tqdm
+from pathlib import Path
+from huggingface_hub import Repository
+from PIL import Image, ImageOps
+from fastapi import FastAPI, BackgroundTasks
+from fastapi.responses import HTMLResponse
+from fastapi_utils.tasks import repeat_every
+from fastapi.middleware.cors import CORSMiddleware
+import boto3
+from datetime import datetime
+from db import Database
+# Runtime configuration, read from environment variables at import time.
+# os.getenv / os.environ.get return None when a variable is unset.
+AWS_ACCESS_KEY_ID = os.getenv('MY_AWS_ACCESS_KEY_ID')
+AWS_SECRET_KEY = os.getenv('MY_AWS_SECRET_KEY')
+AWS_S3_BUCKET_NAME = os.getenv('MY_AWS_S3_BUCKET_NAME')
+# NOTE(review): HF_TOKEN is not referenced again in the code shown here; the
+# Repository below is created with use_auth_token=True instead - confirm intent.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+# NOTE(review): S3_DATA_FOLDER is not used by the code shown here.
+S3_DATA_FOLDER = Path("sd-multiplayer-data")
+# Local directory of the dataset repository that holds models.json and the database.
+DB_FOLDER = Path("diffusers-gallery-data")
+# Endpoint that run_classifier() posts image URLs to.
+CLASSIFIER_URL = "https://radames-aesthetic-style-nsfw-classifier.hf.space/run/inference"
+# URL prefix prepended to uploaded file names before they are sent to the classifier
+# (presumably a CDN in front of the S3 bucket - confirm).
+ASSETS_URL = "https://d26smi9133w0oo.cloudfront.net/diffusers-gallery/"
+# S3 client used by upload_resize_image_url() to store thumbnails.
+s3 = boto3.client(service_name='s3',
+                  aws_access_key_id=AWS_ACCESS_KEY_ID,
+                  aws_secret_access_key=AWS_SECRET_KEY)
+# Dataset repository checked out into DB_FOLDER; it is pulled before the database is opened.
+repo = Repository(
+    local_dir=DB_FOLDER,
+    repo_type="dataset",
+    clone_from="huggingface-projects/diffusers-gallery-data",
+    use_auth_token=True,
+)
+repo.git_pull()
+# Database wrapper rooted at DB_FOLDER (class imported from db).
+database = Database(DB_FOLDER)
+async def upload_resize_image_url(session, image_url):
+    print(f"Uploading image {image_url}")
+    try:
+        async with session.get(image_url) as response:
+            if response.status == 200 and (response.headers['content-type'].startswith('image') or response.headers['content-type'].startswith('application')):
+                image = Image.open(BytesIO(await response.read())).convert('RGB')
+                # resize image proportional
+                image = ImageOps.fit(image, (400, 400), Image.LANCZOS)
+                image_bytes = BytesIO()
+                image.save(image_bytes, format="JPEG")
+                image_bytes.seek(0)
+                fname = f'{uuid.uuid4()}.jpg'
+                s3.upload_fileobj(Fileobj=image_bytes, Bucket=AWS_S3_BUCKET_NAME, Key="diffusers-gallery/" + fname,
+                                  ExtraArgs={"ContentType": "image/jpeg", "CacheControl": "max-age=31536000"})
+                return fname
+    except Exception as e:
+        print(f"Error uploading image {image_url}: {e}")
+        return None
+def fetch_models(page=0):
+    response = requests.get(
+        f'https://huggingface.co/models-json?pipeline_tag=text-to-image&p={page}')
+    data = response.json()
+    return {
+        "models": [model for model in data['models'] if not model['private']],
+        "numItemsPerPage": data['numItemsPerPage'],
+        "numTotalItems": data['numTotalItems'],
+        "pageIndex": data['pageIndex']
+    }
+def fetch_model_card(model_id):
+    response = requests.get(
+        f'https://huggingface.co/{model_id}/raw/main/README.md')
+    return response.text
+async def find_image_in_model_card(text):
+    image_regex = re.compile(r'https?://\S+(?:png|jpg|jpeg|webp)')
+    urls = re.findall(image_regex, text)
+    if not urls:
+        return []
+    async with aiohttp.ClientSession() as session:
+        tasks = [asyncio.ensure_future(upload_resize_image_url(
+            session, image_url)) for image_url in urls[0:3]]
+        return await asyncio.gather(*tasks)
+def run_classifier(images):
+    images = [i for i in images if i is not None]
+    if len(images) > 0:
+        # classifying only the first image
+        images_urls = [ASSETS_URL + images[0]]
+        response = requests.post(CLASSIFIER_URL, json={"data": [
+            {"urls": images_urls},  # json urls: list of images urls
+            False,  # enable/disable gallery image output
+            None,  # single image input
+            None,  # files input
+        ]}).json()
+        # data response is array data:[[{img0}, {img1}, {img2}...], Label, Gallery],
+        class_data = response['data'][0][0]
+        class_data_parsed = {row['label']: round(
+            row['score'], 3) for row in class_data}
+        # update row data with classificator data
+        return class_data_parsed
+    else:
+        return {}
+async def get_all_new_models():
+    initial = fetch_models(0)
+    num_pages = ceil(initial['numTotalItems'] / initial['numItemsPerPage'])
+    print(
+        f"Total items: {initial['numTotalItems']} - Items per page: {initial['numItemsPerPage']}")
+    print(f"Found {num_pages} pages")
+    # fetch all models
+    new_models = []
+    for page in tqdm(range(0, num_pages)):
+        print(f"Fetching page {page} of {num_pages}")
+        page_models = fetch_models(page)
+        new_models += page_models['models']
+    return new_models
+async def sync_data():
+    print("Fetching models")
+    repo.git_pull()
+    all_models = await get_all_new_models()
+    print(f"Found {len(all_models)} models")
+    # save list of all models for ids
+    with open(DB_FOLDER / "models.json", "w") as f:
+        json.dump(all_models, f)
+    # with open(DB_FOLDER / "models.json", "r") as f:
+    #     new_models = json.load(f)
+    new_models_ids = [model['id'] for model in all_models]
+    # get existing models
+    with database.get_db() as db:
+        cursor = db.cursor()
+        cursor.execute("SELECT id FROM models")
+        existing_models = [row['id'] for row in cursor.fetchall()]
+    models_ids_to_add = list(set(new_models_ids) - set(existing_models))
+    # find all models id to add from new_models
+    models = [model for model in all_models if model['id'] in models_ids_to_add]
+    print(f"Found {len(models)} new models")
+    for model in tqdm(models):
+        model_id = model['id']
+        likes = model['likes']
+        downloads = model['downloads']
+        model_card = fetch_model_card(model_id)
+        images = await find_image_in_model_card(model_card)
+        classifier = run_classifier(images)
+        print(images, classifier)
+        # update model row with image and classifier data
+        with database.get_db() as db:
+            cursor = db.cursor()
+            cursor.execute("INSERT INTO models(id, data, likes, downloads) VALUES (?, ?, ?, ?)",
+                           [model_id,
+                            json.dumps({
+                                **model,
+                                "images": images,
+                                "class": classifier
+                            }),
+                            likes,
+                            downloads
+                            ])
+            db.commit()
+    print("Try to update images again")
+    with database.get_db() as db:
+        cursor = db.cursor()
+        cursor.execute(
+            "SELECT * from models WHERE json_array_length(data, '$.images') < 1;")
+        models_no_images = list(cursor.fetchall())
+        for model in tqdm(models_no_images):
+            model_id = model['id']
+            model_data = json.loads(model['data'])
+            print("Updating model", model_id)
+            model_card = fetch_model_card(model_id)
+            images = await find_image_in_model_card(model_card)
+            classifier = run_classifier(images)
+            # update model row with image and classifier data
+            with database.get_db() as db:
+                cursor = db.cursor()
+                cursor.execute("UPDATE models SET data = ? WHERE id = ?",
+                               [json.dumps(model_data), model_id])
+                db.commit()
+    print("Update likes and downloads")
+    for model in tqdm(all_models):
+        model_id = model['id']
+        likes = model['likes']
+        downloads = model['downloads']
+        with database.get_db() as db:
+            cursor = db.cursor()
+            cursor.execute("UPDATE models SET likes = ?, downloads = ? WHERE id = ?",
+                           [likes, downloads, model_id])
+            db.commit()
+    print("Updating DB repository")
+    time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    cmd = f"git add . && git commit --amend -m 'update at {time}' && git push --force"
+    print(cmd)
+    subprocess.Popen(cmd, cwd=DB_FOLDER, shell=True)
+# FastAPI application object; routes and the startup task below are registered on it.
+app = FastAPI()
+# Allow cross-origin requests from any origin, with any method and any header.
+# NOTE(review): wildcard origins together with allow_credentials=True is very
+# permissive - confirm this is intended.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Manual sync endpoint, currently disabled.
+# @ app.get("/sync")
+# async def sync(background_tasks: BackgroundTasks):
+#     await sync_data()
+#     return "Synced data to huggingface datasets"
+# Number of models returned per page by get_page().
+MAX_PAGE_SIZE = 30
+class Sort(str, Enum):
+    """Sort orders accepted by the `sort` parameter of get_page()."""
+    # likes divided by a power of the days since the last modification
+    trending = "trending"
+    # most recently modified first
+    recent = "recent"
+    # most liked first
+    likes = "likes"
+class Style(str, Enum):
+    """Style filters accepted by the `style` parameter of get_page()."""
+    # every model that is not flagged as NSFW
+    all = "all"
+    anime = "anime"
+    # the member is called s3D because an identifier cannot start with a digit
+    s3D = "3d"
+    realistic = "realistic"
+    # only models flagged as NSFW
+    nsfw = "nsfw"
+@ app.get("/api/models")
+def get_page(page: int = 1, sort: Sort = Sort.trending, style: Style = Style.all):
+    page = page if page > 0 else 1
+    if sort == Sort.trending:
+        sort_query = "likes / MYPOWER((JULIANDAY('now') - JULIANDAY(datetime(json_extract(data, '$.lastModified')))) + 2, 2) DESC"
+    elif sort == Sort.recent:
+        sort_query = "datetime(json_extract(data, '$.lastModified')) DESC"
+    elif sort == Sort.likes:
+        sort_query = "likes DESC"
+    if style == Style.all:
+        style_query = "isNFSW = false"
+    elif style == Style.anime:
+        style_query = "json_extract(data, '$.class.anime') > 0.1 AND isNFSW = false"
+    elif style == Style.s3D:
+        style_query = "json_extract(data, '$.class.3d') > 0.1 AND isNFSW = false"
+    elif style == Style.realistic:
+        style_query = "json_extract(data, '$.class.real_life') > 0.1 AND isNFSW = false"
+    elif style == Style.nsfw:
+        style_query = "isNFSW = true"
+    with database.get_db() as db:
+        cursor = db.cursor()
+        cursor.execute(f"""
+            SELECT *, COUNT(*) OVER() AS total,  isNFSW
+            FROM (
+                SELECT * ,
+                        json_extract(data, '$.class.explicit') > 0.3 OR json_extract(data, '$.class.suggestive') > 0.3 AS isNFSW
+                FROM models
+            )
+            WHERE likes > 3 AND {style_query}
+            ORDER BY {sort_query}
+            LIMIT {MAX_PAGE_SIZE} OFFSET {(page - 1) * MAX_PAGE_SIZE}
+        """)
+        results = cursor.fetchall()
+        total = results[0]['total'] if results else 0
+        total_pages = (total + MAX_PAGE_SIZE - 1) // MAX_PAGE_SIZE
+        models_data = []
+        for result in results:
+            data = json.loads(result['data'])
+            # update downloads and likes from db table
+            data['downloads'] = result['downloads']
+            data['likes'] = result['likes']
+            data['isNFSW'] = bool(result['isNFSW'])
+            models_data.append(data)
+    return {
+        "models": models_data,
+        "totalPages": total_pages
+    }
+@app.get("/")
+def read_root():
+    # return html page from string
+    return HTMLResponse("""
+    <p>Just a bot to sync data from diffusers gallery please go to
+    <a href="https://huggingface.co/spaces/huggingface-projects/diffusers-gallery">https://huggingface.co/spaces/huggingface-projects/diffusers-gallery</a>
+    </p>""")
+@app.on_event("startup")
+@repeat_every(seconds=60 * 60 * 6, wait_first=False)
+async def repeat_sync():
+    await sync_data()
+    return "Synced data to huggingface datasets"

classifier.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import os
+import re
+import requests
+import json
+import subprocess
+from io import BytesIO
+import uuid
+from math import ceil
+from tqdm import tqdm
+from pathlib import Path
+from db import Database
+# Folder that contains the models.db file opened through Database.
+DB_FOLDER = Path("diffusers-gallery-data")
+database = Database(DB_FOLDER)
+# Endpoint that main() posts image URLs to.
+CLASSIFIER_URL = "https://radames-aesthetic-style-nsfw-classifier.hf.space/run/inference"
+# URL prefix prepended to stored image file names before they are classified.
+ASSETS_URL = "https://d26smi9133w0oo.cloudfront.net/diffusers-gallery/"
+def main():
+    with database.get_db() as db:
+        cursor = db.cursor()
+        cursor.execute("""
+            SELECT *
+            FROM models
+        """)
+        results = list(cursor.fetchall())
+    for row in tqdm(results):
+        row_id = row['id']
+        # keep json data on row_data
+        row_data = json.loads(row['data'])
+        print("updating row", row_id)
+        images = row_data['images']
+        # filter nones
+        images = [i for i in images if i is not None]
+        if len(images) > 0:
+            # classifying only the first image
+            images_urls = [ASSETS_URL + images[0]]
+            response = requests.post(CLASSIFIER_URL, json={"data": [
+                {"urls": images_urls},  # json urls: list of images urls
+                False,  # enable/disable gallery image output
+                None,  # single image input
+                None,  # files input
+            ]}).json()
+            # data response is array data:[[{img0}, {img1}, {img2}...], Label, Gallery],
+            class_data = response['data'][0][0]
+            class_data_parsed = {row['label']: round(
+                row['score'], 3) for row in class_data}
+            # update row data with classificator data
+            row_data['class'] = class_data_parsed
+        else:
+            row_data['class'] = {}
+        with database.get_db() as db:
+            cursor = db.cursor()
+            cursor.execute("UPDATE models SET data = ? WHERE id = ?",
+                           [json.dumps(row_data), row_id])
+            db.commit()
+if __name__ == "__main__":
+    main()

db.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import sqlite3
+from pathlib import Path
+import math
+def power(x, y):
+    return math.pow(x, y)
+class Database:
+    def __init__(self, db_path=None):
+        if db_path is None:
+            raise ValueError("db_path must be provided")
+        self.db_path = db_path
+        self.db_file = self.db_path / "models.db"
+        if not self.db_file.exists():
+            print("Creating database")
+            print("DB_FILE", self.db_file)
+            db = sqlite3.connect(self.db_file)
+            with open(Path("schema.sql"), "r") as f:
+                db.executescript(f.read())
+            db.commit()
+            db.close()
+    def get_db(self):
+        db = sqlite3.connect(self.db_file, check_same_thread=False)
+        db.create_function("MYPOWER", 2, power)
+        db.row_factory = sqlite3.Row
+        return db
+    def __enter__(self):
+        self.db = self.get_db()
+        return self.db
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.db.close()

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ sqlite3

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+huggingface-hub
+fastapi-utils
+uvicorn
+tqdm
+fastapi
+requests
+asyncio
+aiohttp
+Pillow
+boto3

schema.sql ADDED Viewed

	@@ -0,0 +1,13 @@

+-- Schema of the gallery database: a single table with one row per model.
+PRAGMA foreign_keys = OFF;
+BEGIN TRANSACTION;
+-- id: model id (primary key)
+-- data: JSON document stored for the model
+-- likes, downloads: counters kept in their own columns, default 0
+-- created_at: set to the insertion time by default
+CREATE TABLE models (
+    id TEXT PRIMARY KEY NOT NULL,
+    data json,
+    likes INTEGER DEFAULT 0 NOT NULL,
+    downloads INTEGER DEFAULT 0 NOT NULL,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
+);
+COMMIT;
+COMMIT;