|
import os |
|
from pathlib import Path |
|
|
|
import gradio as gr |
|
from huggingface_hub import WebhookPayload, WebhooksServer |
|
|
|
from src.my_logger import setup_logger |
|
from src.utilities import load_datasets, merge_and_update_datasets |
|
from src.visualize_logs import log_file_to_html_string |
|
from src.build_nomic import build_nomic |
|
|
|
# Directory containing this source file. Built from __file__ because
# __name__ is the module's import name (e.g. "__main__"), not a filesystem
# path, so Path(__name__).parent would always collapse to ".".
proj_dir = Path(__file__).resolve().parent
|
|
|
logger = setup_logger(__name__)

# Required configuration. os.environ[...] raises KeyError at import time if
# any of these variables is missing.
SUBREDDIT = os.environ["SUBREDDIT"]
USERNAME = os.environ["USERNAME"]
# Repo id assembled from the user name and subreddit read above.
OG_DATASET = f"{USERNAME}/dataset-creator-reddit-{SUBREDDIT}"
PROCESSED_DATASET = os.environ["PROCESSED_DATASET"]
HUGGINGFACE_AUTH_TOKEN = os.environ["HUGGINGFACE_AUTH_TOKEN"]

# Optional configuration. The historical default is kept for backward
# compatibility, but it is a guessable value, so the fallback is logged
# instead of being applied silently.
WEBHOOK_SECRET = os.getenv("HF_WEBHOOK_SECRET", "secret")
if "HF_WEBHOOK_SECRET" not in os.environ:
    logger.warning(
        "HF_WEBHOOK_SECRET is not set; falling back to the default webhook secret. "
        "Set HF_WEBHOOK_SECRET to a private value."
    )
|
|
|
# Markdown shown at the top of the "Application" tab. Assembled line by line;
# the empty first and last entries reproduce the leading and trailing newline
# of the text.
intro_md = "\n".join(
    (
        "",
        "# Processing BORU",
        "This space is triggered by a webhook for changes on",
        "[derek-thomas/dataset-creator-reddit-bestofredditorupdates](https://huggingface.co/datasets/derek-thomas/dataset-creator-reddit-bestofredditorupdates).",
        "It then takes the updates from that dataset and get embeddings and puts the results in",
        "[https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed](https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed)",
        "",
    )
)
|
|
|
# Single-tab Gradio page: the intro markdown followed by an HTML component
# fed by log_file_to_html_string.
with gr.Blocks() as ui, gr.Tab("Application"):
    gr.Markdown(intro_md)
    # The callable is passed together with every=1; presumably Gradio
    # re-evaluates it each second to refresh the log view (confirm against
    # the Gradio docs).
    output = gr.HTML(log_file_to_html_string, every=1)

# The webhook server is given the queued UI and the configured secret.
_queued_ui = ui.queue()
app = WebhooksServer(ui=_queued_ui, webhook_secret=WEBHOOK_SECRET)
|
|
|
|
|
@app.add_webhook("/dataset_repo")
async def community(payload: WebhookPayload) -> None:
    """Refresh the processed dataset in response to a dataset-repo webhook.

    Registered on the webhook server under ``/dataset_repo``. Events whose
    scope does not start with ``"repo"`` are ignored. For any other event
    the handler loads the datasets, merges them, pushes the result to
    ``PROCESSED_DATASET`` on the Hugging Face Hub and finally calls
    ``build_nomic`` on the merged dataset.

    Args:
        payload: The webhook payload; its ``event.scope``, ``event.action``
            and ``repo.name`` fields are read.

    Returns:
        None in every case, including ignored events.
    """
    # Guard clause: only repo-scoped events trigger any processing.
    if not payload.event.scope.startswith("repo"):
        return

    logger.info(f"Webhook received from {payload.repo.name} indicating a repo {payload.event.action}")

    # NOTE(review): the steps below are plain (non-awaited) calls made from a
    # coroutine, so they presumably occupy the event loop until they finish.
    # Confirm, and consider moving them to a background task or executor.
    logger.info("Loading new dataset...")
    dataset, original_dataset = load_datasets()
    logger.info("Loaded new dataset")

    logger.info("Merging and Updating row...")
    dataset = merge_and_update_datasets(dataset, original_dataset)

    logger.info("Pushing processed data to the Hugging Face Hub...")
    dataset.push_to_hub(PROCESSED_DATASET, token=HUGGINGFACE_AUTH_TOKEN)
    logger.info("Pushed processed data to the Hugging Face Hub")

    logger.info("Building Nomic...")
    build_nomic(dataset=dataset)
    logger.info("Built Nomic")
|
|
|
if __name__ == "__main__":
    # Start the webhook server only when this file is run as a script.
    # It listens on all interfaces, port 7860, with show_error enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "show_error": True,
        "server_port": 7860,
    }
    app.launch(**launch_options)
|
|
|
|