derek-thomas HF staff committed on
Commit
fceefe7
1 Parent(s): 075c34d

Removed health and added webhook

Browse files
Files changed (1) hide show
  1. app.py +27 -20
app.py CHANGED
@@ -1,22 +1,28 @@
1
  import os
2
  from pathlib import Path
3
- from datetime import datetime
4
 
5
  import gradio as gr
6
  from bs4 import BeautifulSoup
 
7
  from rich.console import Console
8
  from rich.syntax import Syntax
9
 
 
 
10
  proj_dir = Path(__name__).parent
11
 
12
- subreddit = os.environ["SUBREDDIT"]
13
- username = os.environ["USERNAME"]
14
- dataset_name = f"{username}/dataset-creator-reddit-{subreddit}"
15
 
16
- frequency = os.environ.get("FREQUENCY", '').lower()
17
- if frequency not in ["daily", "hourly"]:
18
  raise gr.Error("FREQUENCY environment variable must be 'daily' or 'hourly'")
19
 
 
 
 
 
20
 
21
  def log_file_to_html_string():
22
  log_file = "mylog.log"
@@ -63,8 +69,8 @@ pre, code {
63
 
64
  intro_md = f"""
65
  # Reddit Dataset Creator
66
- This is a reddit dataset creator which builds and updates [{dataset_name}](https://huggingface.co/datasets/{dataset_name})
67
- which pulls from [/r/{subreddit}](http://www.reddit.com/r/{subreddit}). Check the dataset for more details.
68
 
69
  As shown in the below diagram this space pulls data from reddit via [PRAW](https://praw.readthedocs.io/en/stable/), processes it, and puts it in a corresponding dataset.
70
  """
@@ -98,22 +104,15 @@ log files. I use gradio for `app` and map that to the open port of huggingface s
98
 
99
  The only communication between `app` and `main` is the log file.
100
  """
101
- def health(text):
102
- # Get the current date and time
103
- current_time = datetime.now()
104
-
105
- # Print it in the format YYYY-MM-DD HH:MM:SS
106
- print(current_time.strftime("%Y-%m-%d %H:%M:%S"))
107
- return "Healthy"
108
 
109
- with gr.Blocks() as demo:
110
  with gr.Tab("Application"):
111
  gr.Markdown(intro_md)
112
  gr.Image(proj_dir / 'media' / 'reddit_scraper.drawio.png')
113
  gr.Markdown("# Logs")
114
  output = gr.HTML(log_file_to_html_string, every=1)
115
- demo.load(None,
116
- _js="""
117
  () => {
118
  document.body.classList.toggle('dark');
119
  document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
@@ -130,7 +129,15 @@ with gr.Blocks() as demo:
130
  with gr.Column():
131
  output_text = gr.Textbox(label="Output Text")
132
 
133
- health_btn.click(health, inputs=input_text, outputs=output_text, api_name="health")
 
 
 
 
 
 
 
134
 
135
  if __name__ == '__main__':
136
- demo.queue().launch(server_name="0.0.0.0", show_error=True, server_port=7860)
 
 
1
  import os
2
  from pathlib import Path
 
3
 
4
  import gradio as gr
5
  from bs4 import BeautifulSoup
6
+ from huggingface_hub import WebhookPayload, WebhooksServer
7
  from rich.console import Console
8
  from rich.syntax import Syntax
9
 
10
+ from utilities.my_logger import setup_logger
11
+
12
  proj_dir = Path(__name__).parent
13
 
14
+ SUBREDDIT = os.environ["SUBREDDIT"]
15
+ USERNAME = os.environ["USERNAME"]
16
+ DATASET_NAME = f"{USERNAME}/dataset-creator-reddit-{SUBREDDIT}"
17
 
18
+ FREQUENCY = os.environ.get("FREQUENCY", '').lower()
19
+ if FREQUENCY not in ["daily", "hourly"]:
20
  raise gr.Error("FREQUENCY environment variable must be 'daily' or 'hourly'")
21
 
22
+ SECRET = os.getenv("HF_WEBHOOK_SECRET")
23
+
24
+ logger = setup_logger(__name__)
25
+
26
 
27
  def log_file_to_html_string():
28
  log_file = "mylog.log"
 
69
 
70
  intro_md = f"""
71
  # Reddit Dataset Creator
72
+ This is a reddit dataset creator which builds and updates [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME})
73
+ which pulls from [/r/{SUBREDDIT}](http://www.reddit.com/r/{SUBREDDIT}). Check the dataset for more details.
74
 
75
  As shown in the below diagram this space pulls data from reddit via [PRAW](https://praw.readthedocs.io/en/stable/), processes it, and puts it in a corresponding dataset.
76
  """
 
104
 
105
  The only communication between `app` and `main` is the log file.
106
  """
 
 
 
 
 
 
 
107
 
108
+ with gr.Blocks() as ui:
109
  with gr.Tab("Application"):
110
  gr.Markdown(intro_md)
111
  gr.Image(proj_dir / 'media' / 'reddit_scraper.drawio.png')
112
  gr.Markdown("# Logs")
113
  output = gr.HTML(log_file_to_html_string, every=1)
114
+ ui.load(None,
115
+ _js="""
116
  () => {
117
  document.body.classList.toggle('dark');
118
  document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
 
129
  with gr.Column():
130
  output_text = gr.Textbox(label="Output Text")
131
 
132
+ app = WebhooksServer(ui=ui, webhook_secret=SECRET)
133
+
134
+
135
+ @app.add_webhook("/community")
136
+ async def community(payload: WebhookPayload):
137
+ if payload.event.scope.startswith("repo"):
138
+ logger.info(f"Webhook received from {DATASET_NAME} indicating a repo {payload.event.action}")
139
+
140
 
141
  if __name__ == '__main__':
142
+ app.run()
143
+ # ui.queue().launch(server_name="0.0.0.0", show_error=True, server_port=7860)