nbroad HF staff committed on
Commit
b12faee
1 Parent(s): 1606cd0

background scheduler

Browse files
Files changed (3) hide show
  1. app.py +47 -15
  2. requirements.txt +2 -1
  3. update.py +41 -7
app.py CHANGED
@@ -1,14 +1,18 @@
1
  import json
2
  import os
3
  from datetime import datetime, timezone, timedelta
 
4
 
5
  import meilisearch
6
  from fasthtml.common import *
7
  from markdown import markdown
8
  from dotenv import load_dotenv
 
 
 
9
 
10
  from constants import MeilisearchIndexFields
11
- from update import process_webhook
12
 
13
  loaded = load_dotenv("./.env", override=True)
14
  print("Loaded .env file:", loaded)
@@ -19,7 +23,21 @@ ms_client = meilisearch.Client(MS_URL, MS_SEARCH_KEY)
19
 
20
  css_content = open("styles.css").read()
21
 
22
- app, rt = fast_app(hdrs=(Style(css_content),))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
  md_exts = "codehilite", "smarty", "extra", "sane_lists"
@@ -29,7 +47,8 @@ def Markdown(s, exts=md_exts, **kw):
29
  return Div(NotStr(markdown(s, extensions=exts)), **kw)
30
 
31
 
32
- scroll_script = Script("""
 
33
  document.addEventListener('DOMContentLoaded', function() {
34
  var scrollButton = document.getElementById('scroll-top-btn');
35
 
@@ -46,7 +65,8 @@ document.addEventListener('DOMContentLoaded', function() {
46
  document.documentElement.scrollTop = 0; // For Chrome, Firefox, IE and Opera
47
  };
48
  });
49
- """)
 
50
 
51
 
52
  def date_range_inputs(start_date, end_date):
@@ -80,7 +100,7 @@ def search_form(start_date, end_date):
80
 
81
 
82
  def iso_to_unix_timestamp(iso_string):
83
- dt = datetime.fromisoformat(iso_string)
84
  return int(dt.timestamp())
85
 
86
 
@@ -94,7 +114,10 @@ def make_query(query, start_date, end_date, page=1, limit=10):
94
  twenty_three_hours_59_minutes_59_seconds_in_seconds = (23 * 60 + 59) * 60 + 59
95
 
96
  after_timestamp = iso_to_unix_timestamp(start_date)
97
- before_timestamp = iso_to_unix_timestamp(end_date) + twenty_three_hours_59_minutes_59_seconds_in_seconds
 
 
 
98
 
99
  options = {
100
  "limit": limit,
@@ -102,13 +125,18 @@ def make_query(query, start_date, end_date, page=1, limit=10):
102
  "filter": f"{MeilisearchIndexFields.UPDATED_AT.value} >= {after_timestamp} AND {MeilisearchIndexFields.UPDATED_AT.value} < {before_timestamp}",
103
  "attributesToCrop": [MeilisearchIndexFields.CONTENT.value],
104
  "cropLength": 30,
105
- "attributesToHighlight": [MeilisearchIndexFields.CONTENT.value, MeilisearchIndexFields.TITLE.value],
 
 
 
106
  "highlightPreTag": '<span class="highlight">',
107
  "highlightPostTag": "</span>",
 
108
  }
109
 
110
-
111
- return ms_client.index(MeilisearchIndexFields.INDEX_NAME.value).search(query=query, opt_params=options)
 
112
 
113
 
114
  def search_results(query, start_date, end_date, page=1):
@@ -136,7 +164,9 @@ def make_card(result):
136
  result = result["_formatted"]
137
 
138
  url = result[MeilisearchIndexFields.URL.value]
139
- date = unix_timestamp_to_nice_format(int(result[MeilisearchIndexFields.UPDATED_AT.value]))
 
 
140
 
141
  return Div(
142
  Div(
@@ -156,7 +186,7 @@ def make_pagination(current_page, total_hits, limit=10):
156
 
157
  if current_page > 1:
158
  children.append(
159
- Button(
160
  "Previous",
161
  hx_post=f"/search?page={current_page-1}",
162
  hx_target="#search-results",
@@ -178,8 +208,10 @@ def make_pagination(current_page, total_hits, limit=10):
178
 
179
  return Div(*children, cls="pagination")
180
 
181
- scroll_button = Button("Scroll to Top",
182
- id="scroll-top-btn",
 
 
183
  style="""
184
  position: fixed;
185
  bottom: 20px;
@@ -191,9 +223,10 @@ scroll_button = Button("Scroll to Top",
191
  border-radius: 5px;
192
  padding: 10px 15px;
193
  cursor: pointer;
194
- """
195
  )
196
 
 
197
  @rt("/")
198
  def get():
199
  end_date = datetime.now()
@@ -217,7 +250,6 @@ def post(query: str, start_date: str, end_date: str, page: int = 1):
217
 
218
  @app.post("/webhook")
219
  async def hf_webhook(request):
220
-
221
  return await process_webhook(request)
222
 
223
 
 
1
  import json
2
  import os
3
  from datetime import datetime, timezone, timedelta
4
+ from dateutil import parser as dateparser
5
 
6
  import meilisearch
7
  from fasthtml.common import *
8
  from markdown import markdown
9
  from dotenv import load_dotenv
10
+ from apscheduler.schedulers.background import BackgroundScheduler
11
+ from apscheduler.triggers.cron import CronTrigger
12
+ from contextlib import asynccontextmanager
13
 
14
  from constants import MeilisearchIndexFields
15
+ from update import process_webhook, update_webhooks
16
 
17
  loaded = load_dotenv("./.env", override=True)
18
  print("Loaded .env file:", loaded)
 
23
 
24
  css_content = open("styles.css").read()
25
 
26
+
27
+ @asynccontextmanager
28
+ async def lifespan(app):
29
+ # Setup
30
+ scheduler = BackgroundScheduler()
31
+ scheduler.add_job(update_webhooks, CronTrigger.from_crontab("0 */3 * * *"))
32
+ scheduler.start()
33
+
34
+ yield
35
+
36
+ # Cleanup
37
+ scheduler.shutdown()
38
+
39
+
40
+ app, rt = fast_app(hdrs=(Style(css_content),), lifespan=lifespan)
41
 
42
 
43
  md_exts = "codehilite", "smarty", "extra", "sane_lists"
 
47
  return Div(NotStr(markdown(s, extensions=exts)), **kw)
48
 
49
 
50
+ scroll_script = Script(
51
+ """
52
  document.addEventListener('DOMContentLoaded', function() {
53
  var scrollButton = document.getElementById('scroll-top-btn');
54
 
 
65
  document.documentElement.scrollTop = 0; // For Chrome, Firefox, IE and Opera
66
  };
67
  });
68
+ """
69
+ )
70
 
71
 
72
  def date_range_inputs(start_date, end_date):
 
100
 
101
 
102
  def iso_to_unix_timestamp(iso_string):
103
+ dt = dateparser.isoparse(iso_string)
104
  return int(dt.timestamp())
105
 
106
 
 
114
  twenty_three_hours_59_minutes_59_seconds_in_seconds = (23 * 60 + 59) * 60 + 59
115
 
116
  after_timestamp = iso_to_unix_timestamp(start_date)
117
+ before_timestamp = (
118
+ iso_to_unix_timestamp(end_date)
119
+ + twenty_three_hours_59_minutes_59_seconds_in_seconds
120
+ )
121
 
122
  options = {
123
  "limit": limit,
 
125
  "filter": f"{MeilisearchIndexFields.UPDATED_AT.value} >= {after_timestamp} AND {MeilisearchIndexFields.UPDATED_AT.value} < {before_timestamp}",
126
  "attributesToCrop": [MeilisearchIndexFields.CONTENT.value],
127
  "cropLength": 30,
128
+ "attributesToHighlight": [
129
+ MeilisearchIndexFields.CONTENT.value,
130
+ MeilisearchIndexFields.TITLE.value,
131
+ ],
132
  "highlightPreTag": '<span class="highlight">',
133
  "highlightPostTag": "</span>",
134
+ "distinct": MeilisearchIndexFields.URL.value,
135
  }
136
 
137
+ return ms_client.index(MeilisearchIndexFields.INDEX_NAME.value).search(
138
+ query=query, opt_params=options
139
+ )
140
 
141
 
142
  def search_results(query, start_date, end_date, page=1):
 
164
  result = result["_formatted"]
165
 
166
  url = result[MeilisearchIndexFields.URL.value]
167
+ date = unix_timestamp_to_nice_format(
168
+ int(result[MeilisearchIndexFields.UPDATED_AT.value])
169
+ )
170
 
171
  return Div(
172
  Div(
 
186
 
187
  if current_page > 1:
188
  children.append(
189
+ Button(
190
  "Previous",
191
  hx_post=f"/search?page={current_page-1}",
192
  hx_target="#search-results",
 
208
 
209
  return Div(*children, cls="pagination")
210
 
211
+
212
+ scroll_button = Button(
213
+ "Scroll to Top",
214
+ id="scroll-top-btn",
215
  style="""
216
  position: fixed;
217
  bottom: 20px;
 
223
  border-radius: 5px;
224
  padding: 10px 15px;
225
  cursor: pointer;
226
+ """,
227
  )
228
 
229
+
230
  @rt("/")
231
  def get():
232
  end_date = datetime.now()
 
250
 
251
  @app.post("/webhook")
252
  async def hf_webhook(request):
 
253
  return await process_webhook(request)
254
 
255
 
requirements.txt CHANGED
@@ -5,4 +5,5 @@ fasthtml-hf
5
  markdown
6
  meilisearch
7
  huggingface_hub
8
- requests
 
 
5
  markdown
6
  meilisearch
7
  huggingface_hub
8
+ requests
9
+ apscheduler
update.py CHANGED
@@ -171,28 +171,62 @@ def update_discussion_status(payload):
171
  print("Update request:", update_request)
172
 
173
 
 
 
 
 
 
174
 
175
  def update_webhooks():
176
  """
177
- Delete the old
178
  """
 
 
179
 
180
  existing_webhooks = api.list_webhooks()
181
 
182
  webhook_url = os.environ["HF_WEBHOOK_URL"]
183
 
184
- id2update = [x for x in existing_webhooks if x.url == webhook_url]
185
 
186
- if len(id2update) > 1:
187
  print("More than one webhook found")
188
- print(id2update)
189
  print("updating the first one")
190
 
191
- id2update = id2update[0]
 
 
 
 
 
192
 
193
  # get trending models
194
 
195
- trending_models = api.list_models(sort="likes7d", direction=-1, limit=100)
196
 
197
  to_add = []
198
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  print("Update request:", update_request)
172
 
173
 
174
+ def is_user(user_or_org):
175
+ api_url = f"https://huggingface.co/api/users/{user_or_org}/overview"
176
+ response = requests.get(api_url)
177
+ return response.status_code == 200
178
+
179
 
180
  def update_webhooks():
181
  """
182
+ Update the old webhook every so often with trending models.
183
  """
184
+
185
+ print("Updating webhook")
186
 
187
  existing_webhooks = api.list_webhooks()
188
 
189
  webhook_url = os.environ["HF_WEBHOOK_URL"]
190
 
191
+ webhook2update = [x for x in existing_webhooks if x.url == webhook_url]
192
 
193
+ if len(webhook2update) > 1:
194
  print("More than one webhook found")
195
+ print(webhook2update)
196
  print("updating the first one")
197
 
198
+ id2update = webhook2update[0].id
199
+
200
+ watch_dict = {}
201
+
202
+ for ww in webhook2update[0].watched:
203
+ watch_dict[ww.name] = ww.type
204
 
205
  # get trending models
206
 
207
+ trending_models = api.list_models(sort="likes7d", direction=-1, limit=1000)
208
 
209
  to_add = []
210
+
211
+ for m in trending_models:
212
+ org_or_user = m.id.split("/")[0]
213
+ if org_or_user in watch_dict:
214
+ continue
215
+ if is_user(org_or_user):
216
+ to_add.append({"name": m.id, "type": "user"})
217
+ else:
218
+ to_add.append({"name": m.id, "type": "org"})
219
+
220
+ new_watched = webhook2update[0].watched + to_add
221
+
222
+ print("There are now", len(new_watched), "items in the watched list")
223
+
224
+ api.update_webhook(
225
+ id=id2update,
226
+ url=webhook_url,
227
+ watched=new_watched,
228
+ domains=["discussion"],
229
+ secret=WEBHOOK_SECRET,
230
+ )
231
+
232
+