davanstrien (HF staff) committed
Commit 212995a • 1 Parent(s): adf5d9a

schedule refreshing of data

Files changed (3)
  1. app.py +14 -1
  2. requirements.in +2 -2
  3. requirements.txt +11 -4
app.py CHANGED
@@ -8,20 +8,25 @@ from cachetools import TTLCache, cached
 from cytoolz import groupby
 from huggingface_hub import CollectionItem, get_collection, list_datasets, list_models
 from tqdm.auto import tqdm
+from apscheduler.schedulers.background import BackgroundScheduler
+from apscheduler.triggers.cron import CronTrigger
 
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 is_macos = platform.system() == "Darwin"
+local = platform.system() == "Darwin"
 LIMIT = 1000 if is_macos else None  # limit for local dev because slooow internet
-CACHE_TIME = 60 * 5  # 5 minutes
+CACHE_TIME = 60 * 15  # 15 minutes
 
 
 @cached(cache=TTLCache(maxsize=100, ttl=CACHE_TIME))
 def get_models():
+    print("getting models...")
     return list(tqdm(iter(list_models(full=True, limit=LIMIT))))
 
 
 @cached(cache=TTLCache(maxsize=100, ttl=CACHE_TIME))
 def get_datasets():
+    print("getting datasets...")
     return list(tqdm(iter(list_datasets(full=True, limit=LIMIT))))
 
 
@@ -79,6 +84,7 @@ def group_collection_items(collection_slug: str):
     items = collection.items
     return groupby(get_collection_type, items)
 
+
 @cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
 def get_papers_for_collection(collection_slug: str):
     dataset_to_arxiv_id = create_dataset_to_arxiv_id_dict()
@@ -132,6 +138,11 @@ def get_papers_for_collection(collection_slug: str):
     }
 
 
+scheduler = BackgroundScheduler()
+scheduler.add_job(get_datasets, "interval", minutes=15)
+scheduler.add_job(get_models, "interval", minutes=15)
+scheduler.start()
+
 placeholder_url = "HF-IA-archiving/models-to-archive-65006a7fdadb8c628f33aac9"
 slug_input = gr.Textbox(
     placeholder=placeholder_url, interactive=True, label="Collection slug", max_lines=1
@@ -147,6 +158,8 @@ examples = [
     placeholder_url,
     "davanstrien/historic-language-modeling-64f99e243188ade79d7ad74b",
 ]
+
+
 gr.Interface(
     get_papers_for_collection,
     slug_input,
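
For context, the commit pairs the existing cachetools TTL cache with an APScheduler background job, so the expensive Hub listings are re-fetched on roughly the same interval as the cache TTL. A minimal self-contained sketch of that pattern follows; get_data, its dummy return value, and the print message are illustrative stand-ins, not code from this repo:

# Sketch: a TTL-cached fetch kept warm by a background scheduler.
from cachetools import TTLCache, cached
from apscheduler.schedulers.background import BackgroundScheduler

CACHE_TIME = 60 * 15  # seconds; the same 15-minute TTL the commit sets

@cached(cache=TTLCache(maxsize=100, ttl=CACHE_TIME))
def get_data():
    # stand-in for a slow call such as list_models(full=True)
    print("fetching fresh data...")
    return ["item-1", "item-2"]

scheduler = BackgroundScheduler()
# Re-invoke the cached function every 15 minutes; when the TTL entry
# has expired, this run repopulates the cache in the background.
scheduler.add_job(get_data, "interval", minutes=15)
scheduler.start()

get_data()  # first call fills the cache; calls within the TTL return instantly

Because the scheduled job calls the same @cached function, a run that finds a still-valid entry is effectively a no-op; once the 15-minute TTL lapses, the next run re-fetches, so user requests rarely pay the cost of a cold cache.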
requirements.in CHANGED
@@ -1,5 +1,5 @@
+apscheduler
 cachetools
 cytoolz
 git+https://github.com/huggingface/huggingface_hub
-gradio
-httpx
+gradio
requirements.txt CHANGED
@@ -2,7 +2,7 @@
 # This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
-#    pip-compile requirements.in
+#    pip-compile
 #
 aiofiles==23.2.1
     # via gradio
@@ -15,6 +15,8 @@ anyio==3.7.1
     #   fastapi
     #   httpcore
     #   starlette
+apscheduler==3.10.4
+    # via -r requirements.in
 attrs==23.1.0
     # via
     #   jsonschema
@@ -60,7 +62,6 @@ httpcore==0.18.0
     # via httpx
 httpx==0.25.0
     # via
-    #   -r requirements.in
     #   gradio
     #   gradio-client
 huggingface-hub @ git+https://github.com/huggingface/huggingface_hub
@@ -132,7 +133,9 @@ python-dateutil==2.8.2
 python-multipart==0.0.6
     # via gradio
 pytz==2023.3.post1
-    # via pandas
+    # via
+    #   apscheduler
+    #   pandas
 pyyaml==6.0.1
     # via
     #   gradio
@@ -153,7 +156,9 @@ rpds-py==0.10.3
 semantic-version==2.10.0
     # via gradio
 six==1.16.0
-    # via python-dateutil
+    # via
+    #   apscheduler
+    #   python-dateutil
 sniffio==1.3.0
     # via
     #   anyio
@@ -177,6 +182,8 @@ typing-extensions==4.8.0
     #   pydantic-core
 tzdata==2023.3
     # via pandas
+tzlocal==5.0.1
+    # via apscheduler
 urllib3==2.0.5
     # via requests
 uvicorn==0.23.2