davanstrien HF staff committed on
Commit
e34ec8b
1 Parent(s): 2be9bc2

Add huggingface_hub library and pre-populate cache

Browse files
Files changed (1) hide show
  1. main.py +10 -1
main.py CHANGED
@@ -23,6 +23,7 @@ from starlette.responses import RedirectResponse
23
  from card_processing import parse_markdown, try_load_text, is_empty_template
24
  import logging
25
  import logging.config
 
26
  import yaml
27
 
28
  disable_progress_bars()
@@ -73,6 +74,14 @@ async def lifespan(app: FastAPI):
73
  local_dir_use_symlinks=False,
74
  )
75
  logger.info(f"Downloaded votes.jsonl to {path}")
 
 
 
 
 
 
 
 
76
  else:
77
  logger.info("Votes file already exists")
78
  yield
@@ -88,7 +97,7 @@ app = FastAPI(lifespan=lifespan)
88
 
89
  # # Configure CORS settings
90
  # app.add_middleware(
91
- # CORSMiddleware,
92
  # allow_origins=["https://huggingface.co/*"], # Update with your frontend URL
93
  # allow_credentials=True,
94
  # allow_methods=["*"],
 
23
  from card_processing import parse_markdown, try_load_text, is_empty_template
24
  import logging
25
  import logging.config
26
+ from huggingface_hub import list_datasets
27
  import yaml
28
 
29
  disable_progress_bars()
 
74
  local_dir_use_symlinks=False,
75
  )
76
  logger.info(f"Downloaded votes.jsonl to {path}")
77
+ # pre-populate the cache
78
+ datasets = list(list_datasets(sort="likes7d", direction=-1, limit=1000))
79
+ dataset_ids = [dataset.id for dataset in datasets]
80
+ # batch into 10s
81
+ for i in range(0, len(dataset_ids), 10):
82
+ # sleep for 1 second to avoid rate limiting
83
+ await asyncio.sleep(1)
84
+ await get_summaries(SummariesRequest(dataset_ids=dataset_ids[i : i + 10]))
85
  else:
86
  logger.info("Votes file already exists")
87
  yield
 
97
 
98
  # # Configure CORS settings
99
  # app.add_middleware(
100
+ # CORSMiddleware,
101
  # allow_origins=["https://huggingface.co/*"], # Update with your frontend URL
102
  # allow_credentials=True,
103
  # allow_methods=["*"],