# Spaces: Sleeping  (appears to be a hosting-page status banner captured with the file; not part of the program)
import base64 | |
import os | |
from collections import defaultdict | |
from datetime import date, datetime, timedelta | |
from io import BytesIO | |
import dotenv | |
from datasets import load_dataset | |
from dateutil.parser import parse | |
from dateutil.tz import tzutc | |
from fasthtml.common import * | |
from huggingface_hub import login, whoami | |
# Load settings via python-dotenv before anything below reads the environment
# (HF_TOKEN is read from os.environ further down).
dotenv.load_dotenv()

# Extra CSS for the weekly article grid and the cards rendered inside it.
style = Style("""
.grid { margin-bottom: 1rem; }
.card { display: flex; flex-direction: column; }
.card img { margin-bottom: 0.5rem; }
.card h5 { margin: 0; font-size: 0.9rem; line-height: 1.2; }
.card a { color: inherit; text-decoration: none; }
.card a:hover { text-decoration: underline; }
""")

# `app` is the application object and `rt` the value later used to register routes.
# NOTE(review): `html_style` does not look like a documented `fast_app` argument
# (extra <head> elements are normally passed via `hdrs`) - confirm that this
# stylesheet actually reaches the rendered page.
app, rt = fast_app(html_style=(style,))
# Authenticate against the Hugging Face Hub with the token taken from the
# environment, then derive the dataset repository from the account name.
_hf_token = os.environ.get("HF_TOKEN")
login(token=_hf_token)
hf_user = whoami(_hf_token)["name"]
HF_REPO_ID = f"{hf_user}/zotero-articles"

# Three configurations of the same repository, all using the "train" split.
abstract_ds = load_dataset(HF_REPO_ID, "abstracts", split="train")
article_ds = load_dataset(HF_REPO_ID, "articles", split="train")
# Only rows whose page_number is 1 are kept from the image configuration.
image_ds = load_dataset(HF_REPO_ID, "images", split="train").filter(
    lambda row: row["page_number"] == 1
)
def parse_date(date_string):
    """Convert a textual timestamp into a UTC calendar date.

    The value is parsed with ``dateutil.parser.parse``, converted to UTC and
    truncated to a ``date``.

    Args:
        date_string: Timestamp text to parse.

    Returns:
        The UTC ``date`` of the timestamp, or today's date when the value
        cannot be parsed.
    """
    try:
        return parse(date_string).astimezone(tzutc()).date()
    except (ValueError, TypeError, OverflowError):
        # Best-effort fallback kept from the original: an unusable value is
        # treated as "today". Besides ValueError, dateutil raises TypeError
        # for non-string input (e.g. a missing/None field) and OverflowError
        # for out-of-range dates; without catching those, a single bad row
        # would abort the module-level indexing loop.
        return date.today()
def get_week_start(date_obj):
    """Return the Monday of the week that contains ``date_obj``.

    ``weekday()`` is 0 for Monday, so stepping back that many days lands on
    the Monday on or before the given date.
    """
    days_since_monday = date_obj.weekday()
    return date_obj - timedelta(days=days_since_monday)
# Index article ids by the Monday of the week in which each article was added.
week2articles = defaultdict(list)
for article in article_ds:
    week2articles[get_week_start(parse_date(article["date_added"]))].append(
        article["arxiv_id"]
    )

# Week-start dates ordered newest first; position 0 is the most recent week.
weeks = sorted(week2articles, reverse=True)
def get_article_details(arxiv_id):
    """Collect the stored data for one arXiv id.

    Args:
        arxiv_id: Identifier compared against each row's ``"arxiv_id"`` field.

    Returns:
        A ``(article, abstract, image)`` tuple: the first matching row of
        ``article_ds``, followed by the matching subsets of ``abstract_ds``
        and ``image_ds``. The ``[0]`` lookup assumes at least one article row
        matches.
    """

    def _has_id(row):
        return row["arxiv_id"] == arxiv_id

    matching_articles = article_ds.filter(_has_id)
    return matching_articles[0], abstract_ds.filter(_has_id), image_ds.filter(_has_id)
def _nav_button(label, target_week):
    """Build one week-navigation button; it is disabled when there is no target week."""
    return Button(
        label,
        hx_get=f"/week/{target_week}" if target_week else "#",
        hx_target="#content",
        hx_swap="innerHTML",
        disabled=target_week is None,
    )


def _jpeg_data_url(pil_image):
    """Encode an image as a base64 ``data:`` URL containing JPEG bytes."""
    buffer = BytesIO()
    try:
        pil_image.save(buffer, format="JPEG")
    except OSError:
        # Pillow refuses to write modes JPEG cannot store (e.g. RGBA or
        # palette images); retry on an RGB copy instead of failing the page.
        buffer = BytesIO()
        pil_image.convert("RGB").save(buffer, format="JPEG")
    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{encoded}"


def _article_card(arxiv_id):
    """Build the card for one article: linked title plus its image when one exists."""
    article, _abstract, image = get_article_details(arxiv_id)
    contents = article["contents"]
    article_title = contents[0].get("paper_title", "article") if contents else "article"
    card_content = [H5(A(article_title, href=f"https://arxiv.org/abs/{arxiv_id}", target="_blank"))]
    if image:
        card_content.append(
            Img(
                src=_jpeg_data_url(image[0]["image"]),
                alt="Article image",
                style="max-width: 100%; height: auto; margin-bottom: 15px;",
            )
        )
    return Card(*card_content, cls="mb-4")


def generate_week_content(current_week):
    """Render the navigation, heading and article grid for one week.

    Args:
        current_week: Week-start date; it must be one of the entries in
            ``weeks``.

    Returns:
        A ``Div`` with ``id="content"`` holding the navigation buttons (above
        and below), a heading with the date range and article count, and a
        three-column grid of article cards.

    Raises:
        ValueError: If ``current_week`` is not present in ``weeks``.
    """
    week_index = weeks.index(current_week)
    # `weeks` is sorted newest first, so the previous (older) week sits at the
    # next index and the next (newer) week at the preceding one.
    prev_week = weeks[week_index + 1] if week_index < len(weeks) - 1 else None
    next_week = weeks[week_index - 1] if week_index > 0 else None

    # The labels use explicit escapes for the arrows (U+2190 / U+2192); the
    # previous literals were mis-decoded and rendered as a stray Greek letter.
    nav_buttons = Group(
        _nav_button("\u2190 Previous Week", prev_week),
        _nav_button("Next Week \u2192", next_week),
    )

    articles = week2articles[current_week]
    article_cards = [_article_card(arxiv_id) for arxiv_id in articles]
    grid = Grid(*article_cards, style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem;")

    week_end = current_week + timedelta(days=6)
    return Div(
        nav_buttons,
        H3(f"Week of {current_week.strftime('%B %d')} - {week_end.strftime('%B %d, %Y')} ({len(articles)} articles)"),
        grid,
        nav_buttons,
        id="content",
    )
@rt("/")
def get():
    """Serve the landing page showing the most recent week of articles.

    The handler is registered on ``/`` explicitly; without a route decorator
    it would never be served and would be shadowed by the later ``get``.

    Returns:
        A titled page wrapping the content of ``weeks[0]``, or a short notice
        when no weeks were indexed.
    """
    if not weeks:
        # An empty dataset leaves `weeks` empty; avoid an IndexError on weeks[0].
        return Titled("AnswerAI Zotero Weekly", Div("No articles found.", id="content"))
    return Titled("AnswerAI Zotero Weekly", generate_week_content(weeks[0]))
@rt("/week/{date}")
def get(date: str):
    """Serve the content fragment for the week starting on ``date``.

    Registered on ``/week/{date}``, the URL pattern requested by the
    navigation buttons.

    Args:
        date: Week-start date in ``YYYY-MM-DD`` form. The parameter name
            matches the path placeholder and shadows the imported ``date``
            class inside this function only.

    Returns:
        The rendered week content, or a ``Div`` carrying the error message
        when the date is malformed, unknown, or rendering fails.
    """
    try:
        current_week = datetime.strptime(date, "%Y-%m-%d").date()
        return generate_week_content(current_week)
    except Exception as e:
        # Request boundary: show the failure in the page instead of a bare 500.
        return Div(f"Error displaying articles: {e}")
# Module entry point. `serve` is not defined in this file - presumably it comes
# from the `fasthtml.common` star import and starts the web server; confirm.
serve()