rbiswasfc's picture
images
748a8f9
raw
history blame
4.33 kB
import base64
import os
from collections import defaultdict
from datetime import date, datetime, timedelta
from io import BytesIO
import dotenv
from datasets import load_dataset
from dateutil.parser import parse
from dateutil.tz import tzutc
from fasthtml.common import *
from huggingface_hub import login, whoami
# Load variables from a .env file (if one is found) into the process
# environment; HF_TOKEN is read from os.environ further down.
dotenv.load_dotenv()
# Extra CSS for the article grid and the cards rendered by
# generate_week_content(); handed to fast_app() below.
style = Style("""
.grid { margin-bottom: 1rem; }
.card { display: flex; flex-direction: column; }
.card img { margin-bottom: 0.5rem; }
.card h5 { margin: 0; font-size: 0.9rem; line-height: 1.2; }
.card a { color: inherit; text-decoration: none; }
.card a:hover { text-decoration: underline; }
""")
# `app` is the application object and `rt` the route decorator used by the
# handlers at the bottom of this file.
# NOTE(review): the stylesheet is passed through the `html_style` keyword;
# confirm fast_app() treats that as a page-wide style hook (fasthtml documents
# `hdrs` for extra header elements).
app, rt = fast_app(html_style=(style,))
# Authenticate against the Hugging Face Hub with the token taken from the
# environment, then look up the account name that owns the dataset repo.
login(token=os.getenv("HF_TOKEN"))
hf_user = whoami(os.getenv("HF_TOKEN"))["name"]
HF_REPO_ID = f"{hf_user}/zotero-articles"

# The repo is read through three configurations, each using its "train" split.
abstract_ds = load_dataset(HF_REPO_ID, "abstracts", split="train")
article_ds = load_dataset(HF_REPO_ID, "articles", split="train")
# Only rows whose page_number is 1 are kept; they serve as card thumbnails.
image_ds = load_dataset(HF_REPO_ID, "images", split="train").filter(
    lambda row: row["page_number"] == 1
)
def parse_date(date_string):
    """Convert a timestamp string into a calendar date expressed in UTC.

    Args:
        date_string: Date/time text in a format accepted by
            ``dateutil.parser.parse``.

    Returns:
        The ``datetime.date`` of the parsed instant after conversion to UTC.
        When the value cannot be parsed, today's date is returned instead
        (best-effort fallback so one bad record does not abort loading).
    """
    try:
        return parse(date_string).astimezone(tzutc()).date()
    except (ValueError, TypeError, OverflowError):
        # dateutil raises ParserError (a ValueError subclass) for malformed
        # text, TypeError for non-string input such as None, and
        # OverflowError for out-of-range values; all of them mean
        # "no usable date", so they share the same fallback.
        return date.today()
def get_week_start(date_obj):
    """Return the Monday that begins the week containing ``date_obj``.

    Args:
        date_obj: A ``date`` (or ``datetime``) value.

    Returns:
        ``date_obj`` moved back by its ``weekday()`` offset (Monday is 0), so
        a Monday maps to itself. The result has the same type as the input.
    """
    days_since_monday = date_obj.weekday()
    return date_obj - timedelta(days=days_since_monday)
# Bucket arXiv ids by the Monday of the week in which each article was added.
week2articles = defaultdict(list)
for article in article_ds:
    week_start = get_week_start(parse_date(article["date_added"]))
    week2articles[week_start].append(article["arxiv_id"])

# Week keys ordered newest first; weeks[0] is the week shown on the index page.
weeks = sorted(week2articles, reverse=True)
def get_article_details(arxiv_id):
    """Collect the article, abstract and image records for one arXiv id.

    Args:
        arxiv_id: Value compared against the ``"arxiv_id"`` field of each row.

    Returns:
        A tuple ``(article, abstract, image)``. ``article`` is the first
        matching row of ``article_ds``; ``abstract`` and ``image`` are the
        filtered views of ``abstract_ds`` and ``image_ds`` and may be empty.

    Raises:
        IndexError: If no row of ``article_ds`` has this id.
    """

    def has_requested_id(row):
        return row["arxiv_id"] == arxiv_id

    # The article lookup comes first so a missing id fails before the other
    # two datasets are scanned.
    article = article_ds.filter(has_requested_id)[0]
    abstract = abstract_ds.filter(has_requested_id)
    image = image_ds.filter(has_requested_id)
    return article, abstract, image
def _nav_button(label, target_week):
    """Build one week-navigation button, disabled when there is no target week."""
    return Button(
        label,
        hx_get=f"/week/{target_week}" if target_week else "#",
        hx_target="#content",
        hx_swap="innerHTML",
        disabled=not target_week,
    )


def _jpeg_data_url(pil_image):
    """Encode an image as an inline base64 JPEG ``data:`` URL."""
    # Assumes a PIL image (the value is saved with format="JPEG" below).
    # JPEG cannot store alpha or palette data, so such images are flattened
    # to RGB first instead of letting save() raise.
    if pil_image.mode in ("RGBA", "LA", "P"):
        pil_image = pil_image.convert("RGB")
    buffer = BytesIO()
    pil_image.save(buffer, format="JPEG")
    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{encoded}"


def _article_card(arxiv_id):
    """Build the card (linked title plus optional thumbnail) for one article."""
    article, _abstract, image = get_article_details(arxiv_id)

    contents = article["contents"]
    # `or` also covers a title that is present but None/empty, which a
    # .get() default alone would pass through as the link text.
    article_title = (contents[0].get("paper_title") if contents else None) or "article"

    card_content = [H5(A(article_title, href=f"https://arxiv.org/abs/{arxiv_id}", target="_blank"))]
    if image:
        card_content.append(
            Img(
                src=_jpeg_data_url(image[0]["image"]),
                alt="Article image",
                style="max-width: 100%; height: auto; margin-bottom: 15px;",
            )
        )
    return Card(*card_content, cls="mb-4")


def generate_week_content(current_week):
    """Render the navigation, heading and article grid for one week.

    Args:
        current_week: Week-start date; must be one of the entries in ``weeks``.

    Returns:
        A ``Div`` with ``id="content"`` holding the navigation buttons (above
        and below), a heading with the date range and article count, and a
        three-column grid of article cards.

    Raises:
        ValueError: If ``current_week`` is not present in ``weeks``.
    """
    week_index = weeks.index(current_week)
    # `weeks` is sorted newest first, so the previous (older) week is at the
    # next index and the next (newer) week at the preceding one.
    prev_week = weeks[week_index + 1] if week_index < len(weeks) - 1 else None
    next_week = weeks[week_index - 1] if week_index > 0 else None

    # The arrow glyphs were mis-decoded UTF-8 in the labels; restored here.
    nav_buttons = Group(
        _nav_button("← Previous Week", prev_week),
        _nav_button("Next Week →", next_week),
    )

    articles = week2articles[current_week]
    article_cards = [_article_card(arxiv_id) for arxiv_id in articles]
    grid = Grid(*article_cards, style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem;")

    week_end = current_week + timedelta(days=6)
    return Div(
        nav_buttons,
        H3(f"Week of {current_week.strftime('%B %d')} - {week_end.strftime('%B %d, %Y')} ({len(articles)} articles)"),
        grid,
        nav_buttons,
        id="content",
    )
@rt("/")
def get():
    """Serve the landing page, showing the most recent week of articles."""
    if not weeks:
        # Nothing was loaded, so there is no latest week to render and
        # weeks[0] would raise IndexError; show a placeholder page instead.
        return Titled("AnswerAI Zotero Weekly", Div("No articles available yet.", id="content"))
    return Titled("AnswerAI Zotero Weekly", generate_week_content(weeks[0]))
@rt("/week/{date}")
def get(date: str):
    """Serve the content fragment for the week starting on ``date``.

    ``date`` is the path segment and is expected in ``YYYY-MM-DD`` form. Any
    failure while parsing it or rendering the week is reported as a plain
    error message rather than propagated.
    """
    try:
        requested = datetime.strptime(date, "%Y-%m-%d")
        fragment = generate_week_content(requested.date())
    except Exception as err:
        fragment = Div(f"Error displaying articles: {err!s}")
    return fragment
# Start serving the app. `serve` comes from the `fasthtml.common` star import.
# NOTE(review): presumably this only launches a server when the file is run
# as a script — confirm against the fasthtml documentation.
serve()