|
import json |
|
import os |
|
from datetime import datetime, timezone |
|
|
|
from fasthtml.common import * |
|
from huggingface_hub import HfApi, hf_hub_download |
|
from starlette.responses import FileResponse |
|
from generate_newsletter import process_new_papers |
|
from apscheduler.schedulers.background import BackgroundScheduler |
|
from apscheduler.triggers.cron import CronTrigger |
|
|
|
|
|
# Hugging Face access token, read from the environment (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Dataset repository that holds the newsletter files.
DATASET_NAME = "cmcmaster/this_week_in_rheumatology"

# Shared Hub client used by the helpers and routes in this module.
api = HfApi(token=HF_TOKEN)
|
|
|
|
|
def _run_newsletter_job():
    """Run ``process_new_papers`` for the week ending on the latest past Sunday.

    APScheduler hands a job's ``kwargs`` to the callable verbatim and performs
    no template rendering, so an Airflow-style ``{{ ... }}`` string would reach
    ``process_new_papers`` as literal text. The end date is therefore computed
    here, each time the job fires.
    """
    # Imported locally so the job does not depend on which datetime names the
    # module-level imports (including the fasthtml star import) expose.
    from datetime import datetime, timedelta, timezone

    # NOTE(review): UTC is assumed for "today" — confirm against what
    # process_new_papers expects.
    now = datetime.now(timezone.utc)
    # Monday is weekday 0, so subtracting weekday() + 1 days always lands on
    # the most recent Sunday strictly before today.
    end_date = (now - timedelta(days=now.weekday() + 1)).strftime("%Y-%m-%d")
    process_new_papers(end_date=end_date, test=False)


scheduler = BackgroundScheduler()

# NOTE(review): the job is named "Weekly" but fires every 6 hours; the interval
# is kept unchanged — confirm whether a weekly cron trigger was intended.
scheduler.add_job(
    _run_newsletter_job,
    trigger="interval",
    hours=6,
    id='generate_newsletter',
    name='Weekly newsletter generation',
    replace_existing=True,
)
|
|
|
# Site-wide stylesheet; it is passed to the app as one of its page headers.
_STYLESHEET = """
    body {
        font-family: Georgia, Times, serif;
        line-height: 1.6;
        color: #333;
        max-width: 800px;
        margin: 0 auto;
        padding: 20px;
        background: #fff;
    }

    h1, h2 {
        color: #2c3e50;
        font-family: Georgia, Times, serif;
    }

    a {
        color: #2c3e50;
        text-decoration: none;
    }

    a:hover {
        text-decoration: underline;
    }

    ul {
        list-style-type: none;
        padding: 0;
    }

    li {
        margin-bottom: 10px;
    }

    .newsletter-content {
        margin-top: 20px;
    }

    .download-links {
        margin: 20px 0;
    }

    .download-link {
        display: inline-block;
        padding: 10px 20px;
        background-color: #2c3e50;
        color: white;
        border-radius: 3px;
        margin: 0 10px 10px 0;
        font-family: Georgia, Times, serif;
    }

    .download-link:hover {
        background-color: #34495e;
        text-decoration: none;
    }
"""

css = Style(_STYLESHEET)
|
|
|
# Headers attached to the app: the stylesheet plus the MarkdownJS and
# HighlightJS header components.
_page_headers = (
    css,
    MarkdownJS(),
    HighlightJS(langs=['python', 'javascript', 'html', 'css']),
)

app = FastHTML(hdrs=_page_headers)
|
|
|
|
|
|
|
@app.on_event("startup")
async def start_scheduler():
    """Start the background scheduler when the application starts up.

    Calling ``start()`` on an APScheduler instance that is already running
    raises ``SchedulerAlreadyRunningError``; the guard makes a repeated
    startup event a no-op instead of a crash.
    """
    if not scheduler.running:
        scheduler.start()
|
|
|
|
|
|
|
@app.on_event("shutdown")
async def shutdown_scheduler():
    """Stop the background scheduler when the application shuts down.

    Calling ``shutdown()`` on an APScheduler instance that is not running
    raises ``SchedulerNotRunningError``; the guard keeps application shutdown
    clean when the scheduler never started (e.g. startup failed part-way).
    """
    if scheduler.running:
        scheduler.shutdown()
|
|
|
|
|
def get_newsletter_list():
    """Return the newsletter JSON paths stored in the dataset repo.

    Every file in the repo is listed, only paths ending in
    ``newsletter.json`` are kept, and the result is sorted in descending
    string order (newest first when folders are named ``YYYYMMDD``).
    """
    repo_files = api.list_repo_files(repo_id=DATASET_NAME, repo_type="dataset")
    matching = filter(lambda name: name.endswith('newsletter.json'), repo_files)
    return sorted(matching, reverse=True)
|
|
|
|
|
def get_newsletter_content(path):
    """Download a newsletter JSON file from the dataset repo and parse it.

    Args:
        path: File path inside the dataset repo, e.g.
            ``<date>/newsletter.json``.

    Returns:
        The decoded JSON document.

    Raises:
        Exception: whatever ``hf_hub_download`` raises when the file cannot
            be fetched, or ``json.JSONDecodeError`` when it is not valid JSON.
    """
    local_file = api.hf_hub_download(
        repo_id=DATASET_NAME,
        filename=path,
        repo_type="dataset",
    )
    # JSON is UTF-8 by specification; decode it explicitly rather than relying
    # on the platform's locale-dependent default encoding.
    with open(local_file, 'r', encoding='utf-8') as f:
        return json.load(f)
|
|
|
|
|
def check_format_exists(date: str, format: str) -> bool:
    """Report whether ``<date>/newsletter.<format>`` can be fetched.

    The check is an actual download attempt against the dataset repo; any
    exception raised by that attempt is interpreted as "not available".
    """
    try:
        target = f"{date}/newsletter.{format}"
        api.hf_hub_download(repo_id=DATASET_NAME,
                            filename=target,
                            repo_type="dataset")
    except Exception:
        return False
    else:
        return True
|
|
|
|
|
@app.get("/")
def index():
    """Render the landing page listing a link to each available newsletter.

    Each newsletter path is expected to look like ``YYYYMMDD/newsletter.json``.
    Paths whose leading folder is not a valid ``YYYYMMDD`` date are skipped, so
    a single stray file in the dataset cannot take down the whole index.
    """
    links = []
    for path in get_newsletter_list():
        folder = path.split('/')[0]
        try:
            label = datetime.strptime(folder, '%Y%m%d').strftime('%B %d, %Y')
        except ValueError:
            # Not a date-named folder; there is no sensible link to show.
            continue
        links.append(Li(A(label, href=f"/newsletter/{folder}")))

    return Titled("This Week in Rheumatology", H2("Available Newsletters"),
                  Ul(*links))
|
|
|
|
|
@app.get("/newsletter/{date}")
def newsletter(date: str):
    """Render the newsletter stored under ``<date>/newsletter.json``.

    The page shows a back link, a download button for each of the PDF and
    EPUB formats that ``check_format_exists`` reports as available, and the
    newsletter body in a ``marked`` container. Any exception while loading
    or rendering produces the "Newsletter not found" page instead.
    """
    json_path = f"{date}/newsletter.json"
    # (format, button label) pairs, in the order the buttons are shown.
    offered_formats = (("pdf", "Download PDF"), ("epub", "Download EPUB"))

    try:
        content = get_newsletter_content(json_path)

        download_links = [
            A(label, href=f"/download/{date}/{fmt}", cls="download-link")
            for fmt, label in offered_formats
            if check_format_exists(date, fmt)
        ]

        page_title = f"This Week in Rheumatology - {content['date']}"
        back_link = A("Back to Index", href="/")
        downloads = Div(*download_links, cls="download-links")
        body = Div(content['content'], cls="marked")
        return Titled(page_title, back_link, downloads, body)
    except Exception:
        return Titled("Error", H2("Newsletter not found"),
                      P(f"Unable to load newsletter for date: {date}"),
                      A("Back to Index", href="/"))
|
|
|
|
|
# Formats the download route serves, mapped to their response media types.
_DOWNLOAD_MEDIA_TYPES = {
    "pdf": "application/pdf",
    "epub": "application/epub+zip",
}


@app.get("/download/{date}/{format}")
def download_file(date: str, format: str):
    """Serve ``<date>/newsletter.<format>`` from the dataset as a download.

    Args:
        date: Newsletter folder name taken from the URL.
        format: Requested file format; only ``pdf`` and ``epub`` are served.

    Returns:
        A ``FileResponse`` for the downloaded file, or an error page when the
        format is unsupported or the file cannot be fetched.
    """
    try:
        # Validate the requested format before touching the network, so an
        # unsupported value never triggers a fetch of an arbitrary
        # ``newsletter.<format>`` file from the Hub.
        media_type = _DOWNLOAD_MEDIA_TYPES.get(format)
        if media_type is None:
            raise ValueError(f"Unsupported format: {format}")

        local_file = api.hf_hub_download(
            repo_id=DATASET_NAME,
            filename=f"{date}/newsletter.{format}",
            repo_type="dataset",
        )

        return FileResponse(local_file,
                            media_type=media_type,
                            filename=f"newsletter_{date}.{format}")
    except Exception:
        return Titled("Error", H2(f"{format.upper()} not found"),
                      P(f"Unable to load {format.upper()} for date: {date}"),
                      A("Back to Index", href="/"))
|
|
|
# Hand control to fasthtml's `serve()` helper; it must stay a module-level
# call. NOTE(review): presumably it only launches the server when this file
# is run as a script — confirm against the fasthtml docs.
serve()
|
|