Spaces:

cmcmaster
/

this_week_in_rheumatology

Running

App Files Files Community

this_week_in_rheumatology / main.py

cmcmaster

Update main.py

9ee0be3 verified 7 months ago

raw

history blame contribute delete

5.91 kB

	import json
	import os
	from datetime import datetime, timezone

	from fasthtml.common import *
	from huggingface_hub import HfApi, hf_hub_download
	from starlette.responses import FileResponse
	from generate_newsletter import process_new_papers
	from apscheduler.schedulers.background import BackgroundScheduler
	from apscheduler.triggers.cron import CronTrigger

	# Hugging Face Hub client: the dataset repo that stores the newsletters, and
	# an API handle authenticated with the token from the environment (if set).
	DATASET_NAME = "cmcmaster/this_week_in_rheumatology"
	HF_TOKEN = os.getenv("HF_TOKEN")
	api = HfApi(token=HF_TOKEN)

	# Initialize scheduler
	scheduler = BackgroundScheduler()


	def _run_newsletter_job():
	    """Run newsletter generation for the week ending on the latest Sunday.

	    APScheduler passes a job's ``kwargs`` to the callable unchanged; it does
	    not render template strings. The cut-off date is therefore computed
	    here, each time the job fires, and handed to ``process_new_papers`` as a
	    ``YYYY-MM-DD`` string.
	    """
	    from datetime import timedelta  # local import: only this job needs it

	    today = datetime.now(timezone.utc).date()
	    # weekday() is 0 on Monday, so this steps back to the Sunday strictly
	    # before today (a full week back when today is itself a Sunday).
	    week_end = today - timedelta(days=today.weekday() + 1)
	    process_new_papers(end_date=week_end.strftime("%Y-%m-%d"), test=False)


	# Run newsletter generation every 6 hours. Each run targets the week ending
	# on the most recent Sunday (UTC).
	# NOTE(review): this assumes process_new_papers tolerates repeated runs for
	# the same week -- confirm against generate_newsletter.
	scheduler.add_job(_run_newsletter_job,
	                  trigger="interval",
	                  hours=6,
	                  id='generate_newsletter',
	                  name='Weekly newsletter generation',
	                  replace_existing=True)

	# Page stylesheet wrapped in a Style element; it is passed to the app through
	# the `hdrs` tuple where `app` is created.
	css = Style("""
	body {
	font-family: Georgia, Times, serif;
	line-height: 1.6;
	color: #333;
	max-width: 800px;
	margin: 0 auto;
	padding: 20px;
	background: #fff;
	}

	h1, h2 {
	color: #2c3e50;
	font-family: Georgia, Times, serif;
	}

	a {
	color: #2c3e50;
	text-decoration: none;
	}

	a:hover {
	text-decoration: underline;
	}

	ul {
	list-style-type: none;
	padding: 0;
	}

	li {
	margin-bottom: 10px;
	}

	.newsletter-content {
	margin-top: 20px;
	}

	.download-links {
	margin: 20px 0;
	}

	.download-link {
	display: inline-block;
	padding: 10px 20px;
	background-color: #2c3e50;
	color: white;
	border-radius: 3px;
	margin: 0 10px 10px 0;
	font-family: Georgia, Times, serif;
	}

	.download-link:hover {
	background-color: #34495e;
	text-decoration: none;
	}
	""")

	# Application object. The headers tuple carries the stylesheet plus the
	# MarkdownJS and HighlightJS header components.
	app = FastHTML(
	    hdrs=(
	        css,
	        MarkdownJS(),
	        HighlightJS(langs=["python", "javascript", "html", "css"]),
	    )
	)


	@app.on_event("startup")
	async def start_scheduler():
	    """Startup hook: start the background scheduler when the app starts."""
	    scheduler.start()


	@app.on_event("shutdown")
	async def shutdown_scheduler():
	    """Shutdown hook: stop the background scheduler when the app stops."""
	    scheduler.shutdown()


	def get_newsletter_list():
	    """Return the dataset's newsletter JSON paths in descending order.

	    Lists every file in the dataset repository, keeps the paths that end in
	    ``newsletter.json``, and sorts them in reverse string order.
	    """
	    repo_files = api.list_repo_files(repo_id=DATASET_NAME, repo_type="dataset")
	    matches = list(
	        filter(lambda name: name.endswith('newsletter.json'), repo_files)
	    )
	    matches.sort(reverse=True)
	    return matches


	def get_newsletter_content(path):
	    """Download one newsletter JSON file from the dataset and parse it.

	    Args:
	        path: File path inside the dataset repository, e.g.
	            ``"<date>/newsletter.json"``.

	    Returns:
	        The decoded JSON document.

	    Raises:
	        Any exception raised by the Hub download, by opening the file, or
	        by JSON decoding is propagated to the caller.
	    """
	    local_path = api.hf_hub_download(repo_id=DATASET_NAME,
	                                     filename=path,
	                                     repo_type="dataset")
	    # Decode explicitly as UTF-8 so the result does not depend on the
	    # process locale's default encoding.
	    with open(local_path, 'r', encoding='utf-8') as f:
	        return json.load(f)


	def check_format_exists(date: str, format: str) -> bool:
	    """Report whether ``<date>/newsletter.<format>`` can be fetched.

	    The check attempts a Hub download of the file; any exception raised by
	    that attempt is treated as "does not exist".
	    """
	    try:
	        target = f"{date}/newsletter.{format}"
	        api.hf_hub_download(repo_id=DATASET_NAME,
	                            filename=target,
	                            repo_type="dataset")
	    except Exception:
	        return False
	    else:
	        return True


	@app.get("/")
	def index():
	    """Render the landing page: one link per available newsletter.

	    The first path component of each newsletter file is parsed as a
	    ``YYYYMMDD`` date, which becomes the link text and the URL segment.
	    Paths whose first component is not such a date are skipped, so a single
	    stray file in the dataset cannot break the whole page.
	    """
	    links = []
	    for path in get_newsletter_list():
	        folder = path.split('/')[0]
	        try:
	            issue_date = datetime.strptime(folder, '%Y%m%d')
	        except ValueError:
	            # Not a dated newsletter folder; leave it out of the index.
	            continue
	        links.append(
	            Li(A(issue_date.strftime('%B %d, %Y'),
	                 href=f"/newsletter/{folder}")))
	    return Titled("This Week in Rheumatology", H2("Available Newsletters"),
	                  Ul(*links))


	@app.get("/newsletter/{date}")
	def newsletter(date: str):
	    """Render the newsletter page for ``date``.

	    Loads ``<date>/newsletter.json``, adds a download link for each of the
	    PDF and EPUB formats that exists, and renders the newsletter content in
	    a ``marked`` div. Any failure along the way yields the "Newsletter not
	    found" page instead.
	    """
	    try:
	        content = get_newsletter_content(f"{date}/newsletter.json")

	        # (format, link text, link target), probed in this order.
	        candidates = (
	            ("pdf", "Download PDF", f"/download/{date}/pdf"),
	            ("epub", "Download EPUB", f"/download/{date}/epub"),
	        )
	        download_links = [
	            A(label, href=target, cls="download-link")
	            for fmt, label, target in candidates
	            if check_format_exists(date, fmt)
	        ]

	        heading = f"This Week in Rheumatology - {content['date']}"
	        return Titled(
	            heading,
	            A("Back to Index", href="/"),
	            Div(*download_links, cls="download-links"),
	            Div(content['content'], cls="marked"),
	        )
	    except Exception:
	        return Titled(
	            "Error",
	            H2("Newsletter not found"),
	            P(f"Unable to load newsletter for date: {date}"),
	            A("Back to Index", href="/"),
	        )


	@app.get("/download/{date}/{format}")
	def download_file(date: str, format: str):
	    """Serve the PDF or EPUB edition of the newsletter for ``date``.

	    Args:
	        date: Folder name of the newsletter in the dataset repository.
	        format: Requested file format; only ``"pdf"`` and ``"epub"`` are
	            supported.

	    Returns:
	        A ``FileResponse`` for the downloaded file, or an error page when
	        the format is unsupported or the file cannot be fetched.
	    """
	    media_types = {
	        "pdf": "application/pdf",
	        "epub": "application/epub+zip",
	    }
	    try:
	        # Reject unsupported formats before touching the Hub, so an
	        # arbitrary extension in the URL never triggers a download.
	        media_type = media_types.get(format)
	        if media_type is None:
	            raise ValueError(f"Unsupported format: {format}")

	        local_path = api.hf_hub_download(repo_id=DATASET_NAME,
	                                         filename=f"{date}/newsletter.{format}",
	                                         repo_type="dataset")

	        return FileResponse(local_path,
	                            media_type=media_type,
	                            filename=f"newsletter_{date}.{format}")
	    except Exception:
	        return Titled("Error", H2(f"{format.upper()} not found"),
	                      P(f"Unable to load {format.upper()} for date: {date}"),
	                      A("Back to Index", href="/"))

	# Launch the web server for `app`.
	# NOTE(review): `serve` presumably comes from the `fasthtml.common` star
	# import -- confirm.
	serve()