Ali2206 committed on
Commit
97e2e8f
·
verified ·
1 Parent(s): 6eca372

Update api/routes/pdf.py

Browse files
Files changed (1) hide show
  1. api/routes/pdf.py +213 -76
api/routes/pdf.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import APIRouter, HTTPException, Depends, Response
2
  from db.mongo import patients_collection
3
  from core.security import get_current_user
4
  from utils.helpers import calculate_age, escape_latex_special_chars, hyphenate_long_strings, format_timestamp
@@ -10,6 +10,13 @@ import subprocess
10
  from tempfile import TemporaryDirectory
11
  from string import Template
12
  import logging
 
 
 
 
 
 
 
13
 
14
  # Configure logging
15
  logging.basicConfig(
@@ -20,26 +27,35 @@ logger = logging.getLogger(__name__)
20
 
21
  router = APIRouter()
22
 
23
- @router.get("/{patient_id}/pdf", response_class=Response)
24
- async def generate_patient_pdf(patient_id: str, current_user: dict = Depends(get_current_user)):
25
- # Suppress logging for this route
26
- logger.setLevel(logging.CRITICAL)
27
 
 
 
28
  try:
29
- if current_user.get('role') not in ['doctor', 'admin']:
30
- raise HTTPException(status_code=403, detail="Only clinicians can generate patient PDFs")
31
-
32
- # Determine if patient_id is ObjectId or fhir_id
33
- try:
34
- obj_id = ObjectId(patient_id)
35
- query = {"$or": [{"_id": obj_id}, {"fhir_id": patient_id}]}
36
- except InvalidId:
37
- query = {"fhir_id": patient_id}
38
-
39
- patient = await patients_collection.find_one(query)
40
- if not patient:
41
- raise HTTPException(status_code=404, detail="Patient not found")
 
 
 
 
42
 
 
 
 
43
  # Prepare table content with proper LaTeX formatting
44
  def prepare_table_content(items, columns, default_message):
45
  if not items:
@@ -54,20 +70,17 @@ async def generate_patient_pdf(patient_id: str, current_user: dict = Depends(get
54
  content.append(" & ".join(row) + " \\\\")
55
  return "\n".join(content)
56
 
57
- # Notes table
58
- notes = patient.get("notes", [])
59
  notes_content = prepare_table_content(
60
  [{
61
  "date": format_timestamp(n.get("date", "")),
62
  "type": n.get("type", ""),
63
  "text": n.get("text", "")
64
- } for n in notes],
65
  3,
66
  "No notes available"
67
  )
68
 
69
- # Conditions table
70
- conditions = patient.get("conditions", [])
71
  conditions_content = prepare_table_content(
72
  [{
73
  "id": c.get("id", ""),
@@ -75,13 +88,11 @@ async def generate_patient_pdf(patient_id: str, current_user: dict = Depends(get
75
  "status": c.get("status", ""),
76
  "onset": format_timestamp(c.get("onset_date", "")),
77
  "verification": c.get("verification_status", "")
78
- } for c in conditions],
79
  5,
80
  "No conditions available"
81
  )
82
 
83
- # Medications table
84
- medications = patient.get("medications", [])
85
  medications_content = prepare_table_content(
86
  [{
87
  "id": m.get("id", ""),
@@ -89,13 +100,11 @@ async def generate_patient_pdf(patient_id: str, current_user: dict = Depends(get
89
  "status": m.get("status", ""),
90
  "date": format_timestamp(m.get("prescribed_date", "")),
91
  "dosage": m.get("dosage", "")
92
- } for m in medications],
93
  5,
94
  "No medications available"
95
  )
96
 
97
- # Encounters table
98
- encounters = patient.get("encounters", [])
99
  encounters_content = prepare_table_content(
100
  [{
101
  "id": e.get("id", ""),
@@ -103,12 +112,12 @@ async def generate_patient_pdf(patient_id: str, current_user: dict = Depends(get
103
  "status": e.get("status", ""),
104
  "start": format_timestamp(e.get("period", {}).get("start", "")),
105
  "provider": e.get("service_provider", "")
106
- } for e in encounters],
107
  5,
108
  "No encounters available"
109
  )
110
 
111
- # LaTeX template with improved table formatting
112
  latex_template = Template(r"""
113
  \documentclass[a4paper,12pt]{article}
114
  \usepackage[utf8]{inputenc}
@@ -181,16 +190,14 @@ $encounters
181
  \end{document}
182
  """)
183
 
184
- # Set the generated_on date to 02:54 PM CET, May 17, 2025
185
- generated_on = datetime.strptime("2025-05-17 14:54:00+02:00", "%Y-%m-%d %H:%M:%S%z").strftime("%A, %B %d, %Y at %I:%M %p %Z")
186
-
187
  latex_filled = latex_template.substitute(
188
- generated_on=generated_on,
189
- fhir_id=escape_latex_special_chars(hyphenate_long_strings(patient.get("fhir_id", "") or "")),
190
  full_name=escape_latex_special_chars(patient.get("full_name", "") or ""),
191
  gender=escape_latex_special_chars(patient.get("gender", "") or ""),
192
  dob=escape_latex_special_chars(patient.get("date_of_birth", "") or ""),
193
- age=escape_latex_special_chars(str(calculate_age(patient.get("date_of_birth", "")) or "N/A")),
194
  address=escape_latex_special_chars(", ".join(filter(None, [
195
  patient.get("address", ""),
196
  patient.get("city", ""),
@@ -206,7 +213,7 @@ $encounters
206
  encounters=encounters_content
207
  )
208
 
209
- # Compile LaTeX in a temporary directory
210
  with TemporaryDirectory() as tmpdir:
211
  tex_path = os.path.join(tmpdir, "report.tex")
212
  pdf_path = os.path.join(tmpdir, "report.pdf")
@@ -214,55 +221,185 @@ $encounters
214
  with open(tex_path, "w", encoding="utf-8") as f:
215
  f.write(latex_filled)
216
 
217
- try:
218
- # Run latexmk twice to ensure proper table rendering
219
- for _ in range(2):
220
- result = subprocess.run(
221
- ["latexmk", "-pdf", "-interaction=nonstopmode", tex_path],
222
- cwd=tmpdir,
223
- check=False,
224
- capture_output=True,
225
- text=True
226
- )
227
 
228
  if result.returncode != 0:
229
- raise HTTPException(
230
- status_code=500,
231
- detail=f"LaTeX compilation failed: stdout={result.stdout}, stderr={result.stderr}"
232
- )
233
 
234
- except subprocess.CalledProcessError as e:
235
- raise HTTPException(
236
- status_code=500,
237
- detail=f"LaTeX compilation failed: stdout={e.stdout}, stderr={e.stderr}"
238
- )
239
 
240
- if not os.path.exists(pdf_path):
241
- raise HTTPException(
242
- status_code=500,
243
- detail="PDF file was not generated"
244
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
- with open(pdf_path, "rb") as f:
247
- pdf_bytes = f.read()
 
 
 
 
 
 
 
 
248
 
249
- response = Response(
250
- content=pdf_bytes,
251
- media_type="application/pdf",
252
- headers={"Content-Disposition": f"attachment; filename=patient_{patient.get('fhir_id', 'unknown')}_report.pdf"}
253
- )
254
- return response
 
 
 
 
 
 
 
 
255
 
256
- except HTTPException as http_error:
257
- raise http_error
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  except Exception as e:
259
  raise HTTPException(
260
  status_code=500,
261
- detail=f"Unexpected error generating PDF: {str(e)}"
262
  )
263
- finally:
264
- # Restore the logger level for other routes
265
- logger.setLevel(logging.INFO)
266
 
267
  # Export the router as 'pdf' for api.__init__.py
268
  pdf = router
 
1
+ from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
2
  from db.mongo import patients_collection
3
  from core.security import get_current_user
4
  from utils.helpers import calculate_age, escape_latex_special_chars, hyphenate_long_strings, format_timestamp
 
10
  from tempfile import TemporaryDirectory
11
  from string import Template
12
  import logging
13
+ import asyncio
14
+ import aiohttp
15
+ import json
16
+ from typing import List, Dict, Optional
17
+ from pymongo import MongoClient
18
+ from pymongo.errors import PyMongoError
19
+ from pymongo.change_stream import CollectionChangeStream
20
 
21
  # Configure logging
22
  logging.basicConfig(
 
27
 
28
  router = APIRouter()
29
 
30
+ # Configuration
31
+ FILE_IO_API_URL = "https://file.io"
32
+ FILE_IO_EXPIRATION = "1w" # 1 week expiration
33
+ PDF_METADATA_COLLECTION = "pdf_metadata" # Collection to store file.io links
34
 
35
async def upload_to_fileio(file_bytes: bytes, filename: str) -> Optional[Dict]:
    """Upload a file to file.io and return the decoded JSON response.

    Args:
        file_bytes: Raw content sent as the ``file`` form field.
        filename: Name attached to the uploaded file part.

    Returns:
        The parsed JSON body when the service answers HTTP 200 with a truthy
        ``success`` field. ``None`` on any other outcome; failures are logged
        and never raised to the caller.
    """
    # NOTE(review): callers in this module pass patient reports to this
    # function; confirm that sending them to a third-party host is permitted.

    # Bound the whole request so a stalled upload cannot block the caller
    # for the library's much longer default.
    timeout = aiohttp.ClientTimeout(total=120)
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            form_data = aiohttp.FormData()
            form_data.add_field('file', file_bytes, filename=filename)
            form_data.add_field('expires', FILE_IO_EXPIRATION)

            async with session.post(FILE_IO_API_URL, data=form_data) as response:
                if response.status != 200:
                    logger.error(f"File.io upload failed with status {response.status}")
                    return None

                data = await response.json()
                if isinstance(data, dict) and data.get('success'):
                    return data

                # The body may be valid JSON without being an object.
                message = data.get('message') if isinstance(data, dict) else data
                logger.error(f"File.io upload failed: {message}")
                return None
    except Exception as e:
        # Best-effort boundary: log with traceback and report failure as None.
        logger.error(f"Error uploading to file.io: {str(e)}", exc_info=True)
        return None
55
 
56
+ async def generate_pdf_bytes(patient: dict) -> Optional[bytes]:
57
+ """Generate PDF bytes for a patient"""
58
+ try:
59
  # Prepare table content with proper LaTeX formatting
60
  def prepare_table_content(items, columns, default_message):
61
  if not items:
 
70
  content.append(" & ".join(row) + " \\\\")
71
  return "\n".join(content)
72
 
73
+ # Prepare all table contents
 
74
  notes_content = prepare_table_content(
75
  [{
76
  "date": format_timestamp(n.get("date", "")),
77
  "type": n.get("type", ""),
78
  "text": n.get("text", "")
79
+ } for n in patient.get("notes", [])],
80
  3,
81
  "No notes available"
82
  )
83
 
 
 
84
  conditions_content = prepare_table_content(
85
  [{
86
  "id": c.get("id", ""),
 
88
  "status": c.get("status", ""),
89
  "onset": format_timestamp(c.get("onset_date", "")),
90
  "verification": c.get("verification_status", "")
91
+ } for c in patient.get("conditions", [])],
92
  5,
93
  "No conditions available"
94
  )
95
 
 
 
96
  medications_content = prepare_table_content(
97
  [{
98
  "id": m.get("id", ""),
 
100
  "status": m.get("status", ""),
101
  "date": format_timestamp(m.get("prescribed_date", "")),
102
  "dosage": m.get("dosage", "")
103
+ } for m in patient.get("medications", [])],
104
  5,
105
  "No medications available"
106
  )
107
 
 
 
108
  encounters_content = prepare_table_content(
109
  [{
110
  "id": e.get("id", ""),
 
112
  "status": e.get("status", ""),
113
  "start": format_timestamp(e.get("period", {}).get("start", "")),
114
  "provider": e.get("service_provider", "")
115
+ } for e in patient.get("encounters", [])],
116
  5,
117
  "No encounters available"
118
  )
119
 
120
+ # LaTeX template
121
  latex_template = Template(r"""
122
  \documentclass[a4paper,12pt]{article}
123
  \usepackage[utf8]{inputenc}
 
190
  \end{document}
191
  """)
192
 
193
+ # Fill template with patient data
 
 
194
  latex_filled = latex_template.substitute(
195
+ generated_on=datetime.now().strftime("%A, %B %d, %Y at %I:%M %p %Z"),
196
+ fhir_id=escape_latex_special_chars(hyphenate_long_strings(patient.get("fhir_id", "") or ""),
197
  full_name=escape_latex_special_chars(patient.get("full_name", "") or ""),
198
  gender=escape_latex_special_chars(patient.get("gender", "") or ""),
199
  dob=escape_latex_special_chars(patient.get("date_of_birth", "") or ""),
200
+ age=escape_latex_special_chars(str(calculate_age(patient.get("date_of_birth", "")) or "N/A"),
201
  address=escape_latex_special_chars(", ".join(filter(None, [
202
  patient.get("address", ""),
203
  patient.get("city", ""),
 
213
  encounters=encounters_content
214
  )
215
 
216
+ # Compile LaTeX to PDF
217
  with TemporaryDirectory() as tmpdir:
218
  tex_path = os.path.join(tmpdir, "report.tex")
219
  pdf_path = os.path.join(tmpdir, "report.pdf")
 
221
  with open(tex_path, "w", encoding="utf-8") as f:
222
  f.write(latex_filled)
223
 
224
+ # Run latexmk twice to ensure proper table rendering
225
+ for _ in range(2):
226
+ result = subprocess.run(
227
+ ["latexmk", "-pdf", "-interaction=nonstopmode", tex_path],
228
+ cwd=tmpdir,
229
+ check=False,
230
+ capture_output=True,
231
+ text=True
232
+ )
 
233
 
234
  if result.returncode != 0:
235
+ logger.error(f"LaTeX compilation failed: {result.stderr}")
236
+ return None
 
 
237
 
238
+ if os.path.exists(pdf_path):
239
+ with open(pdf_path, "rb") as f:
240
+ return f.read()
241
+ return None
 
242
 
243
+ except Exception as e:
244
+ logger.error(f"Error generating PDF bytes: {str(e)}")
245
+ return None
246
+
247
async def generate_and_upload_pdf(patient: dict) -> Optional[Dict]:
    """Generate a patient's PDF, upload it to file.io and store the link metadata.

    Args:
        patient: Patient document; ``fhir_id`` and ``full_name`` are read here,
            and the whole document is passed on to ``generate_pdf_bytes``.

    Returns:
        The metadata dict that was upserted into the ``PDF_METADATA_COLLECTION``
        collection, or ``None`` if generation, upload or storage failed
        (failures are logged, never raised).
    """
    try:
        # Generate PDF bytes
        pdf_bytes = await generate_pdf_bytes(patient)
        if not pdf_bytes:
            logger.error(f"Failed to generate PDF for patient {patient.get('fhir_id')}")
            return None

        # Build the filename. The fields may be present but None, in which case
        # the `.get(key, default)` default is not used, so fall back with `or`.
        patient_name = str(patient.get("full_name") or "unknown").replace(" ", "_").lower()
        patient_id = patient.get("fhir_id") or "unknown"
        filename = f"patient_{patient_id}_{patient_name}_report.pdf"

        # Upload to file.io
        upload_response = await upload_to_fileio(pdf_bytes, filename)
        if not upload_response:
            logger.error(f"Failed to upload PDF for patient {patient.get('fhir_id')}")
            return None

        # Prepare metadata
        metadata = {
            "patient_id": patient.get("fhir_id"),
            "patient_name": patient.get("full_name"),
            "file_key": upload_response.get('key'),
            "file_url": upload_response.get('link'),
            "expires_at": upload_response.get('expires'),
            "generated_at": datetime.utcnow(),
            "filename": filename,
        }

        # One metadata record per patient: later uploads overwrite earlier ones.
        db = patients_collection.database
        await db[PDF_METADATA_COLLECTION].update_one(
            {"patient_id": patient.get("fhir_id")},
            {"$set": metadata},
            upsert=True
        )

        logger.info(f"Successfully uploaded PDF for patient {patient.get('fhir_id')}")
        return metadata

    except Exception as e:
        # Best-effort boundary: log with traceback and report failure as None.
        logger.error(f"Error in generate_and_upload_pdf: {str(e)}", exc_info=True)
        return None
292
 
293
async def generate_all_patient_pdfs() -> List[Dict]:
    """Generate and upload PDFs for every document in the patients collection.

    Patients are processed in small concurrent batches with a short pause
    between batches.

    Returns:
        Metadata dicts for the patients whose PDF was generated and uploaded.
        If an unexpected error interrupts the run, the results collected so
        far are returned.
    """
    generated_files = []
    try:
        cursor = patients_collection.find({})
        patients = await cursor.to_list(length=None)

        if not patients:
            logger.warning("No patients found in the database")
            return []

        logger.info(f"Starting PDF generation for {len(patients)} patients")

        batch_size = 5  # Smaller batch size for API rate limiting
        total_batches = (len(patients) - 1) // batch_size + 1

        for batch_number, start in enumerate(range(0, len(patients), batch_size), start=1):
            batch = patients[start:start + batch_size]
            results = await asyncio.gather(
                *(generate_and_upload_pdf(patient) for patient in batch)
            )

            # Failed patients come back as None and are skipped.
            generated_files.extend(result for result in results if result)

            logger.info(f"Processed batch {batch_number}/{total_batches}")

            # Pause between batches only; nothing follows the last one.
            if batch_number < total_batches:
                await asyncio.sleep(1)

        logger.info(f"Successfully processed {len(generated_files)} patients")
        return generated_files

    except Exception as e:
        logger.error(f"Error in generate_all_patient_pdfs: {str(e)}", exc_info=True)
        return generated_files
326
+
327
async def watch_for_new_patients():
    """Watch the patients collection for inserts and generate a PDF for each.

    Runs until cancelled or until an unexpected (non-PyMongo) error escapes
    the stream loop. The change stream is reopened after a five-second pause
    whenever it raises a ``PyMongoError`` or ends. A failure while handling a
    single event is logged and does not stop the watcher.
    """
    try:
        logger.info("Starting MongoDB change stream watcher for new patients")

        # Only insert events are of interest.
        pipeline = [{'$match': {'operationType': 'insert'}}]

        while True:
            try:
                async with patients_collection.watch(pipeline) as stream:
                    async for change in stream:
                        try:
                            patient = change['fullDocument']
                            logger.info(f"New patient detected: {patient.get('fhir_id')}")

                            # Generate and upload PDF
                            result = await generate_and_upload_pdf(patient)
                            if result:
                                logger.info(f"Generated PDF for new patient {patient.get('fhir_id')}")
                            else:
                                logger.error(f"Failed to generate PDF for new patient {patient.get('fhir_id')}")

                        except Exception as e:
                            logger.error(
                                f"Error processing change stream event: {str(e)}",
                                exc_info=True,
                            )

            except PyMongoError as e:
                logger.error(f"MongoDB change stream error: {str(e)}")

            # Wait before reconnecting, both after an error and after the
            # stream ends without one, so the loop never spins.
            await asyncio.sleep(5)

    except Exception as e:
        logger.error(f"Fatal error in watch_for_new_patients: {str(e)}", exc_info=True)
362
+
363
# Strong references to fire-and-forget tasks. The event loop keeps only weak
# references, so an unreferenced task may be garbage-collected before it ends.
_background_tasks = set()


@router.on_event("startup")
async def startup_event():
    """Start background tasks on application startup."""
    # Start the change stream watcher and keep it referenced while it runs.
    watcher_task = asyncio.create_task(watch_for_new_patients())
    _background_tasks.add(watcher_task)
    watcher_task.add_done_callback(_background_tasks.discard)
368
+
369
@router.post("/generate-all-pdfs", status_code=202)
async def trigger_pdf_generation(
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user)
):
    """Queue PDF generation for all patients as a background task.

    Responds immediately with an acknowledgement; the work itself runs after
    the response is sent. Callers whose role is neither ``admin`` nor
    ``doctor`` receive a 403.
    """
    permitted_roles = ('admin', 'doctor')
    caller_role = current_user.get('role')

    if caller_role in permitted_roles:
        background_tasks.add_task(generate_all_patient_pdfs)
        acknowledgement = {
            "status": "accepted",
            "message": "PDF generation started in the background",
        }
        return acknowledgement

    raise HTTPException(
        status_code=403,
        detail="Only administrators and doctors can generate PDFs"
    )
383
+
384
@router.get("/list-pdf-links", response_model=List[Dict])
async def list_pdf_links(current_user: dict = Depends(get_current_user)):
    """List all stored PDF metadata.

    Returns:
        Every document in the ``PDF_METADATA_COLLECTION`` collection, without
        MongoDB's ``_id`` field.

    Raises:
        HTTPException: 403 if the caller's role is neither ``admin`` nor
            ``doctor``; 500 if the database query fails.
    """
    if current_user.get('role') not in ['admin', 'doctor']:
        raise HTTPException(
            status_code=403,
            detail="Only administrators and doctors can list PDFs"
        )

    try:
        db = patients_collection.database
        # Exclude `_id`: it is a BSON ObjectId, which the JSON response encoder
        # cannot serialise, and the failure would occur outside this try block.
        cursor = db[PDF_METADATA_COLLECTION].find({}, {"_id": 0})
        pdfs = await cursor.to_list(length=None)
        return pdfs
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error listing PDF files: {str(e)}"
        ) from e
 
 
 
403
 
404
  # Export the router as 'pdf' for api.__init__.py
405
  pdf = router