coldn00dl3s committed on
Commit
22ea3d2
1 Parent(s): a628181

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -12
app.py CHANGED
@@ -3,7 +3,8 @@ import pandas as pd
3
  import requests
4
  import json
5
  from datetime import datetime
6
- from PyPDF2 import PdfReader
 
7
 
8
  st.title("FHIR Converter")
9
 
@@ -125,31 +126,27 @@ elif conversion_type == "Clinical Notes to FHIR":
125
  if uploaded_pdf is not None and st.button("Convert"):
126
  try:
127
  # Extract text from PDF
128
- pdf_reader = PdfReader(uploaded_pdf)
129
- paragraphs = []
130
- for page_num in range(len(pdf_reader.pages)):
131
- page = pdf_reader.pages[page_num]
132
- text = page.extract_text()
133
- paragraphs.extend(text.split("\n\n")) # Split by double newline for paragraph separation
134
-
135
  # Send paragraphs to API
136
- total_paragraphs = len(paragraphs)
137
  start_time = datetime.now()
138
  result_text = ""
139
 
140
- for idx, paragraph in enumerate(paragraphs):
141
  data = {"note_content": paragraph}
142
  response = requests.post("https://fhir-api-9jsn.onrender.com/convert_notes", json=data)
143
 
144
  if response.status_code == 200:
145
  result_text += response.json().get("output", "") + "\n"
146
  else:
147
- st.error(f"An error occurred at paragraph {idx + 1}: {response.json().get('error', 'Unknown error')}")
148
  break
149
 
150
  # Update progress
151
  elapsed_time = datetime.now() - start_time
152
- progress_bar.progress(int(((idx + 1) / total_paragraphs) * 100))
153
  time_placeholder.text(f"Time elapsed: {elapsed_time}")
154
 
155
  if result_text:
 
3
  import requests
4
  import json
5
  from datetime import datetime
6
+ from pdfminer.high_level import extract_text
7
+ import regex as re
8
 
9
  st.title("FHIR Converter")
10
 
 
126
  if uploaded_pdf is not None and st.button("Convert"):
127
  try:
128
  # Extract text from PDF
129
+ full_text = extract_text(uploaded_pdf)
130
+ pattern = r'(?<!\n)\n\n(?![@#\$%\^&\*\(\)\[\]\{\};:,\.])'
131
+ paragraphs = re.split(pattern, full_text)
 
 
 
 
132
  # Send paragraphs to API
133
+ total_paragraphs = len(paragraphs) - 1
134
  start_time = datetime.now()
135
  result_text = ""
136
 
137
+ for i, paragraph in enumerate(paragraphs, 1):
138
  data = {"note_content": paragraph}
139
  response = requests.post("https://fhir-api-9jsn.onrender.com/convert_notes", json=data)
140
 
141
  if response.status_code == 200:
142
  result_text += response.json().get("output", "") + "\n"
143
  else:
144
+ st.error(f"An error occurred at paragraph {i + 1}: {response.json().get('error', 'Unknown error')}")
145
  break
146
 
147
  # Update progress
148
  elapsed_time = datetime.now() - start_time
149
+ progress_bar.progress(int(((i + 1) / total_paragraphs) * 100))
150
  time_placeholder.text(f"Time elapsed: {elapsed_time}")
151
 
152
  if result_text: