samyak152002 commited on
Commit
97905e6
·
verified ·
1 Parent(s): eb90936

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -347
app.py CHANGED
@@ -4,386 +4,221 @@ import fitz # PyMuPDF
4
  from pdfminer.high_level import extract_text
5
  from pdfminer.layout import LAParams
6
  import language_tool_python
7
- from typing import List, Dict, Any, Tuple
8
  from collections import Counter
9
  import json
10
  import traceback
11
  import io
12
  import tempfile
13
  import os
14
-
 
15
 
16
  # Set JAVA_HOME environment variable
17
  os.environ['JAVA_HOME'] = '/usr/lib/jvm/java-11-openjdk-amd64'
18
 
19
- # Optional: Verify Java installation
20
- # try:
21
- # java_version = subprocess.check_output(['java', '-version'], stderr=subprocess.STDOUT).decode()
22
- # st.write(f"Java Version: {java_version}")
23
- # except Exception as e:
24
- # st.error("Java is not installed correctly.")
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  # ------------------------------
27
- # Analysis Functions
28
  # ------------------------------
29
 
30
def extract_pdf_text_by_page(file) -> List[str]:
    """Extracts text from a PDF file, page by page, using PyMuPDF."""
    # Rewind first: the caller may already have consumed the stream.
    file.seek(0)
    # fitz.open(stream=...) parses the whole document from memory; the
    # context manager guarantees the document handle is closed.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        return [page.get_text("text") for page in doc]
 
 
 
35
 
36
def extract_pdf_text(file) -> str:
    """Extracts text from a PDF file using pdfminer."""
    # Rewind so pdfminer sees the stream from the start even if a previous
    # extraction already read it.
    file.seek(0)
    # LAParams() enables pdfminer's default layout analysis (line grouping).
    return extract_text(file, laparams=LAParams())
 
40
 
41
def check_text_presence(full_text: str, search_terms: List[str]) -> Dict[str, bool]:
    """Report, for every search term, whether it occurs in the text.

    Matching is case-insensitive substring containment; keys of the returned
    dict are the terms exactly as passed in.
    """
    haystack = full_text.lower()
    presence: Dict[str, bool] = {}
    for term in search_terms:
        presence[term] = term.lower() in haystack
    return presence
44
 
45
def label_authors(full_text: str) -> str:
    """Prefix the detected author line with 'Authors:' when one is found.

    The line following the first line, up to the first blank line, is assumed
    to hold the author list; every occurrence of that stripped string is
    relabelled.  Text without such a line is returned unchanged.
    """
    match = re.search(r"^(?:.*\n)(.*?)(?:\n\n)", full_text, re.MULTILINE)
    if match is None:
        return full_text
    authors = match.group(1).strip()
    return full_text.replace(authors, f"Authors: {authors}")
53
-
54
def check_metadata(full_text: str) -> Dict[str, Any]:
    """Check for metadata elements (email, author list, keywords, length)."""
    # Any plausible e-mail address counts as the author's contact address.
    has_email = re.search(r'\b[\w.-]+?@\w+?\.\w+?\b', full_text) is not None
    has_authors = re.search(r'Authors?:', full_text, re.IGNORECASE) is not None
    has_keywords = re.search(r'Keywords?:', full_text, re.IGNORECASE) is not None
    word_total = len(full_text.split())
    return {
        "author_email": has_email,
        "list_of_authors": has_authors,
        "keywords_list": has_keywords,
        # Falls back to the string "Missing" when the document has no words.
        "word_count": word_total or "Missing",
    }
62
-
63
def check_disclosures(full_text: str) -> Dict[str, bool]:
    """Check for the standard disclosure statements (case-insensitive)."""
    lowered = full_text.lower()
    # Each required statement maps to whether it appears anywhere in the text.
    required = (
        "author contributions statement",
        "conflict of interest statement",
        "ethics statement",
        "funding statement",
        "data access statement",
    )
    return {term: term in lowered for term in required}
73
-
74
def check_figures_and_tables(full_text: str) -> Dict[str, bool]:
    """Heuristic presence checks for figure/table citations and legends."""
    checks = {
        "figures_with_citations": r'Figure \d+.*?citation',
        "figures_legends": r'Figure \d+.*?legend',
        "tables_legends": r'Table \d+.*?legend',
    }
    # Without re.DOTALL the .*? gap cannot cross a newline, so the keyword
    # must follow on the same line as the figure/table number.
    return {
        name: re.search(pattern, full_text, re.IGNORECASE) is not None
        for name, pattern in checks.items()
    }
81
 
82
def check_references(full_text: str) -> Dict[str, Any]:
    """Collect simple reference-quality signals from the raw text."""
    # Only the leading ~1000 characters approximate the abstract.
    abstract = full_text[:1000]
    has_1900s_year = re.search(r'\b19[0-9]{2}\b', full_text) is not None
    cites_in_abstract = re.search(r'\b(citation|reference)\b', abstract, re.IGNORECASE) is not None
    bracketed = re.findall(r'\[.*?\]', full_text)
    mentions_self_citation = re.search(r'Self-citation', full_text, re.IGNORECASE) is not None
    return {
        "old_references": has_1900s_year,
        "citations_in_abstract": cites_in_abstract,
        "reference_count": len(bracketed),
        "self_citations": mentions_self_citation,
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
def check_structure(full_text: str) -> Dict[str, bool]:
    """Check document structure (IMRaD section headings, structured abstract)."""
    # IMRaD headings are matched case-sensitively, as plain substrings.
    imrad_sections = ("Introduction", "Methods", "Results", "Discussion")
    has_imrad = True
    for section in imrad_sections:
        if section not in full_text:
            has_imrad = False
            break
    return {
        "imrad_structure": has_imrad,
        "abstract_structure": "structured abstract" in full_text.lower(),
    }
97
 
98
def check_language_issues(full_text: str) -> Dict[str, Any]:
    """Check for issues with capitalization, hyphenation, punctuation, spacing, etc."""
    # NOTE(review): LanguageTool spawns a Java process; building it per call
    # is expensive — consider caching the instance. Requires JAVA_HOME.
    language_tool = language_tool_python.LanguageTool('en-US')
    matches = language_tool.check(full_text)
    word_count = len(full_text.split())
    issues_count = len(matches)
    # Normalise to issues per 1000 words; guard against empty documents.
    issues_per_1000 = (issues_count / word_count) * 1000 if word_count else 0

    # Convert Match objects to plain dicts so the result is JSON-serializable
    # downstream (e.g. for st.json and the annotation pass).
    serializable_matches = [
        {
            "message": match.message,
            "replacements": match.replacements,
            "offset": match.offset,
            "errorLength": match.errorLength,
            "category": match.category,
            "ruleIssueType": match.ruleIssueType,
            "sentence": match.sentence
        }
        for match in matches
    ]

    return {
        "issues_count": issues_count,
        "issues_per_1000": issues_per_1000,
        # More than 20 issues per 1000 words marks the document as failing.
        "failed": issues_per_1000 > 20,
        "matches": serializable_matches
    }
125
-
126
def check_language(full_text: str) -> Dict[str, Any]:
    """Check language quality."""
    has_plain_summary = re.search(r'plain language summary', full_text, re.IGNORECASE) is not None
    return {
        "plain_language": has_plain_summary,
        # Readability scoring is not implemented yet.
        "readability_issues": False,
        "language_issues": check_language_issues(full_text),
    }
133
-
134
def check_figure_order(full_text: str) -> Dict[str, Any]:
    """Check if figures are referred to in sequential order.

    Returns a dict with the distinct figure numbers found, whether they form
    a gap-free ascending sequence, any missing numbers below the maximum,
    and which raw references are duplicated or appear only once.
    """
    figure_pattern = r'(?:Fig(?:ure)?\.?|Figure)\s*(\d+)'
    figure_references = re.findall(figure_pattern, full_text, re.IGNORECASE)
    # Distinct figure numbers in ascending order.
    figure_numbers = sorted(set(int(num) for num in figure_references))

    # "Sequential" here means no gaps between consecutive distinct numbers;
    # the order of mention in the text is not examined.
    is_sequential = all(a + 1 == b for a, b in zip(figure_numbers, figure_numbers[1:]))

    if figure_numbers:
        expected_figures = set(range(1, max(figure_numbers) + 1))
        missing_figures = list(expected_figures - set(figure_numbers))
    else:
        # No figures at all is reported as None rather than an empty list.
        missing_figures = None

    # Raw (string) references cited more than once.
    # Fixed: removed the dead `duplicate_numbers` local the original computed
    # and never used.
    duplicates = [num for num, count in Counter(figure_references).items() if count > 1]
    # NOTE(review): despite the key name, this holds references cited exactly
    # once (kept as strings), not figures that are never mentioned.
    not_mentioned = list(set(figure_references) - set(duplicates))

    return {
        "sequential_order": is_sequential,
        "figure_count": len(figure_numbers),
        "missing_figures": missing_figures,
        "figure_order": figure_numbers,
        "duplicate_references": duplicates,
        "not_mentioned": not_mentioned
    }
160
-
161
def check_reference_order(full_text: str) -> Dict[str, Any]:
    """Check whether bracketed citations like [3] appear in ascending order.

    A citation is flagged as out of order when it jumps past the highest
    number seen so far by more than one (e.g. [1] then [4]); missing
    references are numbers below the maximum that are never cited at all.
    """
    cited = [int(number) for number in re.findall(r'\[(\d+)\]', full_text)]

    highest_seen = 0
    jumps = []
    for position, number in enumerate(cited, start=1):
        if number > highest_seen + 1:
            # Record the 1-based citation position alongside the number.
            jumps.append((position, number))
        if number > highest_seen:
            highest_seen = number

    never_cited = list(set(range(1, highest_seen + 1)) - set(cited))

    return {
        "max_reference": highest_seen,
        "out_of_order": jumps,
        "missing_references": never_cited,
        "is_ordered": not jumps and not never_cited
    }
184
-
185
def check_reference_style(full_text: str) -> Dict[str, Any]:
    """Identify the dominant citation style in the References section and
    list the entries that deviate from it.
    """
    section = re.search(r'References\b([\s\S]*?)(?:\n\S|\Z)', full_text, re.IGNORECASE)
    if section is None:
        return {"style": "Unknown", "reason": "References section not found", "inconsistent_refs": []}

    # Split on newlines that start a new entry ([n], "n. ", or "(Name, YYYY)").
    raw_entries = re.split(r'\n(?=\[\d+\]|\d+\.\s|\(\w+,\s*\d{4}\))', section.group(1))
    references = [entry.strip() for entry in raw_entries if entry.strip()]

    # First pattern that matches wins; Harvard shadows APA by design of the
    # original ordering (the two patterns are identical).
    patterns = {
        "IEEE": r'^\[\d+\]',
        "Harvard": r'^[A-Z][a-z]+,?\s[A-Z]\.\s\(?\d{4}\)?',
        "APA": r'^[A-Z][a-z]+,?\s[A-Z]\.\s\(?\d{4}\)?',
        "MLA": r'^[A-Z][a-z]+,\s[A-Z][a-z]+\.',
        "Vancouver": r'^\d+\.\s',
        "Chicago": r'^\d+\s[A-Z][a-z]+\s[A-Z]',
    }

    styles = []
    inconsistent_refs = []
    for index, entry in enumerate(references, 1):
        detected = next(
            (style for style, pattern in patterns.items() if re.match(pattern, entry)),
            None,
        )
        if detected is None:
            styles.append("Unknown")
            inconsistent_refs.append((index, entry, "Unknown"))
        else:
            styles.append(detected)

    if not styles:
        return {"style": "Unknown", "reason": "No references found", "inconsistent_refs": []}

    style_counts = Counter(styles)
    majority_style, majority_count = style_counts.most_common(1)[0]

    # Entries recognised as some style other than the majority one are also
    # flagged (unknowns were already flagged above).
    for index, style in enumerate(styles, 1):
        if style != majority_style and style != "Unknown":
            inconsistent_refs.append((index, references[index - 1], style))

    return {
        "majority_style": majority_style,
        "inconsistent_refs": inconsistent_refs,
        "consistency": majority_count / len(styles)
    }
234
-
235
- # ------------------------------
236
- # Annotation Functions
237
- # ------------------------------
238
-
239
def highlight_text(page, words, text, annotation):
    """Highlight text and add annotation.

    Every spatial occurrence of `text` on the page gets a highlight plus a
    sticky-note comment; returns True iff at least one instance was found.
    """
    text_instances = find_text_instances(words, text)
    highlighted = False
    for inst in text_instances:
        highlight = page.add_highlight_annot(inst)
        highlight.update()
        # Anchor the note at the rectangle's top-left corner (inst[:2]).
        comment = page.add_text_annot(inst[:2], annotation)
        comment.update()
        highlighted = True
    return highlighted
250
-
251
def find_text_instances(words, text):
    """Find all instances of text in words.

    `words` is the PyMuPDF page.get_text("words") list, where each item is
    (x0, y0, x1, y1, word, ...).  Returns one fitz.Rect per occurrence of the
    full phrase, spanning the bounding boxes of all its words.
    """
    text_lower = text.lower()
    text_words = text_lower.split()
    instances = []
    # Slide a window of len(text_words) over the page words; word[4] is the
    # word's text, compared case-insensitively for exact equality.
    for i in range(len(words) - len(text_words) + 1):
        if all(words[i+j][4].lower() == text_words[j] for j in range(len(text_words))):
            inst = fitz.Rect(words[i][:4])
            # Union the bounding boxes of the remaining words of the phrase.
            for j in range(1, len(text_words)):
                inst = inst | fitz.Rect(words[i+j][:4])
            instances.append(inst)
    return instances
263
-
264
def highlight_issues_in_pdf(file, inconsistent_refs: List[Tuple[int, str, str]], language_matches: List[Dict[str, Any]]) -> bytes:
    """Highlight inconsistent references and add notes for language issues in a single PDF.

    Returns the annotated PDF as bytes, or b"" on any failure (errors are
    printed, never raised).
    """
    try:
        file.seek(0)
        doc = fitz.open(stream=file.read(), filetype="pdf")
        # (sentence, message) pairs already annotated, so the same language
        # issue is not noted again on a later page.
        added_notes = set()

        for page_number, page in enumerate(doc, start=1):
            words = page.get_text("words")

            if inconsistent_refs:
                for ref_num, ref_text, ref_style in inconsistent_refs:
                    annotation_text = f"Reference {ref_num}: Inconsistent style ({ref_style}). Should be consolidated to {ref_style}."
                    highlight_text(page, words, ref_text, annotation_text)

            if language_matches:
                for match in language_matches:
                    issue_text = match['sentence']
                    error_message = f"{match['message']}\nSuggested correction: {match['replacements'][0] if match['replacements'] else 'No suggestion'}"
                    issue_key = (issue_text, error_message)

                    # Only record the note once, and only if the text was
                    # actually found and highlighted on this page.
                    if issue_key not in added_notes:
                        if highlight_text(page, words, issue_text, error_message):
                            added_notes.add(issue_key)

        annotated_pdf_bytes = doc.write()
        doc.close()
        return annotated_pdf_bytes

    except Exception as e:
        # Best-effort: annotation failure must not break the analysis flow.
        print(f"An error occurred while annotating the PDF: {str(e)}")
        traceback.print_exc()
        return b""
297
-
298
- # ------------------------------
299
- # Main Analysis Function
300
- # ------------------------------
301
-
302
def analyze_pdf(file) -> Tuple[Dict[str, Any], bytes]:
    """
    Analyze the uploaded PDF and return analysis results and annotated PDF bytes.

    Returns:
        Tuple containing:
        - Analysis results as a dictionary.
        - Annotated PDF as bytes.
    """
    try:
        # The 'file' is a BytesIO object provided by Streamlit
        file.seek(0)
        # NOTE(review): pages_text is computed but never used below.
        pages_text = extract_pdf_text_by_page(file)
        full_text = extract_pdf_text(file)
        # Ensure the author line is labelled so check_metadata can find it.
        full_text = label_authors(full_text)

        # Perform analyses
        metadata = check_metadata(full_text)
        disclosures = check_disclosures(full_text)
        figures_and_tables = check_figures_and_tables(full_text)
        figure_order = check_figure_order(full_text)
        references = check_references(full_text)
        reference_order = check_reference_order(full_text)
        reference_style = check_reference_style(full_text)
        structure = check_structure(full_text)
        language = check_language(full_text)

        # Compile results
        results = {
            "metadata": metadata,
            "disclosures": disclosures,
            "figures_and_tables": figures_and_tables,
            "figure_order": figure_order,
            "references": references,
            "reference_order": reference_order,
            "reference_style": reference_style,
            "structure": structure,
            "language": language
        }

        # Handle annotations: only produce an annotated PDF when there is
        # something to highlight.
        inconsistent_refs = reference_style.get("inconsistent_refs", [])
        language_matches = language.get("language_issues", {}).get("matches", [])

        if inconsistent_refs or language_matches:
            annotated_pdf_bytes = highlight_issues_in_pdf(file, inconsistent_refs, language_matches)
        else:
            annotated_pdf_bytes = None

        return results, annotated_pdf_bytes

    except Exception as e:
        # Surface the failure to the UI as a result dict instead of raising.
        error_message = {
            "error": str(e),
            "traceback": traceback.format_exc()
        }
        return error_message, None
359
-
360
- # ------------------------------
361
- # Streamlit Interface
362
- # ------------------------------
363
 
364
def main():
    """Streamlit entry point: upload a PDF, run the analysis, offer download."""
    st.title("PDF Analyzer")
    st.write("Upload a PDF document to analyze its structure, references, language, and more.")

    uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

    if uploaded_file is not None:
        with st.spinner("Analyzing PDF..."):
            results, annotated_pdf = analyze_pdf(uploaded_file)

        st.subheader("Analysis Results")
        st.json(results)

        # analyze_pdf returns None for the PDF when nothing needed highlighting.
        if annotated_pdf:
            st.subheader("Download Annotated PDF")
            st.download_button(
                label="Download Annotated PDF",
                data=annotated_pdf,
                file_name="annotated.pdf",
                mime="application/pdf"
            )
        else:
            st.success("No issues found. No annotated PDF to download.")

if __name__ == "__main__":
    main()
 
 
4
  from pdfminer.high_level import extract_text
5
  from pdfminer.layout import LAParams
6
  import language_tool_python
7
+ from typing import List, Dict, Any, Tuple, Optional
8
  from collections import Counter
9
  import json
10
  import traceback
11
  import io
12
  import tempfile
13
  import os
14
+ import base64
15
+ from dataclasses import dataclass
16
 
17
  # Set JAVA_HOME environment variable
18
  os.environ['JAVA_HOME'] = '/usr/lib/jvm/java-11-openjdk-amd64'
19
 
20
+ # ------------------------------
21
+ # Data Classes
22
+ # ------------------------------
23
+
24
@dataclass
class Highlight:
    """A rectangle to highlight in the PDF viewer, plus its annotation."""
    page: int  # zero-based page index
    rect: Tuple[float, float, float, float]  # (x0, y0, x1, y1)
    color: str  # CSS color name used by the HTML overlay
    message: str  # tooltip / annotation text
    category: str  # issue category label (e.g. "text")
31
+
32
@dataclass
class AnalysisResult:
    """Aggregated output of a PDF analysis run."""
    highlights: List[Highlight]  # overlays to draw on the viewer
    messages: List[Dict[str, Any]]  # issue dicts (title/description/category)
    summary: Dict[str, Any]  # headline metrics shown in the sidebar
37
 
38
  # ------------------------------
39
+ # PDF Processing Functions
40
  # ------------------------------
41
 
42
def extract_pdf_text_by_page(file) -> List[str]:
    """Extracts text from a PDF file, page by page, using PyMuPDF.

    Accepts raw PDF bytes or any binary file-like object with a read()
    method (io.BytesIO, Streamlit's UploadedFile, ...).  Returns [] for
    inputs it cannot interpret.
    """
    # Fixed: the original isinstance(file, (str, bytes, io.BytesIO)) check
    # silently rejected other file-likes (returning []) and let `str` fall
    # through into fitz.open(stream=...), which cannot consume a path string.
    if hasattr(file, 'read'):
        if hasattr(file, 'seek'):
            file.seek(0)  # rewind in case the stream was already consumed
        data = file.read()
    elif isinstance(file, bytes):
        data = file
    else:
        return []
    doc = fitz.open(stream=data, filetype="pdf")
    try:
        return [page.get_text("text") for page in doc]
    finally:
        doc.close()  # always release the document handle, even on error
50
 
51
def extract_pdf_text(file) -> str:
    """Extracts text from a PDF file using pdfminer."""
    # NOTE(review): file-like objects other than io.BytesIO (e.g. Streamlit's
    # UploadedFile) fail this check and silently yield "" — confirm intended.
    if isinstance(file, (str, bytes, io.BytesIO)):
        return extract_text(file, laparams=LAParams())
    return ""
56
 
57
+ # ... (keep all your existing analysis functions) ...
 
 
58
 
59
+ # ------------------------------
60
+ # Highlight Processing Functions
61
+ # ------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
def get_word_coordinates(doc: fitz.Document) -> Dict[int, List[Dict[str, Any]]]:
    """Extract word coordinates from each page of the PDF.

    Returns a mapping of zero-based page number to a list of word records:
    the word text, its bounding fitz.Rect, and the trailing fields of the
    page.get_text("words") tuple stored as `origin`.
    """
    word_coordinates = {}
    for page_num, page in enumerate(doc):
        words = page.get_text("words")
        word_coordinates[page_num] = [
            {
                "text": word[4],
                "rect": fitz.Rect(word[:4]),
                # word[5:] are PyMuPDF's (block_no, line_no, word_no) indices.
                "origin": word[5:],
            }
            for word in words
        ]
    return word_coordinates
77
+
78
def find_text_location(text: str, word_coordinates: Dict[int, List[Dict[str, Any]]]) -> Optional[Highlight]:
    """Find the location of text in the PDF and return a Highlight object.

    Scans pages in order and returns only the first match, or None.
    NOTE(review): matching is loose — it triggers on the first page word that
    is a substring of `text`, then unions the rects of the following
    len(text.split()) - 1 words, which need not be the actual phrase.
    """
    text_lower = text.lower()
    for page_num, words in word_coordinates.items():
        for i in range(len(words)):
            if words[i]["text"].lower() in text_lower:
                # Grow the rectangle over the assumed remainder of the phrase.
                rect = words[i]["rect"]
                j = i + 1
                while j < len(words) and j - i < len(text.split()):
                    rect = rect | words[j]["rect"]
                    j += 1

                return Highlight(
                    page=page_num,
                    rect=(rect.x0, rect.y0, rect.x1, rect.y1),
                    color="yellow",
                    message=text,
                    category="text"
                )
    return None
99
 
100
+ # ------------------------------
101
+ # Streamlit Interface
102
+ # ------------------------------
 
 
 
103
 
104
def create_sidebar():
    """Create the sidebar with upload and analysis options.

    Returns:
        (uploaded_file, options): the Streamlit upload object (or None when
        nothing is uploaded) and a dict of boolean analysis toggles.
    """
    st.sidebar.title("PDF Analyzer")
    uploaded_file = st.sidebar.file_uploader("Upload PDF", type=['pdf'])

    # Collapsed expander keeps the toggles out of the way by default.
    analysis_options = st.sidebar.expander("Analysis Options", expanded=False)
    with analysis_options:
        options = {
            "check_language": st.checkbox("Language Analysis", value=True),
            "check_references": st.checkbox("Reference Analysis", value=True),
            "check_structure": st.checkbox("Structure Analysis", value=True),
        }

    return uploaded_file, options
 
 
 
 
118
 
119
def display_pdf_viewer(pdf_bytes: bytes, highlights: List[Highlight]):
    """Display the PDF with highlights using a custom viewer.

    Embeds the PDF as a base64 data-URI in an <iframe> and layers absolutely
    positioned highlight divs over it.  NOTE(review): the overlay sits on top
    of the browser's own PDF viewer and does not track its zoom or scroll —
    verify the highlights actually line up in practice.
    """
    # Convert PDF bytes to base64
    b64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')

    # Create custom HTML for PDF viewer
    html_content = f"""
    <div style="position: relative; width: 100%; height: 800px;">
        <iframe src="data:application/pdf;base64,{b64_pdf}"
                width="100%"
                height="100%"
                style="border: none;">
        </iframe>
        <div id="highlight-container">
            {generate_highlight_overlays(highlights)}
        </div>
    </div>
    <style>
        .highlight {{
            position: absolute;
            opacity: 0.3;
            pointer-events: all;
            cursor: pointer;
            transition: opacity 0.2s;
        }}
        .highlight:hover {{
            opacity: 0.5;
        }}
    </style>
    """

    st.components.v1.html(html_content, height=800)
151
+
152
def generate_highlight_overlays(highlights: List[Highlight]) -> str:
    """Generate HTML for highlight overlays.

    Produces one absolutely positioned <div class="highlight"> per Highlight,
    sized and placed from its rect and colored with its color; the message
    becomes the tooltip.  NOTE(review): rect values are PDF page units but
    are emitted as CSS px — confirm no scale factor is needed.  The message
    is interpolated unescaped into the title attribute.
    """
    overlay_html = ""
    for i, highlight in enumerate(highlights):
        overlay_html += f"""
        <div class="highlight"
             style="left: {highlight.rect[0]}px;
                    top: {highlight.rect[1]}px;
                    width: {highlight.rect[2] - highlight.rect[0]}px;
                    height: {highlight.rect[3] - highlight.rect[1]}px;
                    background-color: {highlight.color};"
             onclick="showMessage({i})"
             title="{highlight.message}">
        </div>
        """
    return overlay_html
168
+
169
def display_analysis_results(results: AnalysisResult):
    """Display analysis results in the sidebar."""
    st.sidebar.markdown("## Analysis Results")

    # Display summary statistics
    st.sidebar.markdown("### Summary")
    for key, value in results.summary.items():
        st.sidebar.metric(key, value)

    # Display messages grouped by category
    messages_by_category = {}
    for message in results.messages:
        category = message.get("category", "Other")
        if category not in messages_by_category:
            messages_by_category[category] = []
        messages_by_category[category].append(message)

    # One collapsible section per category, count shown in the header.
    # NOTE(review): assumes every message dict carries 'title' and
    # 'description' — a missing key raises KeyError here.
    for category, messages in messages_by_category.items():
        with st.sidebar.expander(f"{category} ({len(messages)})"):
            for msg in messages:
                st.markdown(f"**{msg['title']}**")
                st.markdown(msg['description'])
                st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
  def main():
194
+ st.set_page_config(
195
+ page_title="PDF Analyzer",
196
+ page_icon="📄",
197
+ layout="wide",
198
+ initial_sidebar_state="expanded"
199
+ )
200
+
201
+ # Create sidebar and get user input
202
+ uploaded_file, options = create_sidebar()
203
+
204
  if uploaded_file is not None:
205
+ # Read PDF file
206
+ pdf_bytes = uploaded_file.read()
207
+
208
+ # Analyze PDF
209
+ try:
210
+ results, annotated_pdf = analyze_pdf(io.BytesIO(pdf_bytes))
211
+
212
+ # Create two columns
213
+ col1, col2 = st.columns([0.7, 0.3])
214
+
215
+ with col1:
216
+ st.markdown("### Document Preview")
217
+ # Display PDF with highlights
218
+ if annotated_pdf:
219
+ display_pdf_viewer(annotated_pdf, results.get("highlights", []))
220
+ else:
221
+ display_pdf_viewer(pdf_bytes, [])
222
+
223
+ with col2:
224
+ st.markdown("