Corran committed on
Commit
f0b6424
1 Parent(s): a917763

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -0
app.py CHANGED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .utils import GParse_Paper, Get_Bibliography
2
+ from bs4 import BeautifulSoup
3
+ import solara
4
+ from pathlib import Path
5
+
6
# Directory containing this module; bundled assets are resolved relative to it.
HERE = Path(__file__).parent
# Stylesheet text read once at import time. The encoding is given explicitly so
# decoding does not depend on the platform's default locale.
app_style = (HERE / "style.css").read_text(encoding="utf-8")
8
+
9
+
10
def Get_HTMLTop(title):
    """Return the HTML fragment that opens the article: heading plus author span.

    Args:
        title: Document title as plain text. It is HTML-escaped before being
            placed in the ``<h1>`` because it is extracted from an uploaded
            document and may contain ``<``, ``>`` or ``&``.

    Returns:
        A string holding the ``<h1>`` heading followed by an (empty) RDFa
        ``schema:Person`` span.
    """
    # Imported locally (and by name) because this module defines its own
    # module-level variable called ``html``.
    from html import escape

    html_top = f"""
    <h1>{escape(str(title))}</h1>
    <span typeof="schema:Person" resource="http://orcid.org/0000-0003-1279-3709">

    </span>
    """
    return html_top
19
+
20
def Get_Controls():
    """Return the HTML/JavaScript for the reader controls.

    The fragment contains:
      * a ``<select id="textSize">`` that sets the body font size through
        ``adjustTextSize``;
      * the ``openDialog`` / ``closeDialog`` helpers used by the citation
        pop-ups (elements with class ``dialog`` opened from ``text-area``
        spans).

    Returns:
        The controls markup as a single string.
    """
    sizes = (10, 12, 14, 16, 18, 20, 24, 28, 32, 36, 40, 44, 48, 50)
    default_size = 16
    options = "\n".join(
        f'<option value="{size}"{" selected" if size == default_size else ""}>{size}px</option>'
        for size in sizes
    )

    selector = (
        '\n<label for="textSize">Text Size: </label>\n'
        '<select id="textSize" name="textSize" onchange="adjustTextSize(this.value)">\n'
        + options
        + "\n</select>\n"
    )

    # Plain (non-f) string: the JavaScript is full of literal braces.
    scripts = """<script>
function adjustTextSize(size) {
    const baseSize = parseInt(size, 10);
    document.body.style.fontSize = baseSize + 'px';
}
</script>

<script>
function openDialog(event, dialogId) {
    var dialog = document.getElementById(dialogId);
    var rect = event.target.getBoundingClientRect();
    dialog.style.top = rect.top + window.scrollY + 'px';
    dialog.style.left = rect.left + window.scrollX + 'px';
    dialog.style.display = 'block';

    // Close the dialog when clicking outside of it. The click that opened the
    // dialog also bubbles up to document, so a { once: true } listener would be
    // used up by that very click; instead the handler removes itself only
    // after it has actually closed the dialog.
    function closeOnOutsideClick(clickEvent) {
        var isClickInside = dialog.contains(clickEvent.target);
        var isClickOnText = clickEvent.target.classList.contains('text-area');

        if (!isClickInside && !isClickOnText) {
            closeDialog(dialogId);
            document.removeEventListener('click', closeOnOutsideClick);
        }
    }
    document.addEventListener('click', closeOnOutsideClick);
}

function closeDialog(dialogId) {
    document.getElementById(dialogId).style.display = 'none';
}
</script>
"""
    controls = selector + scripts
    # The original returned the undefined name ``Controls`` (NameError).
    return controls
71
+
72
+
73
def _normalize_section_number(section_number):
    """Return *section_number* without surrounding whitespace or trailing dots.

    The original code called an undefined ``normalize_section``. The navigation
    derives nesting depth from the number of dots in this value, so a trailing
    dot ("1.2.") is removed to make "1.2" count as depth 1.
    """
    return str(section_number).strip().rstrip(".")


def _citation_modal(ref_id, label, cit_info):
    """Build the hidden ``<div class="dialog">`` describing one bibliography entry.

    ``ref_id`` and ``label`` must already be HTML-escaped. ``cit_info`` is read
    through the keys ``title``, ``authors`` (iterable of strings), ``year``,
    ``journal`` and ``doi``.
    """
    from html import escape

    title = escape(str(cit_info['title']))
    authors = escape(", ".join(cit_info['authors']))
    year = escape(str(cit_info['year']))
    journal = escape(str(cit_info['journal']))
    doi = escape(str(cit_info['doi']))
    return f"""<div id="{ref_id}" class="dialog">
<b>{label}</b><br>
<b>Title:</b> {title}<br>
<b>Authors:</b> {authors}<br>
<b>Year:</b> {year}<br>
<b>Journal:</b> {journal}<br>
<b>DOI:</b> <a href="https://doi.org/{doi}">{doi} </a><br>
<button class="close-button" onclick="closeDialog('{ref_id}')">Close</button>
</div>"""


def _build_sections(soup):
    """Convert every ``<div>`` of the parsed document into an HTML ``<section>``.

    Returns:
        A 3-tuple ``(sections_list, sections_content, citation_modals)``:
        ``sections_list`` holds one ``{'num', 'text'}`` dict per headed div,
        ``sections_content`` is the concatenated section markup, and
        ``citation_modals`` is a list of dialog ``<div>`` strings, one per
        distinct cited bibliography entry.
    """
    from html import escape

    # bs4 is already a dependency of this file; only these two names were
    # never imported, which made the isinstance checks raise NameError.
    from bs4 import NavigableString, Tag

    bib = Get_Bibliography(soup)
    sections_list = []
    content_parts = []
    # Keyed by reference id so a work cited several times yields a single
    # dialog (duplicate element ids are invalid HTML).
    modals_by_id = {}

    for div in soup.find_all("div"):
        header = div.find("head")
        if header is not None:
            section_number = header.get('n', "")
            section_id = header.text.replace(" ", "_")
            sections_list.append(
                {'num': _normalize_section_number(section_number), 'text': section_id}
            )
            content_parts.append(f"<section id='{escape(section_id)}'>")
            content_parts.append(
                f"<h2>{escape(str(section_number))} {escape(header.text)}</h2>"
            )
        else:
            content_parts.append("<section id=''>")

        for paragraph in div.find_all("p"):
            paragraph_parts = []
            for element in paragraph.contents:
                if isinstance(element, NavigableString):
                    paragraph_parts.append(escape(str(element)))
                    continue
                if not isinstance(element, Tag):
                    continue

                ref_id = None
                if element.name == "ref" and element.get("target") is not None:
                    ref_id = element.get("target").lstrip("#")

                # Anything that cannot become a citation link still
                # contributes its text, so no paragraph content is dropped.
                if ref_id is None or ref_id not in bib:
                    paragraph_parts.append(escape(element.text))
                    continue

                safe_id = escape(ref_id)
                label = escape(element.text)
                paragraph_parts.append(
                    f"""<span class="text-area" onclick="openDialog(event, '{safe_id}')">{label}</span>"""
                )
                if ref_id not in modals_by_id:
                    modals_by_id[ref_id] = _citation_modal(safe_id, label, bib[ref_id])

            content_parts.append(f"<p>{''.join(paragraph_parts)}</p>")

        content_parts.append("</section>")

    return sections_list, "".join(content_parts), list(modals_by_id.values())


def Get_Sections(soup):
    """Generate section markup from the ``<div>`` elements of *soup*.

    Args:
        soup: Parsed document (a BeautifulSoup tree) whose ``div`` elements
            hold optional ``head`` titles and ``p`` paragraphs.

    Returns:
        ``(sections_list, sections_content)`` where ``sections_list`` is a list
        of ``{'num': ..., 'text': ...}`` dicts for the navigation and
        ``sections_content`` is the HTML of all sections.
    """
    sections_list, sections_content, _ = _build_sections(soup)
    return sections_list, sections_content


def Get_Navigation(controls, sections_list=()):
    """Build the sticky navigation panel.

    Args:
        controls: HTML for the reader controls, placed above the links.
        sections_list: Iterable of ``{'num', 'text'}`` dicts; each becomes a
            link to ``#<text>`` indented 20px per dot in ``num``. The original
            read this from an undefined global although the caller already
            passed it as a second argument.

    Returns:
        The navigation markup as a string (the original built it but never
        returned it).
    """
    from html import escape

    parts = [
        "<div class='sticky-content' style='max-height: 100%; overflow-y: auto;'>"
        + controls
        + " <h2> Navigation </h2>"
    ]
    for section in sections_list:
        depth = section['num'].count(".")
        left = f"{20 * depth}px"  # Adjust the multiplier for desired tab width
        anchor = escape(section["text"])
        number = escape(section["num"])
        parts.append(
            f'<p style="margin-left: {left}; font-size: 10px;">'
            f'<a href="#{anchor}">{number} {anchor}</a></p>'
        )
    parts.append("</div>")
    return "".join(parts)


def Get_Article_HTML(pdf):
    """Parse *pdf* and return the complete article as an HTML string.

    The PDF is handed to ``GParse_Paper``; its result is parsed as XML and
    turned into a title block, sections, a navigation panel and citation
    dialogs. The same HTML is also written to ``article_demo.html`` in the
    current working directory.

    Args:
        pdf: The PDF content in whatever form ``GParse_Paper`` accepts
            (the caller passes the bytes read from the uploaded file).

    Returns:
        The assembled HTML document as a string.
    """
    article = GParse_Paper(pdf)
    soup = BeautifulSoup(article, "xml")

    # NOTE: the original called Get_Article_HTML(soup) here, which recursed
    # without end; the call has been removed.

    # Explicit None checks replace a bare ``except:`` around the lookup.
    file_desc = soup.find("fileDesc")
    title_tag = file_desc.find("title") if file_desc is not None else None
    document_title = title_tag.text if title_tag is not None else ""

    html_top = Get_HTMLTop(document_title)
    sections_list, sections_content, citation_modals = _build_sections(soup)
    controls = Get_Controls()
    navigation = Get_Navigation(controls, sections_list)

    # ``style`` was undefined; the module stylesheet is embedded so the saved
    # file is self-contained.
    style = f"<style>{app_style}</style>"

    # Combine all parts into final HTML (closing tags now mirror the opening order).
    article_html = (
        style
        + "<body><article>"
        + html_top
        + sections_content
        + navigation
        + "\n".join(citation_modals)
        + "</article></body>"
    )

    # Explicit UTF-8: article text routinely contains non-ASCII characters.
    with open("article_demo.html", "w", encoding="utf-8") as f:
        f.write(article_html)

    return article_html
162
+
163
+
164
# Reactive holder for the rendered article HTML; empty until a file is dropped.
html = solara.reactive("")


@solara.component
def Page():
    """Render the app: a PDF drop zone followed by the converted article."""
    solara.Style(app_style)

    def on_file(file_info):
        # Solara passes the dropped file's info to this callback; the original
        # took no argument and read from an undefined name ``f``.
        html.value = Get_Article_HTML(file_info["file_obj"].read())

    solara.FileDrop(label="Drag and drop article pdf", on_file=on_file, lazy=True)

    # Pass the markup as inner HTML. The original passed the reactive object
    # positionally, where solara.HTML expects the tag name.
    solara.HTML(tag="div", unsafe_innerHTML=html.value)