File size: 6,210 Bytes
f0b6424 315e4ca 6cab06b 315e4ca 007ba69 d697159 e3c7c31 105978d 315e4ca f0b6424 9537b09 f0b6424 2abddc0 f0b6424 b1646ac f0b6424 6cab06b f0b6424 3c79ebf f0b6424 31cd46f f0b6424 b950b09 f0b6424 63872ab f0b6424 3c79ebf f0b6424 47a068e bdb6fd9 3c79ebf f0b6424 d697159 93c85b9 2110f9e f0b6424 33d6a09 e7287b1 a9f16c1 2110f9e 4301cea d7d6da7 4301cea 89a55c6 677b606 4301cea a15384c af7c477 fb36c61 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
from pathlib import Path
# Directory that contains this file.
HERE = Path(__file__).parent
import sys

# sys.path entries must be plain strings: the import machinery ignores any
# other type, so appending the Path object itself would have no effect.
sys.path.append(str(HERE))
from utils import GParse_Paper, Get_Bibliography, Normalize_Section
import solara
from bs4 import BeautifulSoup
from solara.components.file_drop import FileInfo
from packaging.version import Version, InvalidVersion
from bs4 import NavigableString,Tag
import requests
# Stylesheet text read once at import time from style.css next to this file.
app_style = HERE.joinpath("style.css").read_text()
def Get_HTMLTop(title):
    """Build the opening HTML fragment of the converted article.

    Args:
        title: Document title as plain text. It is converted with ``str()``
            and HTML-escaped, so characters such as ``<`` or ``&`` in a title
            cannot break or inject markup.

    Returns:
        str: A fragment that opens ``<body>``, renders the title as an
        ``<h1>`` and adds an empty schema.org ``Person`` span.
    """
    # Imported locally by name: this module rebinds the global name ``html``.
    from html import escape

    safe_title = escape(str(title))
    # Top part of HTML
    return f"""
<body>
<h1>{safe_title}</h1>
<span typeof="schema:Person" resource="http://orcid.org/0000-0003-1279-3709">
</span>
"""
def Get_Controls():
    """Return the markup for the reader controls.

    The fragment holds a text-size ``<select>`` (16px pre-selected) and two
    buttons that toggle the accessible font and the accessible background.
    The JavaScript handlers named in the attributes are not defined here.
    """
    font_sizes = (10, 12, 14, 16, 18, 20, 24, 28, 32, 36, 40, 44, 48, 50)
    preselected = 16

    option_rows = []
    for size in font_sizes:
        flag = " selected" if size == preselected else ""
        option_rows.append(f'<option value="{size}"{flag}>{size}px</option>')

    # Leading and trailing empty entries reproduce the newline that opens and
    # closes the fragment.
    rows = [
        "",
        '<label for="textSize">Text Size: </label>',
        '<select id="textSize" name="textSize" onchange="adjustTextSize(this.value)">',
        *option_rows,
        "</select>",
        '<button onclick="toggleOpenDyslexic()">Accessible Font</button>',
        '<button onclick="toggleAccessibleBackground()">Accessible Background</button>',
        "",
    ]
    return "\n".join(rows)
def _citation_modal_html(ref_id, label, cit_info):
    """Return the hidden dialog ``<div>`` describing one bibliography entry."""
    from html import escape

    def text(value):
        # Bibliography fields come from the parsed PDF; escape them all.
        return escape(str(value))

    safe_id = text(ref_id)
    title = text(cit_info['title'])
    authors = text(", ".join(cit_info['authors']))
    year = text(cit_info['year'])
    journal = text(cit_info['journal'])
    doi = text(cit_info['doi'])
    return f"""<div id="{safe_id}" class="dialog">
<b>{text(label)}</b><br>
<b>Title:</b> {title}<br>
<b>Authors:</b> {authors}<br>
<b>Year:</b> {year}<br>
<b>Journal:</b> {journal}<br>
<b>DOI:</b> <a href="https://doi.org/{doi}">{doi} </a><br>
<button class="close-button" onclick="closeDialog('{safe_id}')">Close</button>
</div>"""


def Get_Sections(soup):
    """Convert the parsed article into HTML sections and citation dialogs.

    Every ``<div>`` found in ``soup`` becomes one ``<section>``. A ``<head>``
    child supplies the heading, its ``n`` attribute the section number, and
    the heading text with spaces replaced by underscores the section id.
    Each ``<p>`` is rebuilt piece by piece: plain text is copied, and a
    ``<ref>`` whose ``target`` names a bibliography entry becomes a clickable
    span that opens the dialog for that entry.

    All text taken from the document is HTML-escaped before being emitted.
    Any other inline element, including a ``<ref>`` that is not in the
    bibliography, contributes its plain text so that no wording is lost.
    One dialog is produced per bibliography entry, even when it is cited
    several times, so element ids stay unique.

    Args:
        soup: Parsed document tree (BeautifulSoup) of the article.

    Returns:
        tuple: ``(sections_list, sections_content, citation_modals)`` where
        ``sections_list`` is a list of ``{'num', 'text'}`` dicts (``text`` is
        the raw, unescaped section id), ``sections_content`` is the HTML of
        all sections, and ``citation_modals`` is a list of dialog fragments.
    """
    from html import escape

    bib = Get_Bibliography(soup)
    sections_list = []
    citation_modals = []
    emitted_modals = set()
    chunks = []

    for div in soup.find_all("div"):
        header = div.find("head")
        if header is not None:
            section_number = header.get("n", "")
            section_id = header.text.replace(" ", "_")
            sections_list.append(
                {"num": Normalize_Section(section_number), "text": section_id}
            )
            chunks.append(f"<section id='{escape(section_id)}'>")
            chunks.append(
                f"<h2>{escape(str(section_number))} {escape(header.text)}</h2>"
            )
        else:
            chunks.append("<section id=''>")

        for paragraph in div.find_all("p"):
            pieces = []
            for element in paragraph.contents:
                if isinstance(element, NavigableString):
                    pieces.append(escape(str(element)))
                    continue

                ref_id = None
                if isinstance(element, Tag) and element.name == "ref":
                    target = element.get("target")
                    if target is not None:
                        ref_id = target.lstrip("#")

                if ref_id is None or ref_id not in bib:
                    # Not a resolvable citation: keep the visible text only.
                    pieces.append(escape(element.text))
                    continue

                pieces.append(
                    f"""<span class="text-area" onclick="openDialog(event, '{escape(ref_id)}')">{escape(element.text)}</span>"""
                )
                if ref_id not in emitted_modals:
                    emitted_modals.add(ref_id)
                    citation_modals.append(
                        _citation_modal_html(ref_id, element.text, bib[ref_id])
                    )
            chunks.append(f"<p>{''.join(pieces)}</p>")
        chunks.append("</section>")

    return sections_list, "".join(chunks), citation_modals
def Get_Navigation(controls, sections_list):
    """Build the sticky navigation panel that links to every section.

    Args:
        controls: HTML fragment placed at the top of the panel, inserted
            verbatim.
        sections_list: Iterable of dicts with the string keys ``'num'``
            (section number) and ``'text'`` (section id, also used as the
            link label). Both values are HTML-escaped on output.

    Returns:
        str: A ``<div class='sticky-content'>`` holding the controls, a
        "Navigation" heading and one indented link per section.
    """
    from html import escape

    # Generate navigation for sections
    entries = []
    for section in sections_list:
        number = section["num"]
        # Each dot in the section number adds one level of 20px indentation;
        # a number without dots yields "0px".
        left = f"{20 * number.count('.')}px"
        anchor = escape(str(section["text"]))
        entries.append(
            f'<p style="margin-left: {left}; font-size: 10px;">'
            f'<a href="#{anchor}">{escape(number)} {anchor}</a></p>'
        )

    return (
        "<div class='sticky-content' style='max-height: 100%; overflow-y: auto;'>"
        + controls
        + " <h2> Navigation </h2>"
        + "".join(entries)
        + "</div>"
    )
def Get_Article_HTML(pdf):
    """Convert an article PDF into a single HTML string.

    The PDF is passed to ``GParse_Paper`` and the result is parsed as XML.
    The page is then assembled from the title block, the article sections,
    the navigation panel and the citation dialogs.

    Args:
        pdf: The PDF to convert, forwarded unchanged to ``GParse_Paper``.
            Callers in this file pass either raw bytes or an uploaded file
            object.

    Returns:
        str: The HTML of the converted article.
    """
    article = GParse_Paper(pdf)
    soup = BeautifulSoup(article, "xml")

    # find() returns None for a missing element, so the chained lookup raises
    # AttributeError when the document has no fileDesc/title; use an empty
    # title in that case.
    try:
        document_title = soup.find("fileDesc").find("title").text
    except AttributeError:
        document_title = ""

    html_top = Get_HTMLTop(document_title)
    sections_list, sections_content, citation_modals = Get_Sections(soup)
    controls = Get_Controls()
    navigation = Get_Navigation(controls, sections_list)

    # Combine all parts into final HTML. The dialogs are separated by a
    # newline; the previous "n" separator leaked a literal letter into the page.
    return (
        "<article id='article'>"
        + str(html_top)
        + str(sections_content)
        + str(navigation)
        + "\n".join(citation_modals)
        + "</article></body>"
    )
# Reactive holding the HTML currently shown on the page; starts with the landing heading.
html = solara.reactive("<h1> Article PDF to HTML converter </h1>")
# Reactive string, initially empty; not referenced in the visible part of this file.
xml = solara.reactive("")
# Reactive flag; Page sets it to True after an article has been converted.
loaded = solara.reactive(False)
@solara.component
def Page():
    """Render the converter page.

    Until an article has been loaded the page offers a demo button and a
    file drop area. Afterwards it shows the converted article together with
    a download button for the generated HTML.
    """
    # Imported locally by name: the global ``html`` is the reactive page state.
    from html import escape

    demo_url = "https://familymedicine.med.wayne.edu/mph/project/green_2006_narrative_literature_reviews.pdf"

    solara.Style(app_style)

    def on_file(f: FileInfo):
        html.value = "Please wait..."
        html.value = Get_Article_HTML(f["file_obj"])
        loaded.value = True

    def on_demo():
        html.value = "Please wait..."
        try:
            # A timeout keeps the UI from hanging on an unresponsive server, and
            # the status check stops an error page being parsed as a PDF.
            response = requests.get(demo_url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as err:
            html.value = f"<p>Could not download the demo PDF: {escape(str(err))}</p>"
            return
        html.value = Get_Article_HTML(response.content)
        loaded.value = True

    if not loaded.value:
        solara.Button(label="Use Demo PDF", on_click=on_demo)
        solara.FileDrop(label="Drag and drop custom article pdf", on_file=on_file, lazy=True)
    solara.HTML(unsafe_innerHTML=html.value)
    if loaded.value:
        # Hand over the current HTML string, not the reactive wrapper object.
        solara.FileDownload(html.value, filename="articledemo.html", label="Download HTML")
|