# Fastify-Reader / app.py
# Author: akarshrajsingh7
# Version 2 (Encoding fix), commit b9648f5, 4.81 kB
import streamlit as st
from PyPDF2 import PdfReader
from concurrent.futures import ThreadPoolExecutor
from base64 import b64encode
from fpdf import FPDF
import io, string, re, math
from io import StringIO
# Importing the Fastify Class
from fast_reader import Fastify_Reader
def pdf_extract_text(pdf_docs):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources, each passed as-is to ``PdfReader``.
            ``None`` or an empty iterable is accepted.

    Returns:
        The text of all pages, in document order then page order, joined
        without any separator. An empty string when there is nothing to read.
    """
    if not pdf_docs:
        return ""

    # Collect the page texts and join once instead of growing a string with
    # ``+=`` inside the nested loops.
    page_texts = []
    for pdf in pdf_docs:
        page_texts.extend(
            # Defensive: treat a falsy extraction result as empty text so the
            # final join cannot fail on a non-string value.
            page.extract_text() or ""
            for page in PdfReader(pdf).pages
        )
    return "".join(page_texts)
def text_to_pdf_fastify(text):
    """Render the "fastified" version of ``text`` as a PDF.

    The text is first transformed by ``Fastify_Reader(text).fastify()`` and
    the result is rendered by ``text_to_pdf``, which performs the Latin-1
    clean-up and the markdown-enabled layout. Delegating keeps the original
    and fastified PDFs on the exact same rendering path.

    Args:
        text: The plain input text.

    Returns:
        The generated PDF document as ``bytes``.
    """
    # NOTE(review): presumably ``fastify`` marks the emphasised fragments with
    # markdown syntax that ``multi_cell(markdown=True)`` understands - confirm
    # in ``fast_reader``.
    bold_text = Fastify_Reader(text).fastify()
    return text_to_pdf(bold_text)
def text_to_pdf(text):
    """Lay out ``text`` on a fresh PDF document and return it as ``bytes``.

    Characters that cannot be represented in Latin-1 are silently dropped
    before rendering. The text is written with ``markdown=True``, so markdown
    emphasis markers contained in ``text`` are interpreted by the renderer.

    Args:
        text: The text to render.

    Returns:
        The generated PDF document as ``bytes``.
    """
    # Round-trip through Latin-1, discarding whatever it cannot encode.
    # NOTE(review): presumably required because the built-in font only covers
    # Latin-1 - confirm against the fpdf documentation.
    latin1_safe = text.encode('latin-1', 'ignore').decode('latin-1')

    document = FPDF()
    document.add_page()
    document.set_font("Arial", size=12)
    # Width 0 with a line height of 10, as in the original call.
    document.multi_cell(0, 10, txt=latin1_safe, markdown=True)

    rendered = document.output()
    return bytes(rendered)
def _pdf_embed_html(pdf_bytes, download_name=None):
    """Return an ``<embed>`` tag that shows ``pdf_bytes`` inline as a data URI.

    Args:
        pdf_bytes: The raw PDF document.
        download_name: Optional value for a ``download`` attribute. The text
            tab has always emitted one, so it is kept to leave that markup
            unchanged; when omitted the attribute is not written at all.
    """
    encoded = b64encode(pdf_bytes).decode("utf-8")
    extra = f' download="{download_name}"' if download_name else ""
    return (
        f'<embed src="data:application/pdf;base64,{encoded}" '
        f'width = "100%" height = 600 type="application/pdf"{extra}>'
    )


def _show_comparison(text, widget_key, download_names=(None, None)):
    """Render the download button and the side-by-side PDF viewers for ``text``.

    Args:
        text: The text to render in both its original and fastified form.
        widget_key: Suffix that keeps the download button's key unique per tab.
        download_names: ``(original, fastified)`` values for the ``download``
            attribute of the two ``<embed>`` tags; ``None`` omits it.
    """
    with st.spinner("Processing"):
        original_bytes = text_to_pdf(text)
        # Build the fastified PDF once and reuse it for both the viewer and
        # the download button instead of rendering it twice.
        fastified_bytes = text_to_pdf_fastify(text)
        original_display = _pdf_embed_html(original_bytes, download_names[0])
        pdf_display = _pdf_embed_html(fastified_bytes, download_names[1])

    # Three equal columns; only the right-most one is used so the button
    # sits on the right-hand side.
    _, _, download_col = st.columns(3)
    with download_col:
        st.download_button(
            label="Download Fastified PDF",
            data=fastified_bytes,
            file_name='output.pdf',
            mime='application/pdf',
            key=f"download-{widget_key}",
        )

    left_col, right_col = st.columns([1, 1], gap="small")
    viewers = (
        (left_col, "Original PDF viewer", original_display),
        (right_col, "Fastified PDF viewer", pdf_display),
    )
    for column, title, embed_html in viewers:
        with column:
            with st.container(border=True):
                st.markdown(
                    f"<div style='text-align: center;'><strong>{title}</strong></div>",
                    unsafe_allow_html=True,
                )
                st.markdown(embed_html, unsafe_allow_html=True)


st.set_page_config(page_title="Fastify Reader",
                   page_icon=":books:",
                   layout="wide")

with st.sidebar:
    st.image("Logo.jpg")
    st.markdown("<div style='text-align: center;'>How fast can you read really?</div>", unsafe_allow_html=True)

tab1, tab2 = st.tabs(["PDF file", "Input Text"])

with tab1:
    st.header("PDF File")
    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf", accept_multiple_files=True)

    if st.button("Submit", key="pdf"):
        if not uploaded_file:
            # Nothing uploaded: say so instead of rendering two blank PDFs.
            st.warning("Please upload at least one PDF file first.")
        else:
            with st.spinner("Processing"):
                text = pdf_extract_text(uploaded_file)
            _show_comparison(text, "pdf")

with tab2:
    st.header("Input Text")
    user_input = st.text_input("Enter some text")

    if st.button("Submit", key="input-text"):
        if not user_input.strip():
            # Empty or whitespace-only input would only yield blank PDFs.
            st.warning("Please enter some text first.")
        else:
            _show_comparison(
                user_input,
                "input-text",
                download_names=("original.pdf", "Modified.pdf"),
            )