pdf-tools-suite / app.py
bodhak's picture
Upload 6 files
92c0981 verified
import streamlit as st
from textsumm import summarizer
from pdfsum import extract_text_from_pdf, summarize_text, split_text_into_chunks
from pdfpass import remove_pdf_password
from papersearch import fetch_papers, filter_papers_by_year
from io import BytesIO
from datetime import datetime
from pypdf import PdfReader, PdfWriter
# Streamlit App Config
# The icon literals were mis-encoded (UTF-8 bytes read through a legacy code
# page); they are restored to the intended page emoji here.
st.set_page_config(page_title="PDF Tools Suite", page_icon="📄", layout="wide")

# Sidebar Navigation
# `page` holds the label of the selected tool; the if/elif chain further down
# compares against these exact strings, so the labels must stay in sync.
st.sidebar.title("📄 PDF Tools Suite")
page = st.sidebar.radio(
    "Select a tool",
    [
        "Text Summarizer",
        "PDF Summarizer",
        "PDF Password Remover",
        "Research Paper Search",
        "PDF Merger",
        "PDF Splitter",
        "PDF to Text Converter",
    ],
)
# Tool dispatch: exactly one branch runs, chosen by the sidebar selection
# stored in `page`.

# Tool: Text Summarizer
if page == "Text Summarizer":
    st.title("📝 Text Summarizer")
    user_input = st.text_area("Enter text to summarize")
    if st.button("Summarize"):
        if user_input.strip():
            summary = summarizer(user_input, max_length=130, min_length=30, do_sample=False)
            st.subheader("Summary")
            st.write(summary[0]["summary_text"])
        else:
            # Avoid handing an empty/whitespace-only string to the summarizer.
            st.warning("Please enter some text to summarize.")

# Tool: PDF Summarizer
elif page == "PDF Summarizer":
    st.title("📜 PDF Summarizer")
    uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
    if uploaded_file is not None:
        pdf_text = extract_text_from_pdf(uploaded_file)
        # NOTE(review): assumes extract_text_from_pdf returns a str (it is also
        # fed to st.text_area in the converter tool) - confirm against pdfsum.
        if pdf_text and pdf_text.strip():
            chunks = split_text_into_chunks(pdf_text)
            summaries = summarize_text(chunks)
            full_summary = " ".join(summaries)
            st.subheader("Summary")
            st.write(full_summary)
        else:
            # Nothing was extracted, so there is nothing to summarize.
            st.warning("No extractable text was found in this PDF.")

# Tool: PDF Password Remover
elif page == "PDF Password Remover":
    st.title("🔑 Remove PDF Password")
    uploaded_file = st.file_uploader("Choose a password-protected PDF", type=["pdf"])
    password = st.text_input("Enter the PDF password", type="password")
    if uploaded_file and password and st.button("Remove Password"):
        output = remove_pdf_password(uploaded_file, password)
        # A BytesIO result is treated as success; any other return value is
        # shown to the user as the error text.
        if isinstance(output, BytesIO):
            st.success("Password removed successfully!")
            st.download_button("Download PDF", data=output, file_name="unlocked_pdf.pdf", mime="application/pdf")
        else:
            st.error(f"Error: {output}")

# Tool: Research Paper Search
elif page == "Research Paper Search":
    st.title("🔍 Research Paper Search (arXiv)")
    query = st.text_input("Enter topic or keywords", placeholder="e.g., machine learning")
    max_results = st.slider("Number of results", 1, 50, 10)
    # Evaluate the current year once so both inputs share the same upper bound.
    current_year = datetime.now().year
    col1, col2 = st.columns(2)
    with col1:
        start_year = st.number_input("Start Year", min_value=1900, max_value=current_year, value=2000)
    with col2:
        end_year = st.number_input("End Year", min_value=1900, max_value=current_year, value=current_year)
    if st.button("Search"):
        if not query.strip():
            st.warning("Please enter a topic or keywords to search.")
        elif start_year > end_year:
            # An inverted range can never match; say so instead of reporting
            # "no papers found".
            st.error("Start Year must not be later than End Year.")
        else:
            papers = fetch_papers(query, max_results)
            papers_filtered = filter_papers_by_year(papers, start_year, end_year)
            if papers_filtered:
                for idx, paper in enumerate(papers_filtered, start=1):
                    st.write(f"### {idx}. {paper['title']}")
                    st.write(f"**Authors**: {', '.join(paper['authors'])}")
                    st.write(f"**Published**: {paper['published']}")
                    st.write(f"[Read More]({paper['link']})")
                    st.write("---")
            else:
                st.warning("No papers found in the selected range.")

# Tool: PDF Merger
elif page == "PDF Merger":
    st.title("📎 Merge Multiple PDFs")
    uploaded_files = st.file_uploader("Upload multiple PDF files", type=["pdf"], accept_multiple_files=True)
    if uploaded_files and st.button("Merge PDFs"):
        pdf_writer = PdfWriter()
        for file in uploaded_files:
            pdf_reader = PdfReader(file)
            # Named pdf_page so the loop does not rebind `page`, the sidebar
            # selection this if/elif chain dispatches on.
            for pdf_page in pdf_reader.pages:
                pdf_writer.add_page(pdf_page)
        output = BytesIO()
        pdf_writer.write(output)
        # Rewind so the download starts at the first byte of the merged file.
        output.seek(0)
        st.download_button("Download Merged PDF", data=output, file_name="merged.pdf", mime="application/pdf")

# Tool: PDF Splitter
elif page == "PDF Splitter":
    st.title("✂️ Split PDF into Pages")
    uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
    if uploaded_file:
        pdf_reader = PdfReader(uploaded_file)
        # One single-page PDF and one download button per source page.
        # pdf_page (not `page`) keeps the sidebar selection variable intact.
        for i, pdf_page in enumerate(pdf_reader.pages):
            pdf_writer = PdfWriter()
            pdf_writer.add_page(pdf_page)
            output = BytesIO()
            pdf_writer.write(output)
            output.seek(0)
            st.download_button(f"Download Page {i+1}", data=output, file_name=f"page_{i+1}.pdf", mime="application/pdf")

# Tool: PDF to Text Converter
elif page == "PDF to Text Converter":
    st.title("📜 Extract Text from PDF")
    uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
    if uploaded_file:
        pdf_text = extract_text_from_pdf(uploaded_file)
        st.text_area("Extracted Text", pdf_text, height=300)