# pdf_search / app.py
# Provenance (from the hosting page, not part of the program):
#   author: Vihang28 -- last commit 641ffd1 "Update app.py" (verified)
#   file size: 2.09 kB
import streamlit as st
import PyPDF2
from io import BytesIO
def search_pdf(pdf_file, search_term):
    """Find the pages of a PDF whose text contains ``search_term``.

    The match is a case-insensitive substring test against the text that
    PyPDF2 extracts from each page.

    Args:
        pdf_file: A binary file-like object exposing ``read()`` that yields
            the PDF bytes (the app passes the Streamlit upload here).
        search_term: The text to look for.

    Returns:
        A list of ``(page_number, page_text)`` tuples, one per matching page,
        in document order. Page numbers are 1-based. The list is empty when
        no page matches.
    """
    # Lower-case the needle once instead of once per page.
    needle = search_term.lower()
    search_results = []
    # Copy the upload into an in-memory buffer for PdfReader to parse.
    with BytesIO(pdf_file.read()) as file:
        reader = PyPDF2.PdfReader(file)
        for page_num, page in enumerate(reader.pages, start=1):
            # Defensive: treat a missing extraction result as empty text so a
            # page without a text layer cannot crash the search on .lower().
            text = page.extract_text() or ""
            if needle in text.lower():
                search_results.append((page_num, text))
    return search_results
def final_result(pdf_file, search_term):
    """Build the plain-text report for a PDF search.

    Delegates the lookup to ``search_pdf`` and formats each matching page as
    a section: the quoted search term and page number on one line, then the
    page text prefixed with ``-``, then a blank line.

    Args:
        pdf_file: File-like object holding the PDF; passed to ``search_pdf``.
        search_term: The text that was searched for.

    Returns:
        The concatenated sections for all matching pages, or a single
        "No results found" sentence when ``search_pdf`` returns nothing.
    """
    matches = search_pdf(pdf_file, search_term)
    if not matches:
        return f"No results found for '{search_term}'."

    sections = [
        f"'{search_term}' on page {page_num}:\n-{text}\n\n"
        for page_num, text in matches
    ]
    return "".join(sections)
# ---- Page setup: wide layout with a centered heading and a divider ----
st.set_page_config(
    page_title="Search in PDF",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.markdown(
    "<h3 style='text-align:center; font-size:24px;'>Search in PDF</h3>",
    unsafe_allow_html=True,
)
st.write("---")

# Left column holds the inputs, right column shows the outcome.
col1, col2 = st.columns(spec=[0.4, 0.6])

# ---- Inputs: upload, search term, submit button ----
with col1:
    input_file = st.file_uploader(label="Upload .pdf File", type='pdf')
    search_term = st.text_input(label="Enter Search-term", placeholder="Search here...")
    # The submit button sits in the first of two half-width sub-columns.
    # col4 is currently unused (a "Clear" button there was disabled).
    col3, col4 = st.columns(spec=[0.5, 0.5])
    with col3:
        all_data = st.button("Submit")

# ---- Output: validate the inputs, then show results or an error ----
with col2:
    if all_data:
        if input_file is None:
            st.error("Please upload a PDF file.")
        elif search_term.strip() == "":
            st.error("Please enter a search term.")
        else:
            # Both inputs are present: run the search and show the report.
            result = final_result(input_file, search_term)
            st.text_area("Search Results", result, height=400)