# Spaces: Sleeping (page-header text captured along with the source; not part of the program)
from io import BytesIO

import PyPDF2
import streamlit as st
def search_pdf(pdf_file, search_term):
    """Return the pages of a PDF whose text contains ``search_term``.

    The match is a case-insensitive substring test against the text
    extracted from each page.

    Args:
        pdf_file: A path, a binary file-like object (for example the
            object returned by ``st.file_uploader``), or raw PDF bytes.
        search_term: Text to look for.

    Returns:
        A list of ``(page_number, page_text)`` tuples, one per matching
        page, in document order. Page numbers start at 1.
    """
    # Raw bytes are wrapped so the reader always receives a path or a stream.
    if isinstance(pdf_file, (bytes, bytearray)):
        pdf_file = BytesIO(pdf_file)

    # PyPDF2 is the PDF library this file imports; the previous body called
    # ``fitz``, which is never imported, so every search raised NameError.
    reader = PyPDF2.PdfReader(pdf_file)

    # Lower-case the term once instead of once per page.
    needle = search_term.lower()

    search_results = []
    for page_num, page in enumerate(reader.pages, start=1):
        # Image-only pages may yield no text; treat that as an empty page.
        text = page.extract_text() or ""
        if needle in text.lower():
            search_results.append((page_num, text))
    return search_results
def final_result(pdf_file, search_term):
    """Build the text report for a search of ``pdf_file``.

    Args:
        pdf_file: The PDF to search; passed straight to ``search_pdf``.
        search_term: Text to look for.

    Returns:
        One section per matching page, each consisting of a header line
        with the quoted term and the page number, the page text prefixed
        with ``-``, and a blank line. When nothing matches, a single
        "No results found" message is returned instead.
    """
    matches = search_pdf(pdf_file, search_term)

    # Nothing matched: report that and stop.
    if not matches:
        return f"No results found for '{search_term}'."

    # One section per matching page, concatenated in page order.
    return "".join(
        f"'{search_term}' on page {number}:\n-{page_text}\n\n"
        for number, page_text in matches
    )
# ---- Page setup: wide layout, centred heading, horizontal rule ----
st.set_page_config(
    page_title="Search in PDF",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.markdown(
    "<h3 style='text-align:center; font-size:24px;'>Search in PDF</h3>",
    unsafe_allow_html=True,
)
st.write("---")

# Left column holds the inputs, right column holds the output.
col1, col2 = st.columns(spec=[0.4, 0.6])

with col1:
    input_file = st.file_uploader(label="Upload .pdf File", type='pdf')
    search_term = st.text_input(
        label="Enter Search-term",
        placeholder="Search here...",
    )
    # NOTE(review): the captured source lost its indentation; placing the
    # button row inside col1 (under the inputs) is assumed -- confirm.
    col3, col4 = st.columns(spec=[0.5, 0.5])
    with col3:
        all_data = st.button("Submit")
    # col4 is intentionally empty; a "Clear" button was sketched for it in
    # earlier commented-out code but never enabled.

with col2:
    # Only react on the run triggered by pressing "Submit".
    if all_data:
        if input_file is None:
            st.error("Please upload a PDF file.")
        elif search_term.strip() == "":
            st.error("Please enter a search term.")
        else:
            # Both inputs are present: run the search and show the report.
            result = final_result(input_file, search_term)
            st.text_area("Search Results", result, height=400)