import io

import fitz
import streamlit as st

# import PyPDF2


def _clean_page_text(text):
    """Strip every line of *text* and drop the lines that end up empty."""
    stripped_lines = (line.strip() for line in text.split('\n'))
    return '\n'.join(line for line in stripped_lines if line)


def search_pdf(pdf_file, split_search):
    """Return the pages of a PDF whose text contains every search term.

    Matching is a case-insensitive substring test of each term against the
    page text, after the text has been cleaned (lines stripped, blank lines
    removed).

    Args:
        pdf_file: Either an ``io.BytesIO`` holding the PDF bytes (the
            Streamlit upload case) or a value ``fitz.open`` accepts as a
            file name (the local file case).
        split_search: Sequence of search terms; a page is reported only
            when all of them occur in it.

    Returns:
        A list of ``(page_number, cleaned_text)`` tuples in document order,
        with ``page_number`` counted from 1.
    """
    # Lower-case the terms once instead of once per page.
    terms = [term.lower() for term in split_search]

    if isinstance(pdf_file, io.BytesIO):
        # Handling Streamlit case: the PDF lives in memory.
        doc = fitz.open(stream=pdf_file.getvalue(), filetype="pdf")
    else:
        # Handling local file case.
        doc = fitz.open(pdf_file)

    search_results = []
    try:
        for page_num in range(doc.page_count):
            cleaned_text = _clean_page_text(doc.load_page(page_num).get_text())
            # Lower-case the page once, not once per term.
            haystack = cleaned_text.lower()
            if all(term in haystack for term in terms):
                search_results.append((page_num + 1, cleaned_text))
    finally:
        # Release the document even if text extraction raises.
        doc.close()
    return search_results


def final_result(pdf_file, search_term):
    """Search a PDF for *search_term* and format the matches as text.

    The term is split on single spaces and a page matches only when it
    contains every resulting piece (see ``search_pdf``).

    Args:
        pdf_file: PDF source, passed through to ``search_pdf``.
        search_term: The raw search string.

    Returns:
        One ``'<term>' on page <n>:`` section per matching page, each
        followed by that page's cleaned text, or a "No results found"
        message when no page matches.
    """
    # Repeated spaces produce empty pieces; those are harmless because the
    # empty string is a substring of every page.
    split_search = search_term.split(' ')
    results = search_pdf(pdf_file, split_search)
    if not results:
        return f"No results found for '{search_term}'."
    return "".join(
        f"'{search_term}' on page {page_num}:\n-{text}\n\n"
        for page_num, text in results
    )


st.set_page_config(
    page_title="Search in PDF",
    layout="wide",
    initial_sidebar_state="expanded",
)
# NOTE(review): the script continues with an st.markdown(...) call that was
# cut off in the reviewed view; it is not reproduced here.