"""Streamlit app: find the pages of an uploaded PDF that contain every search word."""

import io

import fitz
import streamlit as st


def _open_document(pdf_file):
    """Open *pdf_file* with PyMuPDF, from an in-memory buffer or a path."""
    if isinstance(pdf_file, io.BytesIO):
        # In-memory buffer (e.g. a Streamlit upload): hand the raw bytes over.
        return fitz.open(stream=pdf_file.getvalue(), filetype="pdf")
    # Anything else is passed straight to fitz.open (local file case).
    return fitz.open(pdf_file)


def search_pdf(pdf_file, split_search):
    """Return the pages whose text contains every term in *split_search*.

    Matching is a case-insensitive substring test against the page text
    after blank lines are removed and each line is stripped.

    Args:
        pdf_file: An ``io.BytesIO`` instance holding the PDF bytes, or any
            value accepted by ``fitz.open`` (such as a file path).
        split_search: Iterable of search terms; all must occur on a page
            for that page to be reported.

    Returns:
        A list of ``(page_number, cleaned_text)`` tuples, with page numbers
        starting at 1, in document order.
    """
    # Lower-case each term once. Empty terms are dropped: "" is a substring
    # of every text, so they never affected which pages match.
    terms = [term.lower() for term in split_search if term]

    search_results = []
    # The context manager closes the document even if text extraction fails.
    with _open_document(pdf_file) as doc:
        for page_num in range(doc.page_count):
            text = doc.load_page(page_num).get_text()
            # Strip every line and discard the ones that end up empty.
            stripped = (line.strip() for line in text.split('\n'))
            cleaned_text = '\n'.join(line for line in stripped if line)
            haystack = cleaned_text.lower()
            if all(term in haystack for term in terms):
                search_results.append((page_num + 1, cleaned_text))
    return search_results


def final_result(pdf_file, search_term):
    """Search *pdf_file* for *search_term* and format the hits as text.

    The search term is split on single spaces and a page matches only when
    it contains every resulting word.

    Args:
        pdf_file: PDF source, forwarded unchanged to ``search_pdf``.
        search_term: The raw query string entered by the user.

    Returns:
        One entry per matching page (quoted term, page number, page text),
        or a "No results found" message when no page matches.
    """
    results = search_pdf(pdf_file, search_term.split(' '))
    if not results:
        return f"No results found for '{search_term}'."
    return "".join(
        f"'{search_term}' on page {page_num}:\n-{text}\n\n"
        for page_num, text in results
    )


st.set_page_config(
    page_title="Search in PDF",
    layout="wide",
    initial_sidebar_state="expanded",
)
# NOTE(review): unsafe_allow_html=True suggests this title originally carried
# HTML markup that is no longer present in the string - confirm and restore.
st.markdown("\n\nSearch in PDF\n\n", unsafe_allow_html=True)
st.write("---")

col1, col2 = st.columns(spec=[0.4, 0.6])

with col1:
    input_file = st.file_uploader(label="Upload .pdf File", type='pdf')
    search_term = st.text_input(
        label="Enter Search-term",
        placeholder="Search here...",
    )
    col3, col4 = st.columns(spec=[0.5, 0.5])
    with col3:
        all_data = st.button("Submit")

with col2:
    if all_data:
        # Validate inputs first; a missing file is reported before a blank term.
        if input_file is None:
            st.error("Please upload a PDF file")
        elif search_term.strip() == "":
            st.error("Please enter a search term")
        else:
            result = final_result(input_file, search_term)
            st.text_area("Search Results", result, height=400)