"""Streamlit app: upload a PDF and list every page that contains a search term."""

from io import BytesIO

import PyPDF2
import streamlit as st


def search_pdf(pdf_file, search_term):
    """Find the pages of a PDF whose extracted text contains ``search_term``.

    The match is a case-insensitive substring test against each page's text.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts. The UI below passes
            the object returned by ``st.file_uploader``.
        search_term: Text to look for.

    Returns:
        A list of ``(page_number, page_text)`` tuples, one per matching page,
        in document order. Page numbers are 1-based.
    """
    # Lower-case the term once instead of on every page.
    needle = search_term.lower()
    reader = PyPDF2.PdfReader(pdf_file)

    matches = []
    for page_num, page in enumerate(reader.pages, start=1):
        # Treat a page with no extractable text as empty rather than failing.
        text = page.extract_text() or ""
        if needle in text.lower():
            matches.append((page_num, text))
    return matches


def final_result(pdf_file, search_term):
    """Build the human-readable report shown in the results text area.

    Args:
        pdf_file: PDF source, forwarded unchanged to ``search_pdf``.
        search_term: Text to look for.

    Returns:
        One section per matching page (search term, page number, then the
        page text), or a "No results found" message when nothing matched.
    """
    results = search_pdf(pdf_file, search_term)
    if not results:
        return f"No results found for '{search_term}'."

    return "".join(
        f"'{search_term}' on page {page_num}:\n-{text}\n\n"
        for page_num, text in results
    )


st.set_page_config(
    page_title="Search in PDF",
    layout="wide",
    initial_sidebar_state="expanded",
)

# NOTE(review): in the source this literal spanned several physical lines
# inside ordinary double quotes, which is a syntax error. The text is kept
# as-is; since unsafe_allow_html=True is passed, an HTML wrapper may have been
# lost -- confirm the intended markup.
st.markdown("\n\nSearch in PDF\n\n", unsafe_allow_html=True)
st.write("---")

col1, col2 = st.columns(spec=[0.4, 0.6])

# Left column: inputs and the submit button.
with col1:
    input_file = st.file_uploader(label="Upload .pdf File", type='pdf')
    search_term = st.text_input(
        label="Enter Search-term",
        placeholder="Search here...",
    )
    col3, col4 = st.columns(spec=[0.5, 0.5])
    with col3:
        all_data = st.button("Submit")

# Right column: validate the inputs, then show the results or one error.
with col2:
    if all_data:
        if input_file is None:
            st.error("Please upload a PDF file.")
        elif search_term.strip() == "":
            st.error("Please enter a search term.")
        else:
            result = final_result(input_file, search_term)
            st.text_area("Search Results", result, height=400)