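# NOTE(review): the three lines below look like web-page residue, not program
# text; they are kept as comments so the file parses as Python.
# vnosri's picture
# sophia
# 8332c01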
import os
import re
class TextFinder:
    """Find paragraphs containing a phrase in the files under a folder.

    ``folder`` is the root directory that ``find_matches`` walks recursively.
    """

    # A paragraph break is a newline, optional whitespace, then another newline.
    # Compiled once here instead of being re-specified for every file.
    _PARAGRAPH_BREAK = re.compile(r'\n\s*\n')

    def __init__(self, folder):
        """Remember the root folder to search."""
        self.folder = folder

    def find_matches(self, pattern):
        """Return every paragraph that contains *pattern*, ignoring case.

        Every file below ``self.folder`` is read as UTF-8 text and split into
        paragraphs on blank lines. The comparison is a plain substring test
        on lowercased text; *pattern* is not treated as a regular expression.

        Args:
            pattern: The text to look for.

        Returns:
            A list of dicts, one per matching paragraph, with the keys
            ``"Find_text"`` (the paragraph with surrounding whitespace
            stripped) and ``"Book_source"`` (the file name up to its first
            dot).
        """
        needle = pattern.lower()
        matches = []
        for root, _, files in os.walk(self.folder):
            for file_name in files:
                file_path = os.path.join(root, file_name)
                if not os.path.isfile(file_path):
                    continue
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except (OSError, UnicodeDecodeError):
                    # The walk visits every file, not just text files; one
                    # binary or unreadable file must not abort the search.
                    continue
                # Derive the source from the bare file name rather than by
                # splitting the joined path on "/", which is wrong wherever
                # the OS path separator is not "/".
                book_source = file_name.split(".")[0]
                for paragraph in self._PARAGRAPH_BREAK.split(content):
                    if needle in paragraph.lower():
                        matches.append({
                            "Find_text": paragraph.strip(),
                            "Book_source": book_source,
                        })
        return matches
# Demo entry point: search 'example_folder' for a phrase and print the hits.
if __name__ == "__main__":
    print(TextFinder('example_folder').find_matches('text_to_find'))