pdf_ocr_extraction_1 / extraction_service.py
Gangadhar123's picture
Update extraction_service.py
0af6ba4 verified
raw
history blame contribute delete
503 Bytes
import re
import json
class ExtractionService:
    """Extract named fields from text using regex patterns from a JSON config.

    The config file is a JSON object mapping each field name to a metadata
    object; the regex for a field is read from that object's ``"pattern"``
    key.
    """

    def __init__(self, config_path):
        """Load the field configuration.

        Args:
            config_path: Path to the JSON configuration file.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(config_path, "r", encoding="utf-8") as f:
            self.fields_config = json.load(f)

    def extract_fields(self, text):
        """Search ``text`` for every configured field.

        Each pattern is applied with ``re.IGNORECASE | re.MULTILINE``. The
        value of a field is the first capture group of the first match,
        stripped of surrounding whitespace. If the pattern defines no capture
        group, the whole match is used instead.

        Args:
            text: The text to search. ``None`` is treated as empty text.

        Returns:
            A dict mapping every configured field name to the extracted
            string, or to ``None`` when the field has no pattern, the pattern
            does not match, or its first capture group did not participate
            in the match.

        Raises:
            re.error: If a configured pattern is not a valid regex.
        """
        if text is None:
            text = ""

        flags = re.IGNORECASE | re.MULTILINE
        extracted = {}
        for field, meta in self.fields_config.items():
            pattern = meta.get("pattern")
            if not pattern:
                # No usable pattern configured for this field.
                extracted[field] = None
                continue

            match = re.search(pattern, text, flags)
            if match is None:
                extracted[field] = None
                continue

            # Fall back to the whole match when the pattern has no capture
            # group; group(1) would raise IndexError in that case.
            value = match.group(1) if match.re.groups else match.group(0)
            # An optional group that did not participate yields None.
            extracted[field] = value.strip() if value is not None else None
        return extracted