from typing import Literal
import sys
from pathlib import Path

# Add the directory three levels above this file to sys.path so the absolute
# `src.utils` import below resolves (presumably the project root - confirm).
sys.path.append(str(Path(__file__).parent.parent.parent))

from src.utils import determine_file_type, get_bytes_from_path, get_PIL_image_from_path
from .content import AzureContentExtractor
from .keypair import AzureKeyPairsExtractor


class AzureExtractor:
    """Facade that routes a file to the content or key-pair Azure extractor.

    Both underlying extractors are built once, from the same endpoint and
    key, and the one matching the requested mode is chosen on every call to
    :meth:`extract`.
    """

    def __init__(self, endpoint: str, key: str):
        """Build both underlying extractors.

        Args:
            endpoint: Passed unchanged to both extractor constructors.
            key: Passed unchanged to both extractor constructors.
        """
        self._content_extractor = AzureContentExtractor(endpoint, key)
        self._keypair_extractor = AzureKeyPairsExtractor(endpoint, key)
        # Extractor chosen by the most recent extract() call; None until then.
        self._extractor = None

    def extract(self, file_path: str, mode: Literal['content', 'key_pair']) -> dict[str, str]:
        """Run the extractor selected by ``mode`` on the file at ``file_path``.

        Args:
            file_path: Path of the file to process.
            mode: ``'content'`` or ``'key_pair'``; selects the extractor.

        Returns:
            Whatever the selected extractor's ``extract_pdf`` or
            ``extract_image`` method returns.

        Raises:
            KeyError: If ``mode`` is not one of the supported values.
            ValueError: If ``determine_file_type`` reports a type other than
                ``'pdf'`` or ``'image'``.
        """
        # Select the extractor first so an invalid mode fails before any
        # file access happens.
        self._set_extractor(mode)

        file_type = determine_file_type(file_path)
        if file_type == 'pdf':
            pdf_bytes = get_bytes_from_path(file_path)
            result = self._extractor.extract_pdf(pdf_bytes)
        elif file_type == 'image':
            image = get_PIL_image_from_path(file_path)
            result = self._extractor.extract_image(image)
        else:
            # Without this branch `result` would be unbound and the return
            # below would fail with an opaque UnboundLocalError.
            raise ValueError(
                f"Unsupported file type {file_type!r} for {file_path!r}; "
                "expected 'pdf' or 'image'"
            )

        return result

    def _set_extractor(self, mode: Literal['content', 'key_pair']) -> None:
        """Store the extractor matching ``mode`` in ``self._extractor``.

        Raises:
            KeyError: If ``mode`` is neither ``'content'`` nor ``'key_pair'``.
        """
        extractor_mapping = {
            'content': self._content_extractor,
            'key_pair': self._keypair_extractor,
        }
        self._extractor = extractor_mapping[mode]