"""Tests for ``AzureExtractor.extract`` in ``content`` and ``key_pair`` modes."""

import os

import pytest

from src.extractor import AzureExtractor


@pytest.fixture
def extractor():
    """Build an ``AzureExtractor`` from environment-provided settings.

    Reads ``AZURE_ENDPOINT`` and ``AZURE_KEY`` from ``os.environ``; a
    missing variable raises ``KeyError`` during fixture setup.
    """
    endpoint = os.environ["AZURE_ENDPOINT"]
    key = os.environ["AZURE_KEY"]
    return AzureExtractor(endpoint=endpoint, key=key)


# NOTE(review): ``image_path`` and ``pdf_path`` are fixtures defined outside
# this file (presumably in a conftest.py) -- confirm where they live.


def test_extract_image_in_content_mode(extractor, image_path):
    """Content mode on an image returns a dict holding only ``content``."""
    extracted = extractor.extract(image_path, mode="content")

    assert isinstance(extracted, dict)
    assert "content" in extracted
    assert len(extracted) == 1

    text = extracted["content"]
    assert "CREDIT APPLICATION" in text


def test_extract_image_in_keypair_mode(extractor, image_path):
    """Key-pair mode on an image returns a dict with more than one entry."""
    extracted = extractor.extract(image_path, mode="key_pair")

    assert isinstance(extracted, dict)
    assert len(extracted) > 1


def test_extract_pdf_in_content_mode(extractor, pdf_path):
    """Content mode on a PDF returns the expected text under ``content``."""
    extracted = extractor.extract(pdf_path, mode="content")

    assert "content" in extracted
    assert isinstance(extracted, dict)

    text = extracted["content"]
    assert len(text) > 10
    assert "CREDIT APPLICATION" in text
    assert "Student ID" in text


def test_extract_pdf_in_keypair_mode(extractor, pdf_path):
    """Key-pair mode on a PDF returns the expected phone and e-mail values."""
    extracted = extractor.extract(pdf_path, mode="key_pair")

    assert isinstance(extracted, dict)

    phone = extracted["TELEPHONE NO."]
    assert phone == "(243) 555-2309"

    email = extracted["Student e-mail"]
    assert email == "john.doe@example.com"