File size: 1,254 Bytes
2548af8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os

import pytest

from src.extractor import AzureExtractor


@pytest.fixture
def extractor():
    """Build an ``AzureExtractor`` from environment-provided credentials.

    Reads the ``AZURE_ENDPOINT`` and ``AZURE_KEY`` environment variables and
    passes them as ``endpoint`` and ``key``.  When either variable is unset
    or empty the requesting test is skipped with an explicit reason, instead
    of erroring with a bare ``KeyError`` during fixture setup.
    """
    endpoint = os.environ.get('AZURE_ENDPOINT')
    key = os.environ.get('AZURE_KEY')
    if not endpoint or not key:
        # Positional reason keeps this compatible with older pytest releases.
        pytest.skip('AZURE_ENDPOINT and AZURE_KEY must be set to run the Azure extractor tests')
    return AzureExtractor(endpoint=endpoint, key=key)


def test_extract_image_in_content_mode(extractor, image_path):
    """Extracting ``image_path`` with ``mode='content'`` yields a single-key dict.

    The only key must be ``'content'`` and its value must contain the text
    ``'CREDIT APPLICATION'``.
    """
    extracted = extractor.extract(image_path, mode='content')

    assert isinstance(extracted, dict)
    # Exactly one key, and that key is 'content'.
    assert extracted.keys() == {'content'}

    text = extracted['content']
    assert 'CREDIT APPLICATION' in text


def test_extract_image_in_keypair_mode(extractor, image_path):
    """Extracting ``image_path`` with ``mode='key_pair'`` yields a dict with more than one entry."""
    pairs = extractor.extract(image_path, mode='key_pair')

    assert isinstance(pairs, dict)
    entry_count = len(pairs)
    assert entry_count >= 2


def test_extract_pdf_in_content_mode(extractor, pdf_path):
    """Extracting ``pdf_path`` with ``mode='content'`` yields the expected text.

    The result must be a dict with a ``'content'`` entry longer than ten
    characters that contains both ``'CREDIT APPLICATION'`` and
    ``'Student ID'``.
    """
    result = extractor.extract(pdf_path, mode='content')

    # Check the type before probing for the key, as the sibling tests do:
    # a non-dict result then fails on this assertion rather than raising
    # TypeError (None) or silently doing a substring test (str).
    assert isinstance(result, dict)
    assert 'content' in result

    content = result['content']
    assert len(content) > 10
    assert 'CREDIT APPLICATION' in content
    assert 'Student ID' in content


def test_extract_pdf_in_keypair_mode(extractor, pdf_path):
    """Extracting ``pdf_path`` with ``mode='key_pair'`` yields the expected field values.

    Checks the ``'TELEPHONE NO.'`` and ``'Student e-mail'`` entries of the
    returned dict against fixed expected strings.
    """
    pairs = extractor.extract(pdf_path, mode='key_pair')

    assert isinstance(pairs, dict)

    # Field name -> value expected for the document at pdf_path.
    expected_fields = (
        ('TELEPHONE NO.', '(243) 555-2309'),
        ('Student e-mail', 'john.doe@example.com'),
    )
    for field, expected in expected_fields:
        assert pairs[field] == expected