# Spaces: Bonosa2 / Scribbled-docs-notes (Sleeping)
# File size: 3,255 Bytes
# 37d6469

import pytest
from PIL import Image, ImageDraw, ImageFont
import numpy as np

class TestOCRFunctionality:
    """Test OCR text extraction functionality"""
    
    def test_preprocess_image_for_ocr(self, sample_image):
        """Test image preprocessing for OCR"""
        # Mock the preprocessing function
        def mock_preprocess_image_for_ocr(image):
            img_array = np.array(image)
            if len(img_array.shape) == 3:
                gray = np.mean(img_array, axis=2).astype(np.uint8)
            else:
                gray = img_array
            return gray
        
        result = mock_preprocess_image_for_ocr(sample_image)
        assert isinstance(result, np.ndarray)
        assert len(result.shape) == 2  # Should be grayscale

    def test_extract_text_from_image_none_input(self):
        """Test OCR with None input"""
        def mock_extract_text_from_image(image):
            if image is None:
                return "❌ No image provided"
            return "Sample extracted text"
        
        result = mock_extract_text_from_image(None)
        assert result == "❌ No image provided"

    def test_extract_text_from_valid_image(self, sample_image):
        """Test OCR with valid image"""
        def mock_extract_text_from_image(image):
            if image is None:
                return "❌ No image provided"
            # Simulate successful OCR
            return "Patient: John Doe, 35-year-old male with chest pain"
        
        result = mock_extract_text_from_image(sample_image)
        assert "Patient:" in result
        assert "chest pain" in result

    def test_clean_extracted_text(self):
        """Test text cleaning functionality"""
        def mock_clean_extracted_text(text):
            lines = [line.strip() for line in text.split('\n') if line.strip()]
            cleaned_text = '\n'.join(lines)
            cleaned_text = cleaned_text.replace('|', '').replace('_', ' ')
            return cleaned_text.strip()
        
        dirty_text = "  Line 1  \n\n  Line 2 with | pipes  \n   \n  Line_3  "
        clean_text = mock_clean_extracted_text(dirty_text)
        
        assert "Line 1" in clean_text
        assert "Line 2 with  pipes" in clean_text
        assert "Line 3" in clean_text
        assert "|" not in clean_text

    def test_image_preprocessing_edge_cases(self):
        """Test image preprocessing with edge cases"""
        def mock_preprocess_image_for_ocr(image):
            if image is None:
                return None
            
            img_array = np.array(image)
            # Handle grayscale images
            if len(img_array.shape) == 2:
                return img_array
            # Handle RGB images
            elif len(img_array.shape) == 3:
                return np.mean(img_array, axis=2).astype(np.uint8)
            else:
                raise ValueError("Unsupported image format")
        
        # Test with None
        assert mock_preprocess_image_for_ocr(None) is None
        
        # Test with small grayscale image
        small_gray = Image.new('L', (50, 50), color=128)
        result = mock_preprocess_image_for_ocr(small_gray)
        assert isinstance(result, np.ndarray)
        assert len(result.shape) == 2