resumate / tests /test_linkedin_resume.py
gperdrizet's picture
Removed mocking from tests in favor of using actual sample PDF, cleaned up
71c8aa1 verified
"""
Unit tests for the linkedin_resume module.
"""
import unittest
import tempfile
import os
from pathlib import Path
from functions import linkedin_resume
# pylint: disable=protected-access
class TestExtractText(unittest.TestCase):
    """Test cases for the extract_text function.

    The real-file tests run against ``test_data/linkedin_profile.pdf``
    located next to this test module.
    """

    def _extract_sample_sections(self):
        """Run extract_text on the sample PDF and return its result.

        Fails the calling test when the sample PDF is missing. When
        extract_text returns None the calling test is skipped, so an
        unprocessable sample is reported instead of passing without any
        check being made.

        Returns:
            The dict returned by extract_text for the sample PDF.
        """
        test_pdf_path = Path(__file__).parent / "test_data" / "linkedin_profile.pdf"

        # Verify the test file exists before trying to read it
        self.assertTrue(test_pdf_path.exists(), f"Test PDF file not found: {test_pdf_path}")

        result = linkedin_resume.extract_text(str(test_pdf_path))

        if result is None:
            # None means the PDF could not be processed; surface that as a
            # skip rather than a silent pass
            self.skipTest("extract_text returned None for the sample PDF")

        self.assertIsInstance(result, dict)
        return result

    def test_extract_text_with_real_pdf(self):
        """Test text extraction using the actual test PDF file."""
        sections = self._extract_sample_sections()

        # Check that we have at least some content
        self.assertGreater(len(sections), 0)

        # Each value should be a string
        for content in sections.values():
            self.assertIsInstance(content, str)

    def test_extract_text_success(self):
        """Test successful text extraction from the actual test PDF file."""
        sections = self._extract_sample_sections()

        # Check that we have at least some content
        self.assertGreater(len(sections), 0)

        # Each section should be a non-blank string
        for section_name, content in sections.items():
            self.assertIsInstance(content, str)
            self.assertGreater(
                len(content.strip()),
                0,
                f"Section {section_name} should have content"
            )

    def test_extract_text_with_invalid_pdf(self):
        """Test handling of invalid PDF content by creating a temporary invalid file."""
        # delete=False keeps the file on disk after the with block closes it,
        # so extract_text can open it by path; it is removed in the finally
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.pdf', delete=False, encoding='utf-8'
        ) as temp_file:
            temp_file.write("This is not a valid PDF file")
            temp_path = temp_file.name

        try:
            # Content that is not a PDF is expected to produce None
            result = linkedin_resume.extract_text(temp_path)
            self.assertIsNone(result)
        finally:
            # Clean up the temporary file
            os.unlink(temp_path)

    def test_extract_text_parsing_behavior(self):
        """Test text extraction and parsing with the real PDF file."""
        sections = self._extract_sample_sections()

        for content in sections.values():
            self.assertIsInstance(content, str)
            # Content should be cleaned (no whitespace at start/end)
            self.assertEqual(content, content.strip())

    def test_extract_text_file_not_found(self):
        """Test handling when file doesn't exist."""
        result = linkedin_resume.extract_text("/nonexistent/file.pdf")

        # A missing file is expected to produce None
        self.assertIsNone(result)
class TestParseResumeText(unittest.TestCase):
    """Test cases for the _parse_resume_text function."""

    def test_parse_with_sections(self):
        """Test parsing text with recognizable sections."""
        # Built from explicit lines so the input does not depend on how the
        # source file itself is indented
        text = (
            "\n"
            "Contact Information\n"
            "John Doe\n"
            "john@example.com\n"
            "Summary\n"
            "Experienced software engineer with 5 years experience\n"
            "Experience\n"
            "Software Engineer at Tech Company\n"
            "Built web applications\n"
            "Skills\n"
            "Python, JavaScript, React\n"
            "Education\n"
            "Bachelor's in Computer Science\n"
            "University of Technology\n"
        )

        result = linkedin_resume._parse_resume_text(text)

        self.assertIsInstance(result, dict)
        for section in ("contact_info", "summary", "experience", "skills", "education"):
            self.assertIn(section, result)

    def test_parse_empty_text(self):
        """Test parsing empty or None text."""
        self.assertIsNone(linkedin_resume._parse_resume_text(""))
        self.assertIsNone(linkedin_resume._parse_resume_text(None))

    def test_parse_text_no_sections(self):
        """Test parsing text without recognizable sections."""
        text = "Just some random text without any section headers"

        result = linkedin_resume._parse_resume_text(text)

        self.assertIsInstance(result, dict)
        # Should still return a dict with at least the general section
        self.assertIn("general", result)

    def test_parse_calls_clean_section(self):
        """Test that parsed sections come back cleaned of excess whitespace."""
        # The summary line deliberately contains runs of several spaces so
        # the cleaning step has something to collapse
        text = (
            "\n"
            "Summary\n"
            "Some summary   text with    extra spaces\n"
            "Experience\n"
            "Some experience text\n"
        )

        result = linkedin_resume._parse_resume_text(text)

        # Non-empty input must yield a dict; otherwise the checks below
        # would be skipped and the test would pass without verifying anything
        self.assertIsInstance(result, dict)

        for content in result.values():
            # Verify that cleaning has occurred: no triple spaces should remain
            self.assertNotIn("   ", content)
            # Should be stripped
            self.assertEqual(content, content.strip())
class TestCleanSection(unittest.TestCase):
    """Test cases for the _clean_section function."""

    def test_clean_unicode_normalization(self):
        """Test that text with accented characters yields a non-empty string."""
        text = "Café résumé naïve"  # Text with accented characters

        result = linkedin_resume._clean_section(text)

        # Only the type and non-emptiness are checked; the exact normalized
        # form is not pinned by this test
        self.assertIsInstance(result, str)
        self.assertNotEqual(result, "")

    def test_clean_remove_page_numbers(self):
        """Test removal of LinkedIn page numbers."""
        text = "Some content\nPage 1 of 3\nMore content"

        result = linkedin_resume._clean_section(text)

        # Should remove page indicators but keep the surrounding content
        self.assertNotIn("Page 1 of 3", result)
        self.assertIn("Some content", result)
        self.assertIn("More content", result)

    def test_clean_calls_whitespace_cleaner(self):
        """Test that _clean_section properly cleans whitespace."""
        # Runs of 4, 3 and 2 spaces between the words
        text = "Some    text   with  spaces"

        result = linkedin_resume._clean_section(text)

        # Should clean multiple spaces to single spaces
        self.assertNotIn("  ", result)  # No double spaces should remain
        self.assertIn("Some text with spaces", result)  # Should have single spaces

    def test_clean_strip_whitespace(self):
        """Test stripping leading/trailing whitespace."""
        text = " Some content "

        result = linkedin_resume._clean_section(text)

        # Should strip leading and trailing whitespace
        self.assertFalse(result.startswith(" "))
        self.assertFalse(result.endswith(" "))

    def test_clean_empty_input(self):
        """Test handling of empty and whitespace-only input."""
        self.assertEqual(linkedin_resume._clean_section(""), "")
        self.assertEqual(linkedin_resume._clean_section(" "), "")
# Run the test suite when this module is executed directly as a script
if __name__ == "__main__":
    unittest.main()