"""
Unit tests for the linkedin_resume module.
"""
import unittest | |
import tempfile | |
import os | |
from pathlib import Path | |
from functions import linkedin_resume | |
# pylint: disable=protected-access
class TestExtractText(unittest.TestCase):
    """Test cases for the extract_text function.

    Several tests run ``extract_text`` against a sample PDF stored in the
    ``test_data`` directory next to this module. These tests treat a ``None``
    return value as "the PDF could not be processed"; in that case the test
    is reported as skipped instead of silently passing without checking
    anything.
    """

    # Sample LinkedIn profile export located next to this test module.
    TEST_PDF_PATH = Path(__file__).parent / "test_data" / "linkedin_profile.pdf"

    def _extract_from_test_pdf(self):
        """Run extract_text on the sample PDF and return the resulting dict.

        Fails the calling test if the fixture file is missing or the result
        is neither ``None`` nor a dict, and skips the calling test when
        ``extract_text`` returns ``None`` (nothing could be verified).
        """
        self.assertTrue(
            self.TEST_PDF_PATH.exists(),
            f"Test PDF file not found: {self.TEST_PDF_PATH}",
        )

        result = linkedin_resume.extract_text(str(self.TEST_PDF_PATH))

        if result is None:
            # Make the "could not process" outcome visible in the test report
            # rather than counting it as a pass.
            self.skipTest(
                "extract_text returned None for the sample PDF; nothing to verify"
            )

        self.assertIsInstance(result, dict)
        return result

    def test_extract_text_with_real_pdf(self):
        """Test text extraction using the actual test PDF file."""
        result = self._extract_from_test_pdf()

        # There must be at least one section
        self.assertGreater(len(result), 0)

        # Each value should be a string
        for section_name, content in result.items():
            with self.subTest(section=section_name):
                self.assertIsInstance(content, str)

    def test_extract_text_success(self):
        """Test successful text extraction from the actual test PDF file."""
        result = self._extract_from_test_pdf()

        # There must be at least one section
        self.assertGreater(len(result), 0)

        # Each section must be a non-blank string
        for section_name, content in result.items():
            with self.subTest(section=section_name):
                self.assertIsInstance(content, str)
                self.assertGreater(
                    len(content.strip()),
                    0,
                    f"Section {section_name} should have content"
                )

    def test_extract_text_with_invalid_pdf(self):
        """Test handling of invalid PDF content by creating a temporary invalid file."""
        # delete=False so the file can be reopened by path after this handle
        # is closed; it is removed explicitly in the finally block below.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdf', delete=False) as temp_file:
            temp_file.write("This is not a valid PDF file")
            temp_path = temp_file.name

        try:
            # This should return None due to invalid PDF format
            result = linkedin_resume.extract_text(temp_path)
            self.assertIsNone(result)
        finally:
            # Clean up the temporary file
            os.unlink(temp_path)

    def test_extract_text_parsing_behavior(self):
        """Test text extraction and parsing with the real PDF file."""
        result = self._extract_from_test_pdf()

        for section_name, content in result.items():
            with self.subTest(section=section_name):
                self.assertIsInstance(content, str)
                # Content should be cleaned (no whitespace at start/end)
                self.assertEqual(content, content.strip())

    def test_extract_text_file_not_found(self):
        """Test handling when file doesn't exist."""
        result = linkedin_resume.extract_text("/nonexistent/file.pdf")

        # Should return None when file not found
        self.assertIsNone(result)
class TestParseResumeText(unittest.TestCase):
    """Test cases for the _parse_resume_text function."""

    def test_parse_with_sections(self):
        """Test parsing text with recognizable sections."""
        # Built line by line so the input does not depend on source indentation.
        text = (
            "\n"
            "Contact Information\n"
            "John Doe\n"
            "john@example.com\n"
            "Summary\n"
            "Experienced software engineer with 5 years experience\n"
            "Experience\n"
            "Software Engineer at Tech Company\n"
            "Built web applications\n"
            "Skills\n"
            "Python, JavaScript, React\n"
            "Education\n"
            "Bachelor's in Computer Science\n"
            "University of Technology\n"
        )

        result = linkedin_resume._parse_resume_text(text)

        self.assertIsInstance(result, dict)
        expected_sections = ("contact_info", "summary", "experience", "skills", "education")
        for section in expected_sections:
            with self.subTest(section=section):
                self.assertIn(section, result)

    def test_parse_empty_text(self):
        """Test parsing empty or None text."""
        self.assertIsNone(linkedin_resume._parse_resume_text(""))
        self.assertIsNone(linkedin_resume._parse_resume_text(None))

    def test_parse_text_no_sections(self):
        """Test parsing text without recognizable sections."""
        text = "Just some random text without any section headers"

        result = linkedin_resume._parse_resume_text(text)

        self.assertIsInstance(result, dict)
        # Should still return a dict with at least the general section
        self.assertIn("general", result)

    def test_parse_calls_clean_section(self):
        """Test that parsed sections come back cleaned (whitespace collapsed and stripped)."""
        # The summary line contains runs of three spaces so that the
        # triple-space assertion below actually exercises the cleaning step.
        text = (
            "\n"
            "Summary\n"
            "Some summary text   with extra   spaces\n"
            "Experience\n"
            "Some experience text\n"
        )

        result = linkedin_resume._parse_resume_text(text)

        # Non-empty text is expected to yield a dict (as in the sibling
        # tests); asserting it prevents the loop below from being skipped
        # silently.
        self.assertIsInstance(result, dict)

        for section_name, content in result.items():
            with self.subTest(section=section_name):
                # No triple spaces should remain after cleaning
                self.assertNotIn("   ", content)
                # Should be stripped
                self.assertEqual(content, content.strip())
class TestCleanSection(unittest.TestCase):
    """Test cases for the _clean_section function."""

    def test_clean_unicode_normalization(self):
        """Test that text with accented characters yields a non-empty string."""
        text = "Café résumé naïve"  # Text with accented characters

        result = linkedin_resume._clean_section(text)

        self.assertIsInstance(result, str)
        self.assertNotEqual(result, "")

    def test_clean_remove_page_numbers(self):
        """Test removal of LinkedIn page numbers."""
        text = "Some content\nPage 1 of 3\nMore content"

        result = linkedin_resume._clean_section(text)

        # Should remove page indicators but keep the surrounding content
        self.assertNotIn("Page 1 of 3", result)
        self.assertIn("Some content", result)
        self.assertIn("More content", result)

    def test_clean_calls_whitespace_cleaner(self):
        """Test that _clean_section properly cleans whitespace."""
        # Runs of several spaces between words give the cleaner something
        # to collapse.
        text = "Some    text   with  spaces"

        result = linkedin_resume._clean_section(text)

        # Should clean multiple spaces to single spaces
        self.assertNotIn("  ", result)  # No double spaces should remain
        self.assertIn("Some text with spaces", result)  # Should have single spaces

    def test_clean_strip_whitespace(self):
        """Test stripping leading/trailing whitespace."""
        text = "   Some content   "

        result = linkedin_resume._clean_section(text)

        # Should strip leading and trailing whitespace
        self.assertFalse(result.startswith(" "))
        self.assertFalse(result.endswith(" "))

    def test_clean_empty_input(self):
        """Test handling of empty and whitespace-only input."""
        self.assertEqual(linkedin_resume._clean_section(""), "")
        self.assertEqual(linkedin_resume._clean_section("   "), "")
# Run the test suite when this module is executed directly.
if __name__ == '__main__':
    unittest.main()