Spaces:

gperdrizet
/

resumate

Configuration error

App Files Files Community

resumate / tests /test_linkedin_resume.py

gperdrizet

Removed mocking from tests in favor of using actual sample PDF, cleaned up

71c8aa1 verified about 1 month ago

raw

history blame contribute delete

8.73 kB

	"""
	Unit tests for the linkedin_resume module.
	"""

	import unittest
	import tempfile
	import os
	from pathlib import Path
	from functions import linkedin_resume

	# pylint: disable=protected-access


	class TestExtractText(unittest.TestCase):
	    """Test cases for the extract_text function.

	    Tests that need a real document read the sample file stored at
	    ``test_data/linkedin_profile.pdf`` next to this test module.
	    """

	    def _sample_pdf_path(self):
	        """Return the path to the sample PDF, failing the test if it is missing."""
	        test_pdf_path = Path(__file__).parent / "test_data" / "linkedin_profile.pdf"
	        self.assertTrue(test_pdf_path.exists(), f"Test PDF file not found: {test_pdf_path}")
	        return test_pdf_path

	    def _extract_sample_sections(self):
	        """Run extract_text on the sample PDF and return the resulting dict.

	        The original tests treated a ``None`` result as acceptable ("might
	        happen with some PDF formats"). That tolerance is kept, but the
	        calling test is reported as skipped rather than passing with no
	        meaningful assertion, so an unprocessable fixture is visible in
	        the test report.
	        """
	        result = linkedin_resume.extract_text(str(self._sample_pdf_path()))

	        if result is None:
	            self.skipTest("extract_text returned None for the sample PDF")

	        self.assertIsInstance(result, dict)
	        return result

	    def test_extract_text_with_real_pdf(self):
	        """Test text extraction using the actual test PDF file."""
	        result = self._extract_sample_sections()

	        # Check that we have at least some content
	        self.assertGreater(len(result), 0)

	        # Each value should be a string
	        for content in result.values():
	            self.assertIsInstance(content, str)

	    def test_extract_text_success(self):
	        """Test successful text extraction from the actual test PDF file."""
	        result = self._extract_sample_sections()

	        # Check that we have at least some content
	        self.assertGreater(len(result), 0)

	        # Each section must be a non-blank string
	        for section_name, content in result.items():
	            self.assertIsInstance(content, str)
	            self.assertGreater(
	                len(content.strip()),
	                0,
	                f"Section {section_name} should have content"
	            )

	    def test_extract_text_with_invalid_pdf(self):
	        """Test handling of invalid PDF content by creating a temporary invalid file."""

	        # delete=False so the file survives the with-block and can be
	        # reopened by path; it is removed explicitly in the finally clause.
	        with tempfile.NamedTemporaryFile(
	            mode='w', suffix='.pdf', delete=False, encoding='utf-8'
	        ) as temp_file:
	            temp_file.write("This is not a valid PDF file")
	            temp_path = temp_file.name

	        try:
	            # This should return None due to invalid PDF format
	            result = linkedin_resume.extract_text(temp_path)
	            self.assertIsNone(result)

	        finally:
	            # Clean up the temporary file
	            os.unlink(temp_path)

	    def test_extract_text_parsing_behavior(self):
	        """Test text extraction and parsing with the real PDF file."""
	        result = self._extract_sample_sections()

	        for content in result.values():
	            self.assertIsInstance(content, str)

	            # Content should be cleaned (no whitespace at start/end)
	            self.assertEqual(content, content.strip())

	    def test_extract_text_file_not_found(self):
	        """Test handling when file doesn't exist."""

	        result = linkedin_resume.extract_text("/nonexistent/file.pdf")

	        # Should return None when file not found
	        self.assertIsNone(result)


	class TestParseResumeText(unittest.TestCase):
	    """Test cases for the _parse_resume_text function."""

	    def test_parse_with_sections(self):
	        """Test parsing text with recognizable sections."""
	        text = """
	Contact Information
	John Doe
	john@example.com

	Summary
	Experienced software engineer with 5 years experience

	Experience
	Software Engineer at Tech Company
	Built web applications

	Skills
	Python, JavaScript, React

	Education
	Bachelor's in Computer Science
	University of Technology
	"""

	        result = linkedin_resume._parse_resume_text(text)

	        self.assertIsInstance(result, dict)

	        # Every header present in the input should yield its own key
	        for section in ("contact_info", "summary", "experience", "skills", "education"):
	            self.assertIn(section, result)

	    def test_parse_empty_text(self):
	        """Test parsing empty or None text."""

	        self.assertIsNone(linkedin_resume._parse_resume_text(""))
	        self.assertIsNone(linkedin_resume._parse_resume_text(None))

	    def test_parse_text_no_sections(self):
	        """Test parsing text without recognizable sections."""

	        text = "Just some random text without any section headers"

	        result = linkedin_resume._parse_resume_text(text)

	        self.assertIsInstance(result, dict)

	        # Should still return a dict with at least the general section
	        self.assertIn("general", result)

	    def test_parse_calls_clean_section(self):
	        """Test that parsed section content comes back whitespace-cleaned.

	        The input deliberately contains runs of three spaces so the
	        assertions below can only pass if cleaning actually happened.
	        """

	        text = """
	Summary
	Some   summary   text   with   extra   spaces

	Experience
	Some experience text
	"""

	        result = linkedin_resume._parse_resume_text(text)

	        # Non-empty input is expected to produce a dict (as in the other
	        # tests in this class); asserting it keeps the loop from being
	        # skipped silently.
	        self.assertIsInstance(result, dict)

	        for content in result.values():
	            # Verify that cleaning has occurred (no excessive spaces)
	            self.assertNotIn("   ", content)  # No triple spaces should remain
	            self.assertEqual(content, content.strip())  # Should be stripped


	class TestCleanSection(unittest.TestCase):
	    """Test cases for the _clean_section function."""

	    def test_clean_unicode_normalization(self):
	        """Test that text with accented characters survives cleaning."""

	        text = "Café résumé naïve"  # Text with accented characters
	        result = linkedin_resume._clean_section(text)

	        # Only checks that a non-empty string comes back; the exact
	        # normalized form is not pinned here.
	        self.assertIsInstance(result, str)
	        self.assertNotEqual(result, "")

	    def test_clean_remove_page_numbers(self):
	        """Test removal of LinkedIn page numbers."""

	        text = "Some content\nPage 1 of 3\nMore content"
	        result = linkedin_resume._clean_section(text)

	        # Should remove page indicators
	        self.assertNotIn("Page 1 of 3", result)
	        self.assertIn("Some content", result)
	        self.assertIn("More content", result)

	    def test_clean_calls_whitespace_cleaner(self):
	        """Test that _clean_section collapses runs of spaces to one space."""

	        # Input mixes runs of two, three and four spaces
	        text = "Some    text   with  spaces"
	        result = linkedin_resume._clean_section(text)

	        # Should clean multiple spaces to single spaces
	        self.assertNotIn("  ", result)  # No double spaces should remain
	        self.assertIn("Some text with spaces", result)  # Should have single spaces

	    def test_clean_strip_whitespace(self):
	        """Test stripping leading/trailing whitespace."""

	        text = " Some content "
	        result = linkedin_resume._clean_section(text)

	        # Should strip leading and trailing whitespace
	        self.assertFalse(result.startswith(" "))
	        self.assertFalse(result.endswith(" "))

	    def test_clean_empty_input(self):
	        """Test handling of empty and whitespace-only input."""

	        self.assertEqual(linkedin_resume._clean_section(""), "")
	        self.assertEqual(linkedin_resume._clean_section(" "), "")


	# Allow running this test module directly as a script.
	if __name__ == '__main__':
	    unittest.main()